From 9885f38a7f814982481639f6ecfdb47d808c91ac Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 09:58:29 -0400 Subject: [PATCH 01/55] Re-organized the operators source files. --- Sources/DeepLearning/Operators/Basic.swift | 23 +++ Sources/DeepLearning/Operators/Math.swift | 22 +++ .../{Operators.swift => Operators/NN.swift} | 154 ++++++++---------- 3 files changed, 114 insertions(+), 85 deletions(-) create mode 100644 Sources/DeepLearning/Operators/Basic.swift create mode 100644 Sources/DeepLearning/Operators/Math.swift rename Sources/DeepLearning/{Operators.swift => Operators/NN.swift} (74%) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift new file mode 100644 index 000000000..2e43792e7 --- /dev/null +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -0,0 +1,23 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +@_exported import TensorFlow +#endif + +/// Returns a tensor with the same shape and scalars as the specified tensor. +@differentiable +public func identity(_ x: Tensor) -> Tensor { + return x +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift new file mode 100644 index 000000000..e838ff32d --- /dev/null +++ b/Sources/DeepLearning/Operators/Math.swift @@ -0,0 +1,22 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +@_exported import TensorFlow +#endif + +/// Returns the values of the specified tensor rounded to the nearest integer, element-wise. +public func round(_ x: Tensor) -> Tensor { + return Raw.round(x) +} diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators/NN.swift similarity index 74% rename from Sources/DeepLearning/Operators.swift rename to Sources/DeepLearning/Operators/NN.swift index 51b2406a8..6ca32e22d 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -13,111 +13,95 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -import TensorFlow +@_exported import TensorFlow #endif -/// Returns the values of the specified tensor rounded to the nearest integer, element-wise. -public func round(_ x: Tensor) -> Tensor { - return Raw.round(x) -} - -/// Returns a tensor with the same shape and scalars as the specified tensor. 
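As a quick illustration of the two helpers introduced above, here is a usage sketch; the scalar values and the `import DeepLearning` module name are assumptions for the example, not part of the patch.

```swift
import DeepLearning

let x = Tensor<Float>([-1.4, 0.6, 2.7])

// `round` lowers to `Raw.round` and rounds each element to the nearest integer.
let rounded = round(x)   // expected: [-1.0, 1.0, 3.0]

// `identity` is a differentiable no-op: same shape and scalars as its argument.
let y = identity(x)      // same values as `x`
```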
-@differentiable -public func identity(_ x: Tensor) -> Tensor { - return x -} - //===------------------------------------------------------------------------------------------===// // Normalization //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar: TensorFlowFloatingPoint { - // TODO: Verify that these calculations are correct. - @inlinable - internal func _vjpBatchNormalized( - alongAxis axis: Int32, - offset: Tensor, - scale: Tensor, - epsilon: Scalar - ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { - let value = batchNormalized(alongAxis: axis, offset: offset, scale: scale, - epsilon: epsilon) - return (value, { v in - let mean = self.mean(alongAxes: axis) - let squaredDiff: Tensor = Raw.squaredDifference(self, mean) - let variance = squaredDiff.mean(alongAxes: axis) - - let diff = self - mean - let inv = rsqrt(variance + epsilon) - let norm = diff * inv - - let dNorm = v * scale - let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3) - let dMean = (-dNorm * inv).sum(alongAxes: axis) + - dVariance * (-diff * 2).mean(alongAxes: axis) - let dOffset = v.sum(alongAxes: axis) - let dScale = (norm * v).sum(alongAxes: axis) - let dim = Tensor(Tensor(self.shapeTensor[axis])) - let tmp = (dNorm * inv) + (dVariance * 2 * dMean / dim) - let dSelf = tmp + (dMean / dim) - return (dSelf, dOffset, dScale) - }) - } +public extension Tensor where Scalar: BinaryFloatingPoint { + /// Computes the batch normalized tensor along the specified axis. + /// + /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are + /// respectively the mean and variance of `self` along `axis`. + /// + /// - Parameters: + /// - axis: The batch dimension. + /// - offset: The offset, also known as beta. + /// - scale: The scale, also known as gamma. + /// - epsilon: A small value added to the denominator for numerical stability. + @inlinable + @differentiable( + wrt: (self, offset, scale), + vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) + func batchNormalized( + alongAxis axis: Int32, + offset: Tensor = Tensor(0), + scale: Tensor = Tensor(1), + epsilon: Scalar = 0.001 + ) -> Tensor { + let mean = self.mean(alongAxes: axis) + let squaredDiff: Tensor = Raw.squaredDifference(self, mean) + let variance = squaredDiff.mean(alongAxes: axis) + let inv = rsqrt(variance + epsilon) * scale + return self * inv + offset - mean * inv + } } -public extension Tensor where Scalar: BinaryFloatingPoint { - /// Computes the batch normalized tensor along the specified axis. - /// - /// Specifically, returns `(self - mu)/(var + epsilon) * gamma + beta` where - /// `mu` and `var` are respectively the mean and variance of `self` along - /// `axis`. - /// - /// - Parameters: - /// - axis: The batch dimension. - /// - offset: The offset, also known as beta. - /// - scale: The scale, also known as gamma. - /// - epsilon: A small value added to the denominator for numerical - /// stability. 
- @inlinable - @differentiable( - wrt: (self, offset, scale), vjp: _vjpBatchNormalized - where Scalar : TensorFlowFloatingPoint - ) - func batchNormalized( - alongAxis axis: Int32, - offset: Tensor = Tensor(0), - scale: Tensor = Tensor(1), - epsilon: Scalar = 0.001 - ) -> Tensor { - let mean = self.mean(alongAxes: axis) - let squaredDiff: Tensor = Raw.squaredDifference(self, mean) - let variance = squaredDiff.mean(alongAxes: axis) - let inv = rsqrt(variance + epsilon) * scale - return self * inv + offset - mean * inv - } +internal extension Tensor where Scalar: TensorFlowFloatingPoint { + // TODO: Verify that these calculations are correct. + @inlinable + func _vjpBatchNormalized( + alongAxis axis: Int32, + offset: Tensor, + scale: Tensor, + epsilon: Scalar + ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { + let value = batchNormalized( + alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) + return (value, { v in + let mean = self.mean(alongAxes: axis) + let squaredDiff: Tensor = Raw.squaredDifference(self, mean) + let variance = squaredDiff.mean(alongAxes: axis) + let diff = self - mean + let inv = rsqrt(variance + epsilon) + let norm = diff * inv + let dNorm = v * scale + let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3) + let dMean = (-dNorm * inv).sum(alongAxes: axis) + + dVariance * (-diff * 2).mean(alongAxes: axis) + let dOffset = v.sum(alongAxes: axis) + let dScale = (norm * v).sum(alongAxes: axis) + let dim = Tensor(Tensor(self.shapeTensor[axis])) + let tmp = (dNorm * inv) + (dVariance * 2 * dMean / dim) + let dSelf = tmp + (dMean / dim) + return (dSelf, dOffset, dScale) + }) + } } //===------------------------------------------------------------------------------------------===// -// Convolution and pooling +// Convolution and Pooling //===------------------------------------------------------------------------------------------===// /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { - /// The "valid" padding scheme. - case valid - /// The "same" padding scheme. - case same + /// The "valid" padding scheme. + case valid + /// The "same" padding scheme. + case same } public extension Padding { - @inlinable - var raw: Raw.Padding { - switch self { - case .same: return .same - case .valid: return .valid - } + @inlinable + var raw: Raw.Padding { + switch self { + case .same: return .same + case .valid: return .valid } + } } public extension Tensor where Scalar: TensorFlowFloatingPoint { From 3ce90c09ba050cbe97cc19e5f4b9d40d2a2c2ee1 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 13:30:25 -0400 Subject: [PATCH 02/55] Added support for 'stacked', 'concatenated', 'gathered', 'batchGathered', and 'masked'. --- Sources/DeepLearning/Operators/Basic.swift | 253 ++++++++++++++++++++- 1 file changed, 249 insertions(+), 4 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 2e43792e7..b491832ac 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -16,8 +16,253 @@ @_exported import TensorFlow #endif -/// Returns a tensor with the same shape and scalars as the specified tensor. 
-@differentiable -public func identity(_ x: Tensor) -> Tensor { - return x +public extension Tensor where Scalar: TensorFlowScalar { + /// Stacks the current tensor with `tensors`, along the `axis` dimension, into a tensor with + /// rank one higher than the current tensor and each tensor in `tensors`. + /// + /// Given `self` and `tensors` all have shape `[A, B, C]`, and `tensors.count = N-1`, then: + /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. + /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. + /// - etc. + /// + /// For example: + /// ``` + /// // 'x' is [1, 4] + /// // 'y' is [2, 5] + /// // 'z' is [3, 6] + /// x.packed(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] + /// x.packed(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + /// ``` + /// + /// This is the opposite of `unstacked`. + /// + /// - Parameters: + /// - tensors: Tensors to stack with the current tensor. + /// - axis: Dimension along which to stack. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same shape as the current tensor. + /// - Precondition: `axis` must be in the range `[-rank, rank)`. + /// + /// - Returns: The packed tensor. + @inlinable + // @differentiable(vjp: _vjpPacked where Scalar: TensorFlowFloatingPoint) + func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { + return Raw.pack([self] + tensors, axis: axis) + } + + /// Concatenates the current tensor with `tensors` along the `axis` dimension. + /// + /// Given `self` and `tensors` are all put in a single array, `values`, and + /// `values[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result has shape + /// `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data from the input + /// tensors is joined along the `axis` dimension. + /// + /// For example: + /// ``` + /// // t1 is [[1, 2, 3], [4, 5, 6]] + /// // t2 is [[7, 8, 9], [10, 11, 12]] + /// t1.concatenated(with: [t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + /// t1.concatenated(with: [t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] + /// + /// // t3 has shape [2, 3] + /// // t4 has shape [2, 3] + /// t3.concatenated(with: [t4]) // has shape [4, 3] + /// t3.concatenated(with: [t4], alongAxis: 1) // has shape [2, 6] + /// ``` + /// + /// - Note: If you are concatenating along a new axis consider using `stacked`. + /// + /// - Parameters: + /// - tensors: Tensors to concatenate with the current tensor. + /// - axis: Dimension along which to concatenate. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same rank as the current tensor and all dimensions + /// except `axis` must be equal. + /// - Precondition: `axis` must be in the range `[-rank, rank)`. + /// + /// - Returns: The concatenated tensor. + @inlinable + // @differentiable(vjp: _vjpConcatenated where Scalar : TensorFlowFloatingPoint) + func concatenated(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { + return Raw.concatV2([self] + tensors, axis: Tensor(axis)) + } + + /// Gathers slices of this tensor at `indices` along the `axis` dimension. 
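To make the index arithmetic below concrete, here is a small usage sketch of `gathered(atIndices:alongAxis:)`; the tensor literals are illustrative and assume the usual array-literal initializers from the TensorFlow module.

```swift
// 'params' has shape [3, 2].
let params: Tensor<Float> = [[1, 2], [3, 4], [5, 6]]

// Gather rows 2 and 0 along axis 0 (the default).
params.gathered(atIndices: Tensor<Int32>([2, 0]))                // expected: [[5, 6], [1, 2]]

// Gather columns in the order 1, 0 along axis 1.
params.gathered(atIndices: Tensor<Int32>([1, 0]), alongAxis: 1)  // expected: [[2, 1], [4, 3], [6, 5]]
```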
+ /// + /// For 0-D (scalar) `indices`: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices, + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// + /// For 1-D (vector) `indices`: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// i, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices[i], + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// + /// In the general case, produces a resulting tensor where: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// i_{batch\_dims}, ..., i_{M-1}, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices[i_0, ..., i_{M-1}], + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// where `N = self.rank` and `M = indices.rank`. + /// + /// The shape of the resulting tensor is: + /// `self.shape[..( + atIndices indices: Tensor, + alongAxis axis: Int32 = 0 + ) -> Tensor { + return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) + } + + /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the + /// first `batchDims` dimensions that correspond to batch dimensions. + /// + /// Performs similar functionality to `gathered`, except that the resulting tensor shape is now: + /// `self.shape[..( + atIndices indices: Tensor, + alongAxis axis: Int32, + numBatchDims batchDims: Int32 + ) -> Tensor { + precondition(batchDims >= 0 && batchDims < indices.rank, + "'numBatchDims' must be non-negative and less than 'indices.rank'.") + precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") + + // Handle the axis argument by transposing the axis dimension so that it is the first non-batch + // dimension, recursively calling `batchGathering` with `axis = 0`, and then transposing the + // result to put the pre-axis dimensions before the indices dimensions. + if axis != batchDims { + // Adjust axis to be positive. + let posAxis = axis < 0 ? axis + rank : axis + + precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") + precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") + + // Move self[axis] up to self[batchDims]. + let permutation = Tensor(0 ..< batchDims).concatenated(with: [ + Tensor(axis).rankLifted(), + Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), + Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) + let tensor = transposed(withPermutations: permutation) + let result = tensor.batchGathered( + atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) + + // Move the result dimensions corresponding to self[batchDims ..< axis] to just before the + // dimensions corresponding to indices[batchDims ...]. + let start = indices.rank + posAxis - batchDims + let resultPermutation = Tensor(0 ..< batchDims).concatenated(with: [ + Tensor(rangeFrom: indices.rank, to: start, stride: 1), + Tensor(batchDims ..< indices.rank), + Tensor(rangeFrom: start, to: result.rank, stride: 1)]) + return result.transposed(withPermutations: resultPermutation) + } + + let castedShape = Tensor(shapeTensor) + var batchIndices = indices + var accumulated = Tensor(ones: []) + for d in (1 ... 
batchDims).reversed() { + accumulated *= castedShape[d] + let dValue = castedShape[d - 1] + let dIndices = Tensor( + rangeFrom: Tensor(zeros: []), + to: dValue, + stride: Tensor(ones: []) + ) * accumulated + let dShape = Tensor(d - 1).packed(with: [ + Tensor(dValue), + Tensor(indices.rank - 1)]) + batchIndices += dIndices.reshaped(toShape: dShape) + } + + let flatIndices = batchIndices.flattened() + let outerShape = shapeTensor[Int(batchDims + 1)...] + let flatInnerShape = shapeTensor[.., alongAxis axis: Int32 = 0) -> Tensor { + precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") + let posAxis = axis < 0 ? axis + rank : axis + let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() + let reshapedTensor = reshaped( + toShape: shapeTensor[.. Date: Mon, 1 Apr 2019 15:08:37 -0400 Subject: [PATCH 03/55] Reverted back to 4-space tabs. --- Sources/DeepLearning/Operators/Basic.swift | 478 ++++++++++----------- Sources/DeepLearning/Operators/Math.swift | 2 +- Sources/DeepLearning/Operators/NN.swift | 130 +++--- 3 files changed, 305 insertions(+), 305 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index b491832ac..9f89b56ee 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -17,252 +17,252 @@ #endif public extension Tensor where Scalar: TensorFlowScalar { - /// Stacks the current tensor with `tensors`, along the `axis` dimension, into a tensor with - /// rank one higher than the current tensor and each tensor in `tensors`. - /// - /// Given `self` and `tensors` all have shape `[A, B, C]`, and `tensors.count = N-1`, then: - /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. - /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. - /// - etc. - /// - /// For example: - /// ``` - /// // 'x' is [1, 4] - /// // 'y' is [2, 5] - /// // 'z' is [3, 6] - /// x.packed(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] - /// x.packed(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] - /// ``` - /// - /// This is the opposite of `unstacked`. - /// - /// - Parameters: - /// - tensors: Tensors to stack with the current tensor. - /// - axis: Dimension along which to stack. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same shape as the current tensor. - /// - Precondition: `axis` must be in the range `[-rank, rank)`. - /// - /// - Returns: The packed tensor. - @inlinable - // @differentiable(vjp: _vjpPacked where Scalar: TensorFlowFloatingPoint) - func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { - return Raw.pack([self] + tensors, axis: axis) - } + /// Stacks the current tensor with `tensors`, along the `axis` dimension, into a tensor with + /// rank one higher than the current tensor and each tensor in `tensors`. + /// + /// Given `self` and `tensors` all have shape `[A, B, C]`, and `tensors.count = N-1`, then: + /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. + /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. + /// - etc. + /// + /// For example: + /// ``` + /// // 'x' is [1, 4] + /// // 'y' is [2, 5] + /// // 'z' is [3, 6] + /// x.packed(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] + /// x.packed(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + /// ``` + /// + /// This is the opposite of `unstacked`. 
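For clarity, the same example written against the method name as it is declared here, `stacked(with:alongAxis:)`; the values mirror the ones in the comment above.

```swift
let x: Tensor<Float> = [1, 4]
let y: Tensor<Float> = [2, 5]
let z: Tensor<Float> = [3, 6]

x.stacked(with: [y, z])                // expected: [[1, 4], [2, 5], [3, 6]]
x.stacked(with: [y, z], alongAxis: 1)  // expected: [[1, 2, 3], [4, 5, 6]]
```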
+ /// + /// - Parameters: + /// - tensors: Tensors to stack with the current tensor. + /// - axis: Dimension along which to stack. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same shape as the current tensor. + /// - Precondition: `axis` must be in the range `[-rank, rank)`. + /// + /// - Returns: The packed tensor. + @inlinable + // @differentiable(vjp: _vjpPacked where Scalar: TensorFlowFloatingPoint) + func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { + return Raw.pack([self] + tensors, axis: axis) + } - /// Concatenates the current tensor with `tensors` along the `axis` dimension. - /// - /// Given `self` and `tensors` are all put in a single array, `values`, and - /// `values[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result has shape - /// `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data from the input - /// tensors is joined along the `axis` dimension. - /// - /// For example: - /// ``` - /// // t1 is [[1, 2, 3], [4, 5, 6]] - /// // t2 is [[7, 8, 9], [10, 11, 12]] - /// t1.concatenated(with: [t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] - /// t1.concatenated(with: [t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] - /// - /// // t3 has shape [2, 3] - /// // t4 has shape [2, 3] - /// t3.concatenated(with: [t4]) // has shape [4, 3] - /// t3.concatenated(with: [t4], alongAxis: 1) // has shape [2, 6] - /// ``` - /// - /// - Note: If you are concatenating along a new axis consider using `stacked`. - /// - /// - Parameters: - /// - tensors: Tensors to concatenate with the current tensor. - /// - axis: Dimension along which to concatenate. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same rank as the current tensor and all dimensions - /// except `axis` must be equal. - /// - Precondition: `axis` must be in the range `[-rank, rank)`. - /// - /// - Returns: The concatenated tensor. - @inlinable - // @differentiable(vjp: _vjpConcatenated where Scalar : TensorFlowFloatingPoint) - func concatenated(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { - return Raw.concatV2([self] + tensors, axis: Tensor(axis)) - } + /// Concatenates the current tensor with `tensors` along the `axis` dimension. + /// + /// Given `self` and `tensors` are all put in a single array, `values`, and + /// `values[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result has shape + /// `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data from the + /// input tensors is joined along the `axis` dimension. + /// + /// For example: + /// ``` + /// // t1 is [[1, 2, 3], [4, 5, 6]] + /// // t2 is [[7, 8, 9], [10, 11, 12]] + /// t1.concatenated(with: [t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + /// t1.concatenated(with: [t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] + /// + /// // t3 has shape [2, 3] + /// // t4 has shape [2, 3] + /// t3.concatenated(with: [t4]) // has shape [4, 3] + /// t3.concatenated(with: [t4], alongAxis: 1) // has shape [2, 6] + /// ``` + /// + /// - Note: If you are concatenating along a new axis consider using `stacked`. + /// + /// - Parameters: + /// - tensors: Tensors to concatenate with the current tensor. + /// - axis: Dimension along which to concatenate. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same rank as the current tensor and all dimensions + /// except `axis` must be equal. 
+ /// - Precondition: `axis` must be in the range `[-rank, rank)`. + /// + /// - Returns: The concatenated tensor. + @inlinable + // @differentiable(vjp: _vjpConcatenated where Scalar : TensorFlowFloatingPoint) + func concatenated(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { + return Raw.concatV2([self] + tensors, axis: Tensor(axis)) + } - /// Gathers slices of this tensor at `indices` along the `axis` dimension. - /// - /// For 0-D (scalar) `indices`: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices, - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// - /// For 1-D (vector) `indices`: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// i, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices[i], - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// - /// In the general case, produces a resulting tensor where: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// i_{batch\_dims}, ..., i_{M-1}, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices[i_0, ..., i_{M-1}], - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// where `N = self.rank` and `M = indices.rank`. - /// - /// The shape of the resulting tensor is: - /// `self.shape[..( - atIndices indices: Tensor, - alongAxis axis: Int32 = 0 - ) -> Tensor { - return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) - } + /// Gathers slices of this tensor at `indices` along the `axis` dimension. + /// + /// For 0-D (scalar) `indices`: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices, + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// + /// For 1-D (vector) `indices`: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// i, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices[i], + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// + /// In the general case, produces a resulting tensor where: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// i_{batch\_dims}, ..., i_{M-1}, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices[i_0, ..., i_{M-1}], + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// where `N = self.rank` and `M = indices.rank`. + /// + /// The shape of the resulting tensor is: + /// `self.shape[..( + atIndices indices: Tensor, + alongAxis axis: Int32 = 0 + ) -> Tensor { + return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) + } - /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the - /// first `batchDims` dimensions that correspond to batch dimensions. - /// - /// Performs similar functionality to `gathered`, except that the resulting tensor shape is now: - /// `self.shape[..( - atIndices indices: Tensor, - alongAxis axis: Int32, - numBatchDims batchDims: Int32 - ) -> Tensor { - precondition(batchDims >= 0 && batchDims < indices.rank, - "'numBatchDims' must be non-negative and less than 'indices.rank'.") - precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") + /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the + /// first `batchDims` dimensions that correspond to batch dimensions. 
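An illustrative sketch of the batched gathering described here, using `batchGathered(atIndices:alongAxis:numBatchDims:)` as declared below; the shapes and values are assumptions for the example.

```swift
// 'params' has shape [2, 3]: two batch elements with three values each.
let params: Tensor<Float> = [[10, 11, 12],
                             [20, 21, 22]]
// One index per batch element (shape [2, 1]).
let indices: Tensor<Int32> = [[2], [0]]

// Gather within each batch element; dimension 0 is treated as the batch dimension.
params.batchGathered(atIndices: indices, alongAxis: 1, numBatchDims: 1)
// expected (following the usual batch-gather semantics): [[12], [20]]
```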
+ /// + /// Performs similar functionality to `gathered`, except that the resulting tensor shape is now: + /// `self.shape[..( + atIndices indices: Tensor, + alongAxis axis: Int32, + numBatchDims batchDims: Int32 + ) -> Tensor { + precondition(batchDims >= 0 && batchDims < indices.rank, + "'numBatchDims' must be non-negative and less than 'indices.rank'.") + precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") - // Handle the axis argument by transposing the axis dimension so that it is the first non-batch - // dimension, recursively calling `batchGathering` with `axis = 0`, and then transposing the - // result to put the pre-axis dimensions before the indices dimensions. - if axis != batchDims { - // Adjust axis to be positive. - let posAxis = axis < 0 ? axis + rank : axis + // Handle the axis argument by transposing the axis dimension so that it is the first + // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then + // transposing the result to put the pre-axis dimensions before the indices dimensions. + if axis != batchDims { + // Adjust axis to be positive. + let posAxis = axis < 0 ? axis + rank : axis - precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") - precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") + precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") + precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") - // Move self[axis] up to self[batchDims]. - let permutation = Tensor(0 ..< batchDims).concatenated(with: [ - Tensor(axis).rankLifted(), - Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), - Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) - let tensor = transposed(withPermutations: permutation) - let result = tensor.batchGathered( - atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) - - // Move the result dimensions corresponding to self[batchDims ..< axis] to just before the - // dimensions corresponding to indices[batchDims ...]. - let start = indices.rank + posAxis - batchDims - let resultPermutation = Tensor(0 ..< batchDims).concatenated(with: [ - Tensor(rangeFrom: indices.rank, to: start, stride: 1), - Tensor(batchDims ..< indices.rank), - Tensor(rangeFrom: start, to: result.rank, stride: 1)]) - return result.transposed(withPermutations: resultPermutation) - } + // Move self[axis] up to self[batchDims]. + let permutation = Tensor(0 ..< batchDims).concatenated(with: [ + Tensor(axis).rankLifted(), + Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), + Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) + let tensor = transposed(withPermutations: permutation) + let result = tensor.batchGathered( + atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) + + // Move the result dimensions corresponding to self[batchDims ..< axis] to just before + // the dimensions corresponding to indices[batchDims ...]. + let start = indices.rank + posAxis - batchDims + let resultPermutation = Tensor(0 ..< batchDims).concatenated(with: [ + Tensor(rangeFrom: indices.rank, to: start, stride: 1), + Tensor(batchDims ..< indices.rank), + Tensor(rangeFrom: start, to: result.rank, stride: 1)]) + return result.transposed(withPermutations: resultPermutation) + } - let castedShape = Tensor(shapeTensor) - var batchIndices = indices - var accumulated = Tensor(ones: []) - for d in (1 ... 
batchDims).reversed() { - accumulated *= castedShape[d] - let dValue = castedShape[d - 1] - let dIndices = Tensor( - rangeFrom: Tensor(zeros: []), - to: dValue, - stride: Tensor(ones: []) - ) * accumulated - let dShape = Tensor(d - 1).packed(with: [ - Tensor(dValue), - Tensor(indices.rank - 1)]) - batchIndices += dIndices.reshaped(toShape: dShape) - } + let castedShape = Tensor(shapeTensor) + var batchIndices = indices + var accumulated = Tensor(ones: []) + for d in (1 ... batchDims).reversed() { + accumulated *= castedShape[d] + let dValue = castedShape[d - 1] + let dIndices = Tensor( + rangeFrom: Tensor(zeros: []), + to: dValue, + stride: Tensor(ones: []) + ) * accumulated + let dShape = Tensor(d - 1).packed(with: [ + Tensor(dValue), + Tensor(indices.rank - 1)]) + batchIndices += dIndices.reshaped(toShape: dShape) + } - let flatIndices = batchIndices.flattened() - let outerShape = shapeTensor[Int(batchDims + 1)...] - let flatInnerShape = shapeTensor[.., alongAxis axis: Int32 = 0) -> Tensor { - precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - let posAxis = axis < 0 ? axis + rank : axis - let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() - let reshapedTensor = reshaped( - toShape: shapeTensor[.., alongAxis axis: Int32 = 0) -> Tensor { + precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") + let posAxis = axis < 0 ? axis + rank : axis + let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() + let reshapedTensor = reshaped( + toShape: shapeTensor[..(_ x: Tensor) -> Tensor { - return Raw.round(x) + return Raw.round(x) } diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 6ca32e22d..51117b833 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -21,64 +21,64 @@ //===------------------------------------------------------------------------------------------===// public extension Tensor where Scalar: BinaryFloatingPoint { - /// Computes the batch normalized tensor along the specified axis. - /// - /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are - /// respectively the mean and variance of `self` along `axis`. - /// - /// - Parameters: - /// - axis: The batch dimension. - /// - offset: The offset, also known as beta. - /// - scale: The scale, also known as gamma. - /// - epsilon: A small value added to the denominator for numerical stability. - @inlinable - @differentiable( - wrt: (self, offset, scale), - vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) - func batchNormalized( - alongAxis axis: Int32, - offset: Tensor = Tensor(0), - scale: Tensor = Tensor(1), - epsilon: Scalar = 0.001 - ) -> Tensor { - let mean = self.mean(alongAxes: axis) - let squaredDiff: Tensor = Raw.squaredDifference(self, mean) - let variance = squaredDiff.mean(alongAxes: axis) - let inv = rsqrt(variance + epsilon) * scale - return self * inv + offset - mean * inv - } + /// Computes the batch normalized tensor along the specified axis. + /// + /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are + /// respectively the mean and variance of `self` along `axis`. + /// + /// - Parameters: + /// - axis: The batch dimension. + /// - offset: The offset, also known as beta. + /// - scale: The scale, also known as gamma. + /// - epsilon: A small value added to the denominator for numerical stability. 
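As a usage sketch of `batchNormalized(alongAxis:offset:scale:epsilon:)` as declared below; the input values, the choice of axis 0, and the shapes of `offset`/`scale` are assumptions for the example.

```swift
// 'x' has shape [2, 3]; normalize over the batch dimension (axis 0).
let x: Tensor<Float> = [[1, 2, 3],
                        [5, 6, 7]]

// With the default offset (0), scale (1), and epsilon (0.001):
let normalized = x.batchNormalized(alongAxis: 0)
// expected: approximately [[-1, -1, -1], [1, 1, 1]]

// Learnable parameters are typically broadcast over the remaining axes.
let offset = Tensor<Float>(zeros: [3])
let scale = Tensor<Float>(ones: [3])
let y = x.batchNormalized(alongAxis: 0, offset: offset, scale: scale, epsilon: 1e-3)
```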
+ @inlinable + @differentiable( + wrt: (self, offset, scale), + vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) + func batchNormalized( + alongAxis axis: Int32, + offset: Tensor = Tensor(0), + scale: Tensor = Tensor(1), + epsilon: Scalar = 0.001 + ) -> Tensor { + let mean = self.mean(alongAxes: axis) + let squaredDiff: Tensor = Raw.squaredDifference(self, mean) + let variance = squaredDiff.mean(alongAxes: axis) + let inv = rsqrt(variance + epsilon) * scale + return self * inv + offset - mean * inv + } } internal extension Tensor where Scalar: TensorFlowFloatingPoint { - // TODO: Verify that these calculations are correct. - @inlinable - func _vjpBatchNormalized( - alongAxis axis: Int32, - offset: Tensor, - scale: Tensor, - epsilon: Scalar - ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { - let value = batchNormalized( - alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) - return (value, { v in - let mean = self.mean(alongAxes: axis) - let squaredDiff: Tensor = Raw.squaredDifference(self, mean) - let variance = squaredDiff.mean(alongAxes: axis) - let diff = self - mean - let inv = rsqrt(variance + epsilon) - let norm = diff * inv - let dNorm = v * scale - let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3) - let dMean = (-dNorm * inv).sum(alongAxes: axis) + - dVariance * (-diff * 2).mean(alongAxes: axis) - let dOffset = v.sum(alongAxes: axis) - let dScale = (norm * v).sum(alongAxes: axis) - let dim = Tensor(Tensor(self.shapeTensor[axis])) - let tmp = (dNorm * inv) + (dVariance * 2 * dMean / dim) - let dSelf = tmp + (dMean / dim) - return (dSelf, dOffset, dScale) - }) - } + // TODO: Verify that these calculations are correct. + @inlinable + func _vjpBatchNormalized( + alongAxis axis: Int32, + offset: Tensor, + scale: Tensor, + epsilon: Scalar + ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { + let value = batchNormalized( + alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) + return (value, { v in + let mean = self.mean(alongAxes: axis) + let squaredDiff: Tensor = Raw.squaredDifference(self, mean) + let variance = squaredDiff.mean(alongAxes: axis) + let diff = self - mean + let inv = rsqrt(variance + epsilon) + let norm = diff * inv + let dNorm = v * scale + let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3) + let dMean = (-dNorm * inv).sum(alongAxes: axis) + + dVariance * (-diff * 2).mean(alongAxes: axis) + let dOffset = v.sum(alongAxes: axis) + let dScale = (norm * v).sum(alongAxes: axis) + let dim = Tensor(Tensor(self.shapeTensor[axis])) + let tmp = (dNorm * inv) + (dVariance * 2 * dMean / dim) + let dSelf = tmp + (dMean / dim) + return (dSelf, dOffset, dScale) + }) + } } //===------------------------------------------------------------------------------------------===// @@ -88,20 +88,20 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { - /// The "valid" padding scheme. - case valid - /// The "same" padding scheme. - case same + /// The "valid" padding scheme. + case valid + /// The "same" padding scheme. 
+ case same } public extension Padding { - @inlinable - var raw: Raw.Padding { - switch self { - case .same: return .same - case .valid: return .valid + @inlinable + var raw: Raw.Padding { + switch self { + case .same: return .same + case .valid: return .valid + } } - } } public extension Tensor where Scalar: TensorFlowFloatingPoint { From b3f6281de7afc4d8310182ab34ea9c3ef19fbe0b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 15:10:25 -0400 Subject: [PATCH 04/55] Made some other minor changes. --- Sources/DeepLearning/Helpers.swift | 12 ++++++++++-- Sources/DeepLearning/Initializers.swift | 4 ++++ Sources/DeepLearning/Loss.swift | 2 +- Sources/DeepLearning/Optimizer.swift | 2 +- Sources/DeepLearning/Random.swift | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Sources/DeepLearning/Helpers.swift b/Sources/DeepLearning/Helpers.swift index 86aec74bf..4d9c0217b 100644 --- a/Sources/DeepLearning/Helpers.swift +++ b/Sources/DeepLearning/Helpers.swift @@ -13,12 +13,20 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -import TensorFlow +@_exported import TensorFlow #endif +/// Returns a tensor with the same shape and scalars as the specified tensor. +@inlinable +@differentiable +public func identity(_ x: Tensor) -> Tensor { + return x +} + // `pow` is defined in Darwin/Glibc on `Float` and `Double`, but there doesn't exist a generic // version for `FloatingPoint`. // This is a manual definition. -func pow(_ x: T, _ y: T) -> T { +@inlinable +func pow(_ x: T, _ y: T) -> T { return T(pow(Double(x), Double(y))) } diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index ef4de6228..bddc8f3f0 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -16,6 +16,10 @@ @_exported import TensorFlow #endif +//===------------------------------------------------------------------------------------------===// +// Random +//===------------------------------------------------------------------------------------------===// + public extension Tensor where Scalar == Int32 { /// Creates a tensor with the specified shape, randomly sampling scalar values /// from a discrete uniform distribution. diff --git a/Sources/DeepLearning/Loss.swift b/Sources/DeepLearning/Loss.swift index fe9400302..45a5d15d6 100644 --- a/Sources/DeepLearning/Loss.swift +++ b/Sources/DeepLearning/Loss.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -import TensorFlow +@_exported import TensorFlow #endif /// Computes the mean squared error between predictions and labels. diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 479488e79..0c381833b 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -import TensorFlow +@_exported import TensorFlow #endif /// A machine learning optimizer. 
diff --git a/Sources/DeepLearning/Random.swift b/Sources/DeepLearning/Random.swift index 44e55223c..7a6752193 100644 --- a/Sources/DeepLearning/Random.swift +++ b/Sources/DeepLearning/Random.swift @@ -19,7 +19,7 @@ import Glibc #endif //===------------------------------------------------------------------------------------------===// -// Random number generators +// Random Number Generators //===------------------------------------------------------------------------------------------===// /// A type that provides seedable deterministic pseudo-random data. From 111d96cf89a084d91ae18a296865748fe0a3f8ad Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 15:18:33 -0400 Subject: [PATCH 05/55] Added support or 'selecting'. --- Sources/DeepLearning/Operators/Basic.swift | 46 ++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 9f89b56ee..d520ece14 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -266,3 +266,49 @@ public extension Tensor where Scalar: TensorFlowScalar { return reshapedTensor.gathered(atIndices: indices, alongAxis: posAxis) } } + +public extension Tensor where Scalar == Bool { + /// Returns the elements of either `x` or `y`, depending on the values in stored in this tensor. + /// + /// `x` and `y` must be scalar if this tensor is scalar. Otherwise, either the first dimension + /// of `x` and `y` must match the shape of this tensor (i.e., this tensor must be a vector), or + /// the shapes of `x` and `y` must match the shape of this tensor. This tensor acts as a mask + /// that chooses, based on the value at each element, whether the corresponding element / row in + /// the output should be taken from `x` (if true) or `y` (if false). If this tensor is a vector + /// and `x` and `y` are higher rank matrices, then it chooses which row (outer dimension) to + /// copy from `x` and `y`. If it has the same shape as `x` and `y`, then it chooses which + /// element to copy from `x` and `y`. + /// + /// - Parameters: + /// - x: Contains the values to use when the condition is true. + /// - y: Contains the values to use when the condition is false. + /// + /// - Precondition: `x` and `y` must have the same shape. + /// + /// - Returns: A tensor with the same type and shape as `x` and `y`. + @differentiable( + wrt: (x, y), + vjp: _vjpSelecting(ifTrue:else:) where T: TensorFlowFloatingPoint) + func selecting( + ifTrue x: Tensor, + else y: Tensor + ) -> Tensor { + return Raw.select(condition: self, t: x, e: y) + } +} + +internal extension Tensor where Scalar == Bool { + @inlinable @inline(__always) + func _vjpSelecting( + ifTrue x: Tensor, + else y: Tensor + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = selecting(ifTrue: x, else: y) + return (value, { v in + let zeros = Tensor(zeros: self.shape) + let gIfTrue = self.selecting(ifTrue: v, else: zeros) + let gElse = self.selecting(ifTrue: zeros, else: v) + return (gIfTrue, gElse) + }) + } +} From 371021b6d2c22943afd24b28c881cd250406e79b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 15:28:02 -0400 Subject: [PATCH 06/55] Added support for 'nonZeroIndices'. 
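As a usage sketch complementing the boolean examples in the documentation below, `nonZeroIndices()` also works on numeric tensors; the literal values here are illustrative.

```swift
let x: Tensor<Float> = [[0, 1],
                        [2, 0]]

// Coordinates of the non-zero entries, in row-major order, as a Tensor<Int64>.
x.nonZeroIndices()   // expected: [[0, 1], [1, 0]]
```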
--- Sources/DeepLearning/Operators/Basic.swift | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index d520ece14..4ce9ac5a8 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -267,6 +267,40 @@ public extension Tensor where Scalar: TensorFlowScalar { } } +public extension Tensor where Scalar: TensorFlowScalar { + /// Returns the locations of non-zero / true values in this tensor. + /// + /// The coordinates are returned in a 2-D tensor where the first dimension (rows) represents the + /// number of non-zero elements, and the second dimension (columns) represents the coordinates + /// of the non-zero elements. Keep in mind that the shape of the output tensor can vary + /// depending on how many true values there are in this tensor. Indices are output in row-major + /// order. + /// + /// For example: + /// ``` + /// // 'input' is [[true, false], [true, false]] + /// // 'input' has 2 true values and so the output has 2 rows. + /// // 'input' has rank of 2, and so the second dimension of the output has size 2. + /// input.nonZeroIndices() // is [[0, 0], [1, 0]] + /// + /// // 'input' is [[[ true, false], [ true, false]], + /// // [[false, true], [false, true]], + /// // [[false, false], [false, true]]] + /// // 'input' has 5 true values and so the output has 5 rows. + /// // 'input' has rank 3, and so the second dimension of the output has size 3. + /// input.nonZeroIndices() // is [[0, 0, 0], + /// // [0, 1, 0], + /// // [1, 0, 1], + /// // [1, 1, 1], + /// // [2, 1, 1]] + /// ``` + /// + /// - Returns: A tensor with shape `(num_true, rank(condition))`. + func nonZeroIndices() -> Tensor { + return Raw.where_(self) + } +} + public extension Tensor where Scalar == Bool { /// Returns the elements of either `x` or `y`, depending on the values in stored in this tensor. /// From 112707bdda7fa8f597d4d94bbcd7fd931843cc36 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 15:29:30 -0400 Subject: [PATCH 07/55] Minor edits. --- Sources/DeepLearning/Operators/Basic.swift | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 4ce9ac5a8..d55d517af 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -254,7 +254,7 @@ public extension Tensor where Scalar: TensorFlowScalar { /// /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. - @inlinable @inline(__always) + @inlinable func masked(with mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = axis < 0 ? axis + rank : axis @@ -296,6 +296,7 @@ public extension Tensor where Scalar: TensorFlowScalar { /// ``` /// /// - Returns: A tensor with shape `(num_true, rank(condition))`. + @inlinable func nonZeroIndices() -> Tensor { return Raw.where_(self) } @@ -320,6 +321,7 @@ public extension Tensor where Scalar == Bool { /// - Precondition: `x` and `y` must have the same shape. /// /// - Returns: A tensor with the same type and shape as `x` and `y`. 
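A small usage sketch of `selecting(ifTrue:else:)` with a mask of the same shape as its arguments; the values are illustrative.

```swift
let condition: Tensor<Bool> = [true, false, true, false]
let a: Tensor<Float> = [1, 2, 3, 4]
let b: Tensor<Float> = [10, 20, 30, 40]

// Picks from `a` where the mask is true and from `b` where it is false.
condition.selecting(ifTrue: a, else: b)   // expected: [1, 20, 3, 40]
```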
+ @inlinable @differentiable( wrt: (x, y), vjp: _vjpSelecting(ifTrue:else:) where T: TensorFlowFloatingPoint) @@ -332,7 +334,7 @@ public extension Tensor where Scalar == Bool { } internal extension Tensor where Scalar == Bool { - @inlinable @inline(__always) + @inlinable func _vjpSelecting( ifTrue x: Tensor, else y: Tensor From 3594e0e70a367c3c4169553b49b233129b6ef807 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 16:16:45 -0400 Subject: [PATCH 08/55] Addressed Richard's feedback. --- Sources/DeepLearning/Operators/Basic.swift | 47 ---------------------- Sources/DeepLearning/Operators/NN.swift | 10 ++--- 2 files changed, 4 insertions(+), 53 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index d55d517af..f36575189 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -301,50 +301,3 @@ public extension Tensor where Scalar: TensorFlowScalar { return Raw.where_(self) } } - -public extension Tensor where Scalar == Bool { - /// Returns the elements of either `x` or `y`, depending on the values in stored in this tensor. - /// - /// `x` and `y` must be scalar if this tensor is scalar. Otherwise, either the first dimension - /// of `x` and `y` must match the shape of this tensor (i.e., this tensor must be a vector), or - /// the shapes of `x` and `y` must match the shape of this tensor. This tensor acts as a mask - /// that chooses, based on the value at each element, whether the corresponding element / row in - /// the output should be taken from `x` (if true) or `y` (if false). If this tensor is a vector - /// and `x` and `y` are higher rank matrices, then it chooses which row (outer dimension) to - /// copy from `x` and `y`. If it has the same shape as `x` and `y`, then it chooses which - /// element to copy from `x` and `y`. - /// - /// - Parameters: - /// - x: Contains the values to use when the condition is true. - /// - y: Contains the values to use when the condition is false. - /// - /// - Precondition: `x` and `y` must have the same shape. - /// - /// - Returns: A tensor with the same type and shape as `x` and `y`. - @inlinable - @differentiable( - wrt: (x, y), - vjp: _vjpSelecting(ifTrue:else:) where T: TensorFlowFloatingPoint) - func selecting( - ifTrue x: Tensor, - else y: Tensor - ) -> Tensor { - return Raw.select(condition: self, t: x, e: y) - } -} - -internal extension Tensor where Scalar == Bool { - @inlinable - func _vjpSelecting( - ifTrue x: Tensor, - else y: Tensor - ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let value = selecting(ifTrue: x, else: y) - return (value, { v in - let zeros = Tensor(zeros: self.shape) - let gIfTrue = self.selecting(ifTrue: v, else: zeros) - let gElse = self.selecting(ifTrue: zeros, else: v) - return (gIfTrue, gElse) - }) - } -} diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 51117b833..9142d432e 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -20,7 +20,7 @@ // Normalization //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar: BinaryFloatingPoint { +public extension Tensor where Scalar: TensorFlowFloatingPoint { /// Computes the batch normalized tensor along the specified axis. 
/// /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are @@ -34,7 +34,7 @@ public extension Tensor where Scalar: BinaryFloatingPoint { @inlinable @differentiable( wrt: (self, offset, scale), - vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) + vjp: _vjpBatchNormalized) func batchNormalized( alongAxis axis: Int32, offset: Tensor = Tensor(0), @@ -47,12 +47,10 @@ public extension Tensor where Scalar: BinaryFloatingPoint { let inv = rsqrt(variance + epsilon) * scale return self * inv + offset - mean * inv } -} - -internal extension Tensor where Scalar: TensorFlowFloatingPoint { + // TODO: Verify that these calculations are correct. @inlinable - func _vjpBatchNormalized( + internal func _vjpBatchNormalized( alongAxis axis: Int32, offset: Tensor, scale: Tensor, From adf20ebc93d3b474411bae52fa52e1794d72d603 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 19:44:53 -0400 Subject: [PATCH 09/55] Addressed Richard's comments. --- Sources/DeepLearning/Initializers.swift | 2 +- Sources/DeepLearning/Layer.swift | 2 +- Sources/DeepLearning/Loss.swift | 2 +- Sources/DeepLearning/Operators/Basic.swift | 54 +++++++++++----------- Sources/DeepLearning/Operators/Math.swift | 2 +- Sources/DeepLearning/Operators/NN.swift | 2 +- Sources/DeepLearning/Optimizer.swift | 2 +- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index bddc8f3f0..943ec8e24 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -@_exported import TensorFlow +import TensorFlow #endif //===------------------------------------------------------------------------------------------===// diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 73f1b263a..586d58abd 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -@_exported import TensorFlow +import TensorFlow #endif /// A value that indicates either a training phase or an inference phase for a layer. diff --git a/Sources/DeepLearning/Loss.swift b/Sources/DeepLearning/Loss.swift index 45a5d15d6..fe9400302 100644 --- a/Sources/DeepLearning/Loss.swift +++ b/Sources/DeepLearning/Loss.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -@_exported import TensorFlow +import TensorFlow #endif /// Computes the mean squared error between predictions and labels. diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index f36575189..8725c82b1 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -13,7 +13,7 @@ // limitations under the License. 
#if !COMPILING_TENSORFLOW_MODULE -@_exported import TensorFlow +import TensorFlow #endif public extension Tensor where Scalar: TensorFlowScalar { @@ -30,8 +30,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// // 'x' is [1, 4] /// // 'y' is [2, 5] /// // 'z' is [3, 6] - /// x.packed(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] - /// x.packed(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + /// x.stacked(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] + /// x.stacked(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] /// ``` /// /// This is the opposite of `unstacked`. @@ -43,9 +43,9 @@ public extension Tensor where Scalar: TensorFlowScalar { /// - Precondition: All tensors must have the same shape as the current tensor. /// - Precondition: `axis` must be in the range `[-rank, rank)`. /// - /// - Returns: The packed tensor. + /// - Returns: The stacked tensor. @inlinable - // @differentiable(vjp: _vjpPacked where Scalar: TensorFlowFloatingPoint) + // @differentiable(vjp: _vjpStacked where Scalar: TensorFlowFloatingPoint) func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { return Raw.pack([self] + tensors, axis: axis) } @@ -133,8 +133,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// /// - Returns: The gathered tensor. @inlinable - // @differentiable(vjp: _vjpGathered where Scalar: TensorFlowFloatingPoint) - func gathered( + // @differentiable(vjp: _vjpGathering where Scalar: TensorFlowFloatingPoint) + func gathering( atIndices indices: Tensor, alongAxis axis: Int32 = 0 ) -> Tensor { @@ -144,7 +144,7 @@ public extension Tensor where Scalar: TensorFlowScalar { /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the /// first `batchDims` dimensions that correspond to batch dimensions. /// - /// Performs similar functionality to `gathered`, except that the resulting tensor shape is now: + /// Performs similar functionality to `gathering`, except that the resulting tensor shape is now: /// `self.shape[..( + func batchGathering( atIndices indices: Tensor, alongAxis axis: Int32, numBatchDims batchDims: Int32 @@ -183,7 +183,7 @@ public extension Tensor where Scalar: TensorFlowScalar { Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) let tensor = transposed(withPermutations: permutation) - let result = tensor.batchGathered( + let result = tensor.batchGathering( atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) // Move the result dimensions corresponding to self[batchDims ..< axis] to just before @@ -199,25 +199,25 @@ public extension Tensor where Scalar: TensorFlowScalar { let castedShape = Tensor(shapeTensor) var batchIndices = indices var accumulated = Tensor(ones: []) - for d in (1 ... 
batchDims).reversed() { - accumulated *= castedShape[d] - let dValue = castedShape[d - 1] - let dIndices = Tensor( - rangeFrom: Tensor(zeros: []), - to: dValue, - stride: Tensor(ones: []) - ) * accumulated - let dShape = Tensor(d - 1).packed(with: [ - Tensor(dValue), - Tensor(indices.rank - 1)]) - batchIndices += dIndices.reshaped(toShape: dShape) + for d in (1...batchDims).reversed() { + accumulated *= castedShape[d] + let dValue = castedShape[d - 1] + let dIndices = Tensor( + rangeFrom: Tensor(zeros: []), + to: dValue, + stride: Tensor(ones: []) + ) * accumulated + let dShape = Tensor(d - 1).stacked(with: [ + Tensor(dValue), + Tensor(indices.rank - 1)]) + batchIndices += dIndices.reshaped(toShape: dShape) } let flatIndices = batchIndices.flattened() let outerShape = shapeTensor[Int(batchDims + 1)...] let innerShape = shapeTensor[.. Date: Mon, 1 Apr 2019 21:07:21 -0400 Subject: [PATCH 10/55] Addressed Richard's comments. --- Sources/DeepLearning/Operators/Basic.swift | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 8725c82b1..fe75ac81c 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -134,8 +134,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// - Returns: The gathered tensor. @inlinable // @differentiable(vjp: _vjpGathering where Scalar: TensorFlowFloatingPoint) - func gathering( - atIndices indices: Tensor, + func gathering( + atIndices indices: Tensor, alongAxis axis: Int32 = 0 ) -> Tensor { return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) @@ -158,8 +158,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// /// - Returns: The gathered tensor. @inlinable - func batchGathering( - atIndices indices: Tensor, + func batchGathering( + atIndices indices: Tensor, alongAxis axis: Int32, numBatchDims batchDims: Int32 ) -> Tensor { @@ -196,16 +196,16 @@ public extension Tensor where Scalar: TensorFlowScalar { return result.transposed(withPermutations: resultPermutation) } - let castedShape = Tensor(shapeTensor) + let castedShape = Tensor(shapeTensor) var batchIndices = indices - var accumulated = Tensor(ones: []) + var accumulated = Tensor(ones: []) for d in (1...batchDims).reversed() { accumulated *= castedShape[d] let dValue = castedShape[d - 1] - let dIndices = Tensor( - rangeFrom: Tensor(zeros: []), + let dIndices = Tensor( + rangeFrom: Tensor(zeros: []), to: dValue, - stride: Tensor(ones: []) + stride: Tensor(ones: []) ) * accumulated let dShape = Tensor(d - 1).stacked(with: [ Tensor(dValue), @@ -221,24 +221,24 @@ public extension Tensor where Scalar: TensorFlowScalar { return flatResult.reshaped(toShape: indices.shapeTensor.concatenated(with: outerShape)) } - /// Applies the provided boolean mask to this tensor. + /// Gathers values from this tensor according to the provided boolean mask. 
/// /// For example: /// ``` /// // 1-D example /// // tensor is [0, 1, 2, 3] /// // mask is [true, false, true, false] - /// tensor.masked(with: mask) // is [0, 2] + /// tensor.gathering(where: mask) // is [0, 2] /// /// // 2-D example /// // tensor is [[1, 2], [3, 4], [5, 6]] /// // mask is [true, false, true] - /// tensor.masked(with: mask) // is [[1, 2], [5, 6]] + /// tensor.gathering(where: mask) // is [[1, 2], [5, 6]] /// ``` /// /// In general, `0 < mask.rank = K <= tensor.rank`, and the `mask`'s shape must match the first /// K dimensions of the `tensor`'s shape. We then have: - /// `tensor.masked(with: mask)[i, j1, ..., jd] = tensor[i1, ..., iK, j1, ..., jd]`, where + /// `tensor.gathering(where: mask)[i, j1, ..., jd] = tensor[i1, ..., iK, j1, ..., jd]`, where /// `[i1, ..., iK]` is the `i`th `true` entry of `mask` (row-major order). /// /// The `axis` could be used with `mask` to indicate the axis to mask from. In that case, @@ -255,7 +255,7 @@ public extension Tensor where Scalar: TensorFlowScalar { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable - func masked(with mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { + func gathering(where mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = axis < 0 ? axis + rank : axis let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() From b0aba5de12cf2436bde2e3898f60ad1e57837aee Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 21:16:55 -0400 Subject: [PATCH 11/55] Updated the convolution ops to support explicit paddings. --- Sources/DeepLearning/Operators/NN.swift | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 6e8a40daf..70db254d5 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -86,6 +86,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { + /// The "explicit" padding scheme. + case explicit(paddings: [Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. @@ -94,12 +96,22 @@ public enum Padding { public extension Padding { @inlinable - var raw: Raw.Padding { + var raw: Raw.Padding2 { switch self { + case .explicit: return .explicit case .same: return .same case .valid: return .valid } } + + @inlinable + var explicitPaddings: [Int32] { + switch self { + case .explicit(let paddings): return paddings + case .same: return [] + case .valid: return [] + } + } } public extension Tensor where Scalar: TensorFlowFloatingPoint { @@ -117,7 +129,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filter: filter, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } /// TensorFlow builtin conv2d gradient helper for the filter. 
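
For context, the new `.explicit` case carries per-dimension pad amounts that are forwarded to the raw convolution ops through `explicitPaddings`, while `.valid` and `.same` forward an empty array. A minimal usage sketch, assuming TensorFlow's Conv2D convention of two pad values per dimension in NHWC order (the module name in the import and the concrete numbers are illustrative, not taken from this patch):

```
import DeepLearning  // module name assumed from Sources/DeepLearning

// Pad the height dimension by 1 on each side and the width dimension by 2 on each
// side, leaving the batch and channel dimensions untouched.
let padding: Padding = .explicit(paddings: [0, 0, 1, 1, 2, 2, 0, 0])
print(padding.explicitPaddings)        // [0, 0, 1, 1, 2, 2, 0, 0]

// Non-explicit schemes forward an empty array to the raw op.
print(Padding.valid.explicitPaddings)  // []
```
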
@@ -134,7 +147,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filterSizes: filterSizes, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } @inlinable @@ -264,7 +278,8 @@ public extension Tensor where Scalar: FloatingPoint { self, filter: filter, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } /// Computes a 2-D max pooling, with the specified kernel sizes, strides, and From 05704d0b862d1d80df75c396a92484acf95683ee Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:15:46 -0400 Subject: [PATCH 12/55] Small edits. --- Sources/DeepLearning/Operators/NN.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 70db254d5..4b53ca514 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -87,7 +87,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { // @_frozen // SR-9739 public enum Padding { /// The "explicit" padding scheme. - case explicit(paddings: [Int32]) + case explicit(_ paddings: [Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. @@ -105,7 +105,7 @@ public extension Padding { } @inlinable - var explicitPaddings: [Int32] { + internal var explicitPaddings: [Int32] { switch self { case .explicit(let paddings): return paddings case .same: return [] From a686a76eccf5d44d21aec4af9544c22655aa73f4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:17:10 -0400 Subject: [PATCH 13/55] Updated the convolution ops to support explicit paddings. --- Sources/DeepLearning/Operators.swift | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators.swift index 51b2406a8..3a660c07a 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators.swift @@ -98,12 +98,14 @@ public extension Tensor where Scalar: BinaryFloatingPoint { } //===------------------------------------------------------------------------------------------===// -// Convolution and pooling +// Convolution and Pooling //===------------------------------------------------------------------------------------------===// /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { + /// The "explicit" padding scheme. + case explicit(_ paddings: [Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. 
@@ -112,12 +114,22 @@ public enum Padding { public extension Padding { @inlinable - var raw: Raw.Padding { + var raw: Raw.Padding2 { switch self { + case .explicit: return .explicit case .same: return .same case .valid: return .valid } } + + @inlinable + internal var explicitPaddings: [Int32] { + switch self { + case .explicit(let paddings): return paddings + case .same: return [] + case .valid: return [] + } + } } public extension Tensor where Scalar: TensorFlowFloatingPoint { @@ -135,7 +147,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filter: filter, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } /// TensorFlow builtin conv2d gradient helper for the filter. @@ -152,7 +165,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filterSizes: filterSizes, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } @inlinable @@ -282,7 +296,8 @@ public extension Tensor where Scalar: FloatingPoint { self, filter: filter, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } /// Computes a 2-D max pooling, with the specified kernel sizes, strides, and From cc4665849a25347c890133a1c496b10d6dc9479a Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:22:27 -0400 Subject: [PATCH 14/55] Small fix. --- Sources/DeepLearning/Operators/NN.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 4b53ca514..27a94dc1a 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -87,7 +87,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { // @_frozen // SR-9739 public enum Padding { /// The "explicit" padding scheme. - case explicit(_ paddings: [Int32]) + case explicit([Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. From 8494c04d0ef849119b73cd566bccb304bb11a999 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:22:58 -0400 Subject: [PATCH 15/55] Small fix. --- Sources/DeepLearning/Operators.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators.swift index 3a660c07a..501722a9e 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators.swift @@ -105,7 +105,7 @@ public extension Tensor where Scalar: BinaryFloatingPoint { // @_frozen // SR-9739 public enum Padding { /// The "explicit" padding scheme. - case explicit(_ paddings: [Int32]) + case explicit([Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. From 976061f3b6fae3875f953f0c4938ea1e1104b2c4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:30:23 -0400 Subject: [PATCH 16/55] Added a new tensor initializer from ranges of tensors. 
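
A minimal sketch of the new initializer, assuming the snippet is built against this module (the module name in the import and the element values are illustrative):

```
import TensorFlow
import DeepLearning  // module name assumed from Sources/DeepLearning

// `end` is exclusive and `stride` must be positive.
let evens = Tensor<Int32>(rangeFrom: Tensor(0), to: Tensor(10), stride: Tensor(2))
// evens is [0, 2, 4, 6, 8]

// The initializer is generic over numeric scalars, so it also covers floating point.
let grid = Tensor<Float>(rangeFrom: Tensor(0), to: Tensor(1), stride: Tensor(0.25))
// grid is [0.0, 0.25, 0.5, 0.75]
```
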
--- Sources/DeepLearning/Initializers.swift | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 943ec8e24..1d2041836 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -16,6 +16,22 @@ import TensorFlow #endif +public extension Tensor where Scalar: TensorFlowScalar & Numeric { + /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an + /// end value, stepping by the specified amount. + /// + /// - Parameters: + /// - start: The starting value to use for the sequence. If the sequence contains any values, + /// the first one is `start`. + /// - end: An end value to limit the sequence. `end` is never an element of the resulting + /// sequence. + /// - stride: The amount to step by with each iteration. `stride` must be positive. + @inlinable @inline(__always) + init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { + self = Raw.range(start: start, limit: end, delta: stride) + } +} + //===------------------------------------------------------------------------------------------===// // Random //===------------------------------------------------------------------------------------------===// From 5dfaaeecf5b48e4784ec2c3b9c0607f2bb7ceced Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:32:29 -0400 Subject: [PATCH 17/55] Added documentation string for the "explicit" padding scheme. --- Sources/DeepLearning/Operators.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators.swift index 501722a9e..827890b3d 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators.swift @@ -104,7 +104,8 @@ public extension Tensor where Scalar: BinaryFloatingPoint { /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { - /// The "explicit" padding scheme. + /// The "explicit" padding scheme, which is defined by an array indicating the explicit padding + /// sizes at the start and end of each dimension. case explicit([Int32]) /// The "valid" padding scheme. case valid From eda9514edf0884d1da3545bf9417005a1a2fc1b7 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 21:24:34 -0400 Subject: [PATCH 18/55] More fixes. --- Sources/DeepLearning/Layer.swift | 20 ++++++++++---------- Sources/DeepLearning/Operators.swift | 23 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 7c4cf1d23..ffa83d6b9 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -836,7 +836,7 @@ public struct MaxPool1D: Layer { /// The stride of the sliding window for temporal dimension. @noDerivative let stride: Int32 /// The padding algorithm for pooling. - @noDerivative let padding: Padding + @noDerivative let padding: PaddingV1 /// Creates a max pooling layer. /// @@ -847,7 +847,7 @@ public struct MaxPool1D: Layer { public init( poolSize: Int, stride: Int, - padding: Padding + padding: PaddingV1 ) { self.poolSize = Int32(poolSize) self.stride = Int32(stride) @@ -878,13 +878,13 @@ public struct MaxPool2D: Layer { /// Strides in non-spatial dimensions must be `1`. @noDerivative let strides: (Int32, Int32, Int32, Int32) /// The padding algorithm for pooling. 
- @noDerivative let padding: Padding + @noDerivative let padding: PaddingV1 /// Creates a max pooling layer. public init( poolSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), - padding: Padding + padding: PaddingV1 ) { (self.poolSize.0, self.poolSize.1, self.poolSize.2, self.poolSize.3) = (Int32(poolSize.0), Int32(poolSize.1), Int32(poolSize.2), Int32(poolSize.3)) @@ -899,7 +899,7 @@ public struct MaxPool2D: Layer { /// - poolSize: Vertical and horizontal factors by which to downscale. /// - strides: The strides. /// - padding: The padding. - public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) { + public init(poolSize: (Int, Int), strides: (Int, Int), padding: PaddingV1 = .valid) { self.poolSize = (1, Int32(poolSize.0), Int32(poolSize.1), 1) self.strides = (1, Int32(strides.0), Int32(strides.1), 1) self.padding = padding @@ -927,7 +927,7 @@ public struct AvgPool1D: Layer { /// The stride of the sliding window for temporal dimension. @noDerivative let stride: Int32 /// The padding algorithm for pooling. - @noDerivative let padding: Padding + @noDerivative let padding: PaddingV1 /// Creates an average pooling layer. /// @@ -938,7 +938,7 @@ public struct AvgPool1D: Layer { public init( poolSize: Int, stride: Int, - padding: Padding + padding: PaddingV1 ) { self.poolSize = Int32(poolSize) self.stride = Int32(stride) @@ -969,13 +969,13 @@ public struct AvgPool2D: Layer { /// Strides in non-spatial dimensions must be `1`. @noDerivative let strides: (Int32, Int32, Int32, Int32) /// The padding algorithm for pooling. - @noDerivative let padding: Padding + @noDerivative let padding: PaddingV1 /// Creates a average pooling layer. public init( poolSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), - padding: Padding + padding: PaddingV1 ) { (self.poolSize.0, self.poolSize.1, self.poolSize.2, self.poolSize.3) = (Int32(poolSize.0), Int32(poolSize.1), Int32(poolSize.2), Int32(poolSize.3)) @@ -990,7 +990,7 @@ public struct AvgPool2D: Layer { /// - poolSize: Vertical and horizontal factors by which to downscale. /// - strides: The strides. /// - padding: The padding. - public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) { + public init(poolSize: (Int, Int), strides: (Int, Int), padding: PaddingV1 = .valid) { self.poolSize = (1, Int32(poolSize.0), Int32(poolSize.1), 1) self.strides = (1, Int32(strides.0), Int32(strides.1), 1) self.padding = padding diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators.swift index 3a660c07a..1587facbd 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators.swift @@ -132,6 +132,25 @@ public extension Padding { } } +/// An older padding scheme. Used by padding, convolution, and pooling ops. +// @_frozen // SR-9739 +public enum PaddingV1 { + /// The "valid" padding scheme. + case valid + /// The "same" padding scheme. + case same +} + +public extension PaddingV1 { + @inlinable + var raw: Raw.Padding { + switch self { + case .same: return .same + case .valid: return .valid + } + } +} + public extension Tensor where Scalar: TensorFlowFloatingPoint { /// TensorFlow builtin conv2d gradient helper for the input. 
@inlinable @@ -316,7 +335,7 @@ public extension Tensor where Scalar: FloatingPoint { func maxPooled( kernelSize: (Int32, Int32, Int32, Int32), strides: (Int32, Int32, Int32, Int32), - padding: Padding + padding: PaddingV1 ) -> Tensor { return Raw.maxPoolV2( self, @@ -343,7 +362,7 @@ public extension Tensor where Scalar: FloatingPoint { func averagePooled( kernelSize: (Int32, Int32, Int32, Int32), strides: (Int32, Int32, Int32, Int32), - padding: Padding + padding: PaddingV1 ) -> Tensor { return Raw.avgPool( value: self, From aed430a6155db601d6cba9357f252de08808481b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 12 Apr 2019 11:50:10 -0400 Subject: [PATCH 19/55] Added 'zerosLike' and 'onesLike' tensor initializers. --- Sources/DeepLearning/Initializers.swift | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 1d2041836..6b6d029b1 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -16,7 +16,25 @@ import TensorFlow #endif -public extension Tensor where Scalar: TensorFlowScalar & Numeric { +public extension Tensor where Scalar : Numeric { + /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided + /// tensor. + /// + /// - Parameter other: Tensor whose shape and data type to use. + @inlinable @inline(__always) + init(zerosLike other: Tensor) { + self = Raw.zerosLike(other) + } + + /// Creates a tensor with all scalars set to one that has the same shape and type as the provided + /// tensor. + /// + /// - Parameter other: Tensor whose shape and data type to use. + @inlinable @inline(__always) + init(onesLike other: Tensor) { + self = Raw.onesLike(other) + } + /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an /// end value, stepping by the specified amount. /// @@ -32,10 +50,6 @@ public extension Tensor where Scalar: TensorFlowScalar & Numeric { } } -//===------------------------------------------------------------------------------------------===// -// Random -//===------------------------------------------------------------------------------------------===// - public extension Tensor where Scalar == Int32 { /// Creates a tensor with the specified shape, randomly sampling scalar values /// from a discrete uniform distribution. From 5a093a8f6305b599891bb75728106cf0aac67113 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 10:00:36 -0400 Subject: [PATCH 20/55] Added a new 'stacking' tensor initializer and made some compatibility fixes. --- Sources/DeepLearning/Initializers.swift | 9 +++++ Sources/DeepLearning/Layer.swift | 20 +++++----- Sources/DeepLearning/Operators/Basic.swift | 16 ++++---- Sources/DeepLearning/Operators/NN.swift | 44 +++++----------------- 4 files changed, 37 insertions(+), 52 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 6b6d029b1..4f465f609 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -48,6 +48,15 @@ public extension Tensor where Scalar : Numeric { init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { self = Raw.range(start: start, limit: end, delta: stride) } + + /// Returns a stacked tensor, constructed by stacking the provided tensors along + /// the specified axis. + /// - Precondition: The tensors must have the same dimensions,. 
+ /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { + self = Raw.pack(tensors, axis: Int64(axis)) + } } public extension Tensor where Scalar == Int32 { diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 6bca9eb67..801173654 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -836,7 +836,7 @@ public struct MaxPool1D: Layer { /// The stride of the sliding window for temporal dimension. @noDerivative let stride: Int32 /// The padding algorithm for pooling. - @noDerivative let padding: PaddingV1 + @noDerivative let padding: Padding /// Creates a max pooling layer. /// @@ -847,7 +847,7 @@ public struct MaxPool1D: Layer { public init( poolSize: Int, stride: Int, - padding: PaddingV1 + padding: Padding ) { self.poolSize = Int32(poolSize) self.stride = Int32(stride) @@ -878,13 +878,13 @@ public struct MaxPool2D: Layer { /// Strides in non-spatial dimensions must be `1`. @noDerivative let strides: (Int32, Int32, Int32, Int32) /// The padding algorithm for pooling. - @noDerivative let padding: PaddingV1 + @noDerivative let padding: Padding /// Creates a max pooling layer. public init( poolSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), - padding: PaddingV1 + padding: Padding ) { (self.poolSize.0, self.poolSize.1, self.poolSize.2, self.poolSize.3) = (Int32(poolSize.0), Int32(poolSize.1), Int32(poolSize.2), Int32(poolSize.3)) @@ -899,7 +899,7 @@ public struct MaxPool2D: Layer { /// - poolSize: Vertical and horizontal factors by which to downscale. /// - strides: The strides. /// - padding: The padding. - public init(poolSize: (Int, Int), strides: (Int, Int), padding: PaddingV1 = .valid) { + public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) { self.poolSize = (1, Int32(poolSize.0), Int32(poolSize.1), 1) self.strides = (1, Int32(strides.0), Int32(strides.1), 1) self.padding = padding @@ -927,7 +927,7 @@ public struct AvgPool1D: Layer { /// The stride of the sliding window for temporal dimension. @noDerivative let stride: Int32 /// The padding algorithm for pooling. - @noDerivative let padding: PaddingV1 + @noDerivative let padding: Padding /// Creates an average pooling layer. /// @@ -938,7 +938,7 @@ public struct AvgPool1D: Layer { public init( poolSize: Int, stride: Int, - padding: PaddingV1 + padding: Padding ) { self.poolSize = Int32(poolSize) self.stride = Int32(stride) @@ -969,13 +969,13 @@ public struct AvgPool2D: Layer { /// Strides in non-spatial dimensions must be `1`. @noDerivative let strides: (Int32, Int32, Int32, Int32) /// The padding algorithm for pooling. - @noDerivative let padding: PaddingV1 + @noDerivative let padding: Padding /// Creates a average pooling layer. public init( poolSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), - padding: PaddingV1 + padding: Padding ) { (self.poolSize.0, self.poolSize.1, self.poolSize.2, self.poolSize.3) = (Int32(poolSize.0), Int32(poolSize.1), Int32(poolSize.2), Int32(poolSize.3)) @@ -990,7 +990,7 @@ public struct AvgPool2D: Layer { /// - poolSize: Vertical and horizontal factors by which to downscale. /// - strides: The strides. /// - padding: The padding. 
- public init(poolSize: (Int, Int), strides: (Int, Int), padding: PaddingV1 = .valid) { + public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) { self.poolSize = (1, Int32(poolSize.0), Int32(poolSize.1), 1) self.strides = (1, Int32(strides.0), Int32(strides.1), 1) self.padding = padding diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index fe75ac81c..520e05ed1 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -46,8 +46,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// - Returns: The stacked tensor. @inlinable // @differentiable(vjp: _vjpStacked where Scalar: TensorFlowFloatingPoint) - func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { - return Raw.pack([self] + tensors, axis: axis) + func stacked(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { + return Raw.pack([self] + tensors, axis: Int64(axis)) } /// Concatenates the current tensor with `tensors` along the `axis` dimension. @@ -257,13 +257,13 @@ public extension Tensor where Scalar: TensorFlowScalar { @inlinable func gathering(where mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - let posAxis = axis < 0 ? axis + rank : axis - let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() + let posAxis = Int(axis < 0 ? axis + rank : axis) + let leadingSize = shapeTensor[posAxis ..< posAxis + Int(mask.rank)].product().rankLifted() let reshapedTensor = reshaped( - toShape: shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) + return reshapedTensor.gathering(atIndices: indices, alongAxis: Int32(posAxis)) } } diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 4b8afd7dc..ac3ac43d8 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -86,9 +86,6 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { - /// The "explicit" padding scheme, which is defined by an array indicating the explicit padding - /// sizes at the start and end of each dimension. - case explicit([Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. @@ -97,36 +94,15 @@ public enum Padding { public extension Padding { @inlinable - var raw: Raw.Padding2 { + internal var raw: Raw.Padding { switch self { - case .explicit: return .explicit case .same: return .same case .valid: return .valid } } @inlinable - internal var explicitPaddings: [Int32] { - switch self { - case .explicit(let paddings): return paddings - case .same: return [] - case .valid: return [] - } - } -} - -/// An older padding scheme. Used by padding, convolution, and pooling ops. -// @_frozen // SR-9739 -public enum PaddingV1 { - /// The "valid" padding scheme. - case valid - /// The "same" padding scheme. 
- case same -} - -public extension PaddingV1 { - @inlinable - var raw: Raw.Padding { + internal var raw2: Raw.Padding2 { switch self { case .same: return .same case .valid: return .valid @@ -149,8 +125,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filter: filter, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw, - explicitPaddings: padding.explicitPaddings) + padding: padding.raw2, + explicitPaddings: []) } /// TensorFlow builtin conv2d gradient helper for the filter. @@ -167,8 +143,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filterSizes: filterSizes, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw, - explicitPaddings: padding.explicitPaddings) + padding: padding.raw2, + explicitPaddings: []) } @inlinable @@ -298,8 +274,8 @@ public extension Tensor where Scalar: FloatingPoint { self, filter: filter, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw, - explicitPaddings: padding.explicitPaddings) + padding: padding.raw2, + explicitPaddings: []) } /// Computes a 2-D max pooling, with the specified kernel sizes, strides, and @@ -318,7 +294,7 @@ public extension Tensor where Scalar: FloatingPoint { func maxPooled( kernelSize: (Int32, Int32, Int32, Int32), strides: (Int32, Int32, Int32, Int32), - padding: PaddingV1 + padding: Padding ) -> Tensor { return Raw.maxPoolV2( self, @@ -345,7 +321,7 @@ public extension Tensor where Scalar: FloatingPoint { func averagePooled( kernelSize: (Int32, Int32, Int32, Int32), strides: (Int32, Int32, Int32, Int32), - padding: PaddingV1 + padding: Padding ) -> Tensor { return Raw.avgPool( value: self, From 467a443d015bf97b3735c7f68ca29d349599715b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 10:55:53 -0400 Subject: [PATCH 21/55] Added a new 'tiling' tensor initializer. --- Sources/DeepLearning/Initializers.swift | 87 ++++++++++++++----------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 4f465f609..c58539684 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -17,46 +17,59 @@ import TensorFlow #endif public extension Tensor where Scalar : Numeric { - /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided - /// tensor. - /// - /// - Parameter other: Tensor whose shape and data type to use. - @inlinable @inline(__always) - init(zerosLike other: Tensor) { - self = Raw.zerosLike(other) - } + /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided + /// tensor. + /// + /// - Parameter other: Tensor whose shape and data type to use. + @inlinable @inline(__always) + init(zerosLike other: Tensor) { + self = Raw.zerosLike(other) + } + + /// Creates a tensor with all scalars set to one that has the same shape and type as the provided + /// tensor. + /// + /// - Parameter other: Tensor whose shape and data type to use. + @inlinable @inline(__always) + init(onesLike other: Tensor) { + self = Raw.onesLike(other) + } - /// Creates a tensor with all scalars set to one that has the same shape and type as the provided - /// tensor. - /// - /// - Parameter other: Tensor whose shape and data type to use. 
- @inlinable @inline(__always) - init(onesLike other: Tensor) { - self = Raw.onesLike(other) - } + /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an + /// end value, stepping by the specified amount. + /// + /// - Parameters: + /// - start: The starting value to use for the sequence. If the sequence contains any values, + /// the first one is `start`. + /// - end: An end value to limit the sequence. `end` is never an element of the resulting + /// sequence. + /// - stride: The amount to step by with each iteration. `stride` must be positive. + @inlinable @inline(__always) + init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { + self = Raw.range(start: start, limit: end, delta: stride) + } - /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an - /// end value, stepping by the specified amount. - /// - /// - Parameters: - /// - start: The starting value to use for the sequence. If the sequence contains any values, - /// the first one is `start`. - /// - end: An end value to limit the sequence. `end` is never an element of the resulting - /// sequence. - /// - stride: The amount to step by with each iteration. `stride` must be positive. - @inlinable @inline(__always) - init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { - self = Raw.range(start: start, limit: end, delta: stride) - } + /// Returns a stacked tensor, constructed by stacking the provided tensors along + /// the specified axis. + /// - Precondition: The tensors must have the same dimensions,. + /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { + self = Raw.pack(tensors, axis: Int64(axis)) + } - /// Returns a stacked tensor, constructed by stacking the provided tensors along - /// the specified axis. - /// - Precondition: The tensors must have the same dimensions,. - /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { - self = Raw.pack(tensors, axis: Int64(axis)) - } + /// Returns a tiled tensor, constructed by tiling the provided tensor. + /// + /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The + /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the + /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For + /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. + /// + /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. + @inlinable @inline(__always) + init(tiling tensor: Tensor, multiples: Tensor) { + self = Raw.tile(tensor, multiples: multiples) + } } public extension Tensor where Scalar == Int32 { From 1faaef456c420a20d024f39e8584ebee2c8cd28d Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 10:57:32 -0400 Subject: [PATCH 22/55] Minor edit. --- Sources/DeepLearning/Initializers.swift | 46 +++++++++++++------------ 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index c58539684..c42e2dc64 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -16,6 +16,30 @@ import TensorFlow #endif +public extension Tensor { + /// Returns a stacked tensor, constructed by stacking the provided tensors along + /// the specified axis. + /// - Precondition: The tensors must have the same dimensions,. 
+ /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { + self = Raw.pack(tensors, axis: Int64(axis)) + } + + /// Returns a tiled tensor, constructed by tiling the provided tensor. + /// + /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The + /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the + /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For + /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. + /// + /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. + @inlinable @inline(__always) + init(tiling tensor: Tensor, multiples: Tensor) { + self = Raw.tile(tensor, multiples: multiples) + } +} + public extension Tensor where Scalar : Numeric { /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided /// tensor. @@ -48,28 +72,6 @@ public extension Tensor where Scalar : Numeric { init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { self = Raw.range(start: start, limit: end, delta: stride) } - - /// Returns a stacked tensor, constructed by stacking the provided tensors along - /// the specified axis. - /// - Precondition: The tensors must have the same dimensions,. - /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { - self = Raw.pack(tensors, axis: Int64(axis)) - } - - /// Returns a tiled tensor, constructed by tiling the provided tensor. - /// - /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The - /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the - /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For - /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. - /// - /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. - @inlinable @inline(__always) - init(tiling tensor: Tensor, multiples: Tensor) { - self = Raw.tile(tensor, multiples: multiples) - } } public extension Tensor where Scalar == Int32 { From 94cf85fa877faf4907a3bf89447e38d3a3c195e1 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 11:18:20 -0400 Subject: [PATCH 23/55] Made some refactoring. --- Sources/DeepLearning/Initializers.swift | 81 ++++++++++++++++++--- Sources/DeepLearning/Operators/Basic.swift | 85 +++------------------- 2 files changed, 81 insertions(+), 85 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index c42e2dc64..6e259f72b 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -17,15 +17,78 @@ import TensorFlow #endif public extension Tensor { - /// Returns a stacked tensor, constructed by stacking the provided tensors along - /// the specified axis. - /// - Precondition: The tensors must have the same dimensions,. - /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } + /// Concatenates `tensors` along the `axis` dimension. + /// + /// Given that `tensors[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result + /// has shape `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data + /// from the input tensors is joined along the `axis` dimension. 
+ /// + /// For example: + /// ``` + /// // t1 is [[1, 2, 3], [4, 5, 6]] + /// // t2 is [[7, 8, 9], [10, 11, 12]] + /// Tensor(concatenating: [t1, t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + /// Tensor(concatenating: [t1, t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] + /// + /// // t3 has shape [2, 3] + /// // t4 has shape [2, 3] + /// Tensor(concatenating: [t3, t4]) // has shape [4, 3] + /// Tensor(concatenating: [t3, t4], alongAxis: 1) // has shape [2, 6] + /// ``` + /// + /// - Note: If you are concatenating along a new axis consider using + /// `Tensor.init(stacking:alongAxis:)`. + /// + /// - Parameters: + /// - tensors: Tensors to concatenate. + /// - axis: Dimension along which to concatenate. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same rank and all dimensions except `axis` + /// must be equal. + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: The concatenated tensor. + @inlinable + // @differentiable(vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + init(concatenating tensors: [Tensor], alongAxis axis: Int32 = 0) { + self = Raw.concatV2(tensors, axis: Tensor(axis)) + } + /// Returns a tiled tensor, constructed by tiling the provided tensor. /// /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The @@ -34,7 +97,7 @@ public extension Tensor { /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. /// /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. - @inlinable @inline(__always) + @inlinable init(tiling tensor: Tensor, multiples: Tensor) { self = Raw.tile(tensor, multiples: multiples) } @@ -45,7 +108,7 @@ public extension Tensor where Scalar : Numeric { /// tensor. /// /// - Parameter other: Tensor whose shape and data type to use. - @inlinable @inline(__always) + @inlinable init(zerosLike other: Tensor) { self = Raw.zerosLike(other) } @@ -54,7 +117,7 @@ public extension Tensor where Scalar : Numeric { /// tensor. /// /// - Parameter other: Tensor whose shape and data type to use. - @inlinable @inline(__always) + @inlinable init(onesLike other: Tensor) { self = Raw.onesLike(other) } @@ -68,7 +131,7 @@ public extension Tensor where Scalar : Numeric { /// - end: An end value to limit the sequence. `end` is never an element of the resulting /// sequence. /// - stride: The amount to step by with each iteration. `stride` must be positive. - @inlinable @inline(__always) + @inlinable init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { self = Raw.range(start: start, limit: end, delta: stride) } diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 520e05ed1..740eb3826 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -16,77 +16,7 @@ import TensorFlow #endif -public extension Tensor where Scalar: TensorFlowScalar { - /// Stacks the current tensor with `tensors`, along the `axis` dimension, into a tensor with - /// rank one higher than the current tensor and each tensor in `tensors`. - /// - /// Given `self` and `tensors` all have shape `[A, B, C]`, and `tensors.count = N-1`, then: - /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. - /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. - /// - etc. 
- /// - /// For example: - /// ``` - /// // 'x' is [1, 4] - /// // 'y' is [2, 5] - /// // 'z' is [3, 6] - /// x.stacked(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] - /// x.stacked(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] - /// ``` - /// - /// This is the opposite of `unstacked`. - /// - /// - Parameters: - /// - tensors: Tensors to stack with the current tensor. - /// - axis: Dimension along which to stack. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same shape as the current tensor. - /// - Precondition: `axis` must be in the range `[-rank, rank)`. - /// - /// - Returns: The stacked tensor. - @inlinable - // @differentiable(vjp: _vjpStacked where Scalar: TensorFlowFloatingPoint) - func stacked(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { - return Raw.pack([self] + tensors, axis: Int64(axis)) - } - - /// Concatenates the current tensor with `tensors` along the `axis` dimension. - /// - /// Given `self` and `tensors` are all put in a single array, `values`, and - /// `values[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result has shape - /// `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data from the - /// input tensors is joined along the `axis` dimension. - /// - /// For example: - /// ``` - /// // t1 is [[1, 2, 3], [4, 5, 6]] - /// // t2 is [[7, 8, 9], [10, 11, 12]] - /// t1.concatenated(with: [t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] - /// t1.concatenated(with: [t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] - /// - /// // t3 has shape [2, 3] - /// // t4 has shape [2, 3] - /// t3.concatenated(with: [t4]) // has shape [4, 3] - /// t3.concatenated(with: [t4], alongAxis: 1) // has shape [2, 6] - /// ``` - /// - /// - Note: If you are concatenating along a new axis consider using `stacked`. - /// - /// - Parameters: - /// - tensors: Tensors to concatenate with the current tensor. - /// - axis: Dimension along which to concatenate. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same rank as the current tensor and all dimensions - /// except `axis` must be equal. - /// - Precondition: `axis` must be in the range `[-rank, rank)`. - /// - /// - Returns: The concatenated tensor. - @inlinable - // @differentiable(vjp: _vjpConcatenated where Scalar : TensorFlowFloatingPoint) - func concatenated(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { - return Raw.concatV2([self] + tensors, axis: Tensor(axis)) - } - +public extension Tensor { /// Gathers slices of this tensor at `indices` along the `axis` dimension. /// /// For 0-D (scalar) `indices`: @@ -178,7 +108,8 @@ public extension Tensor where Scalar: TensorFlowScalar { precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") // Move self[axis] up to self[batchDims]. - let permutation = Tensor(0 ..< batchDims).concatenated(with: [ + let permutation = Tensor(concatenating: [ + Tensor(0 ..< batchDims), Tensor(axis).rankLifted(), Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) @@ -189,7 +120,8 @@ public extension Tensor where Scalar: TensorFlowScalar { // Move the result dimensions corresponding to self[batchDims ..< axis] to just before // the dimensions corresponding to indices[batchDims ...]. 
let start = indices.rank + posAxis - batchDims - let resultPermutation = Tensor(0 ..< batchDims).concatenated(with: [ + let resultPermutation = Tensor(concatenating: [ + Tensor(0 ..< batchDims), Tensor(rangeFrom: indices.rank, to: start, stride: 1), Tensor(batchDims ..< indices.rank), Tensor(rangeFrom: start, to: result.rank, stride: 1)]) @@ -207,7 +139,8 @@ public extension Tensor where Scalar: TensorFlowScalar { to: dValue, stride: Tensor(ones: []) ) * accumulated - let dShape = Tensor(d - 1).stacked(with: [ + let dShape = Tensor(stacking: [ + Tensor(d - 1), Tensor(dValue), Tensor(indices.rank - 1)]) batchIndices += dIndices.reshaped(toShape: dShape) @@ -260,8 +193,8 @@ public extension Tensor where Scalar: TensorFlowScalar { let posAxis = Int(axis < 0 ? axis + rank : axis) let leadingSize = shapeTensor[posAxis ..< posAxis + Int(mask.rank)].product().rankLifted() let reshapedTensor = reshaped( - toShape: shapeTensor[..(concatenating: [ + shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) return reshapedTensor.gathering(atIndices: indices, alongAxis: Int32(posAxis)) } From e0bbfc049c9d1f7b9452bbffaecd4c9c894488d8 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 15:29:31 -0400 Subject: [PATCH 24/55] Bug fix. --- Sources/DeepLearning/Operators/Basic.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 740eb3826..cb63eca1b 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -139,10 +139,10 @@ public extension Tensor { to: dValue, stride: Tensor(ones: []) ) * accumulated - let dShape = Tensor(stacking: [ - Tensor(d - 1), - Tensor(dValue), - Tensor(indices.rank - 1)]) + let dShape = Tensor(concatenating: [ + Tensor([Int32](repeating: 1, count: Int(d - 1))), + Tensor([dValue]), + Tensor([Int32](repeating: 1, count: Int(indices.rank - 1)))]) batchIndices += dIndices.reshaped(toShape: dShape) } From 6c0436878d41b5a1d6f1aa7b8c8b559b1250ff16 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 18 Apr 2019 22:14:11 -0400 Subject: [PATCH 25/55] Added support for the split op and its VJP. 
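
As the `_vjpSplit` helpers below show, the pullback of either `split` overload simply concatenates the incoming cotangents back along the split axis. A rough usage sketch of the two overloads and of `unstack` (the module name in the import, the shapes, and the values are illustrative only):

```
import TensorFlow
import DeepLearning  // module name assumed from Sources/DeepLearning

let x = Tensor<Float>(shape: [2, 3], scalars: [0, 1, 2, 3, 4, 5])

// Three equally sized pieces along axis 1, each of shape [2, 1].
let thirds = x.split(numSplits: 3, alongAxis: 1)

// Unequal pieces along axis 1: shapes [2, 1] and [2, 2].
let ragged = x.split(splitSizes: Tensor<Int32>([1, 2]), alongAxis: 1)

// Unstacking removes the leading axis: two tensors of shape [3].
let rows = x.unstack()
```
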
--- Sources/DeepLearning/Operators/Basic.swift | 92 ++++++++++++++++------ Sources/DeepLearning/Operators/Math.swift | 48 ++++++++++- Sources/DeepLearning/Operators/NN.swift | 2 +- 3 files changed, 117 insertions(+), 25 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index cb63eca1b..82c96fd2e 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -17,6 +17,32 @@ import TensorFlow #endif public extension Tensor { + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func unstack(alongAxis axis: Int = 0) -> [Tensor] { + return split(numSplits: shape[axis], alongAxis: axis) + } + + @inlinable + @differentiable( + vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) + func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { + return Raw.split( + splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) + } + + @inlinable + @differentiable( + wrt: self, + vjp: _vjpSplit(splitSizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) + func split(splitSizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { + return Raw.splitV( + value: self, + sizeSplits: splitSizes, + splitDim: Tensor(Int32(axis)), + numSplit: Int64(splitSizes.shape[0])) + } + /// Gathers slices of this tensor at `indices` along the `axis` dimension. /// /// For 0-D (scalar) `indices`: @@ -66,9 +92,9 @@ public extension Tensor { // @differentiable(vjp: _vjpGathering where Scalar: TensorFlowFloatingPoint) func gathering( atIndices indices: Tensor, - alongAxis axis: Int32 = 0 + alongAxis axis: Int = 0 ) -> Tensor { - return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) + return Raw.gatherV2(params: self, indices: indices, axis: Tensor(Int32(axis))) } /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the @@ -89,16 +115,16 @@ public extension Tensor { /// - Returns: The gathered tensor. @inlinable func batchGathering( - atIndices indices: Tensor, - alongAxis axis: Int32, - numBatchDims batchDims: Int32 + atIndices indices: Tensor, + alongAxis axis: Int, + numBatchDims batchDims: Int ) -> Tensor { - precondition(batchDims >= 0 && batchDims < indices.rank, + precondition(batchDims >= 0 && batchDims < indices.rank, "'numBatchDims' must be non-negative and less than 'indices.rank'.") precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") - // Handle the axis argument by transposing the axis dimension so that it is the first - // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then + // Handle the axis argument by transposing the axis dimension so that it is the first + // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then // transposing the result to put the pre-axis dimensions before the indices dimensions. if axis != batchDims { // Adjust axis to be positive. @@ -109,22 +135,22 @@ public extension Tensor { // Move self[axis] up to self[batchDims]. 
let permutation = Tensor(concatenating: [ - Tensor(0 ..< batchDims), - Tensor(axis).rankLifted(), - Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), - Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) + Tensor(0 ..< Int32(batchDims)), + Tensor(Int32(axis)).rankLifted(), + Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), + Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) let tensor = transposed(withPermutations: permutation) let result = tensor.batchGathering( atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) - - // Move the result dimensions corresponding to self[batchDims ..< axis] to just before + + // Move the result dimensions corresponding to self[batchDims ..< axis] to just before // the dimensions corresponding to indices[batchDims ...]. let start = indices.rank + posAxis - batchDims let resultPermutation = Tensor(concatenating: [ - Tensor(0 ..< batchDims), - Tensor(rangeFrom: indices.rank, to: start, stride: 1), - Tensor(batchDims ..< indices.rank), - Tensor(rangeFrom: start, to: result.rank, stride: 1)]) + Tensor(0 ..< Int32(batchDims)), + Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), + Tensor(Int32(batchDims) ..< Int32(indices.rank)), + Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) return result.transposed(withPermutations: resultPermutation) } @@ -188,15 +214,15 @@ public extension Tensor { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable - func gathering(where mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { + func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - let posAxis = Int(axis < 0 ? axis + rank : axis) - let leadingSize = shapeTensor[posAxis ..< posAxis + Int(mask.rank)].product().rankLifted() + let posAxis = axis < 0 ? axis + rank : axis + let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() let reshapedTensor = reshaped( toShape: Tensor(concatenating: [ - shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) - return reshapedTensor.gathering(atIndices: indices, alongAxis: Int32(posAxis)) + return reshapedTensor.gathering(atIndices: indices, alongAxis: posAxis) } } @@ -234,3 +260,23 @@ public extension Tensor { return Raw.where_(self) } } + +public extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + internal func _vjpSplit( + numSplits: Int, + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { + let result = split(numSplits: numSplits, alongAxis: axis) + return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + } + + @inlinable + internal func _vjpSplit( + splitSizes: Tensor, + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { + let result = split(splitSizes: splitSizes, alongAxis: axis) + return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + } +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 249fc6015..883f0a2bb 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -17,6 +17,52 @@ import TensorFlow #endif /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. 
-public func round(_ x: Tensor) -> Tensor { +@inlinable +@differentiable(vjp: _vjpRound) +public func round(_ x: Tensor) -> Tensor { return Raw.round(x) } + +@inlinable +internal func _vjpRound( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (round(x), { v in Tensor(zerosLike: v) }) +} + +/// Computes the sigmoid of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpSigmoid) +public func sigmoid(_ x: Tensor) -> Tensor { + return Raw.sigmoid(x) +} + +@inlinable +internal func _vjpSigmoid( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) +} + +/// Computes the log-sigmoid of the specified tensor element-wise. Specifically, +/// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. +@inlinable +@differentiable +public func logSigmoid(_ x: Tensor) -> Tensor { + return -softplus(-x) +} + +/// Computes the softplus function for the specified tensor element-wise. The softplus function is +/// defined as `log(exp(x) + 1)`. +@inlinable +@differentiable(vjp: _vjpSoftplus) +public func softplus(_ x: Tensor) -> Tensor { + return Raw.softplus(features: x) +} + +@inlinable +internal func _vjpSoftplus( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (softplus(x), { v in v * sigmoid(x) }) +} diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index d9b729722..f1401af20 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -36,7 +36,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { wrt: (self, offset, scale), vjp: _vjpBatchNormalized) func batchNormalized( - alongAxis axis: Int32, + alongAxis axis: Int, offset: Tensor = Tensor(0), scale: Tensor = Tensor(1), epsilon: Scalar = 0.001 From ca8ce02db1d00c9b8ddae11a971c4a2f9398b9c1 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 18 Apr 2019 22:49:03 -0400 Subject: [PATCH 26/55] Added VJPs for stacking and tiling. --- Sources/DeepLearning/Initializers.swift | 49 +++++++++++++++++++++---- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 0d6953703..457462fcb 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -17,6 +17,13 @@ import TensorFlow #endif public extension Tensor { + /// Creates a tensor from an array of tensors (which may themselves be scalars). + @inlinable + @differentiable(where Scalar: TensorFlowFloatingPoint) + init(_ elements: [Tensor]) { + self = Tensor(stacking: elements) + } + /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than /// the current tensor and each tensor in `tensors`. /// @@ -46,8 +53,8 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - // @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) - init(stacking tensors: [Tensor], alongAxis axis: Int32 = 0) { + @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) + init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -83,11 +90,11 @@ public extension Tensor { /// provided tensors. /// /// - Returns: The concatenated tensor. 
- @inlinable - // @differentiable(vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) - init(concatenating tensors: [Tensor], alongAxis axis: Int32 = 0) { - self = Raw.concatV2(tensors, axis: Tensor(axis)) - } + // @inlinable + // // @differentiable(vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + // init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { + // self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) + // } /// Returns a tiled tensor, constructed by tiling the provided tensor. /// @@ -98,11 +105,39 @@ public extension Tensor { /// /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. @inlinable + @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar: TensorFlowFloatingPoint) init(tiling tensor: Tensor, multiples: Tensor) { self = Raw.tile(tensor, multiples: multiples) } } +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpStacking( + stacking tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(stacking: tensors, alongAxis: axis) + return (result, { v in + return Array.DifferentiableView(v.unstack(alongAxis: axis)) + }) + } + + @inlinable + static func _vjpTiling( + tiling tensor: Tensor, + multiples: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + let result = Tensor(tiling: tensor, multiples: multiples) + return (result, { [shape = tensor.shapeTensor] v in + let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() + let axes = Tensor( + rangeFrom: 0, to: Int32(splitShape.shape.contiguousSize), stride: 2) + return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) + }) + } +} + public extension Tensor where Scalar : Numeric { /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided /// tensor. From 26e91232fc6de0adbf57aa4ec50c7fd142b959b6 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 18 Apr 2019 23:05:30 -0400 Subject: [PATCH 27/55] Added VJP for concatenating. --- Sources/DeepLearning/Initializers.swift | 64 +++++++++++++++++-------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 457462fcb..bca769299 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -19,7 +19,7 @@ import TensorFlow public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(where Scalar : TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } @@ -53,7 +53,7 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) + @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -90,11 +90,12 @@ public extension Tensor { /// provided tensors. /// /// - Returns: The concatenated tensor. 
- // @inlinable - // // @differentiable(vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) - // init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { - // self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) - // } + @inlinable + @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { + precondition(tensors.count > 0) + self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) + } /// Returns a tiled tensor, constructed by tiling the provided tensor. /// @@ -105,8 +106,8 @@ public extension Tensor { /// /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. @inlinable - @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar: TensorFlowFloatingPoint) - init(tiling tensor: Tensor, multiples: Tensor) { + @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar : TensorFlowFloatingPoint) + init(tiling tensor: Tensor, multiples: Tensor) { self = Raw.tile(tensor, multiples: multiples) } } @@ -123,6 +124,21 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { }) } + @inlinable + static func _vjpConcatenating( + concatenating tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(concatenating: tensors, alongAxis: axis) + let posAxis = axis < 0 ? axis + tensors[0].rank : axis + let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) + return (result, { [count = tensors.count] v in + if count == 1 { return Array.DifferentiableView([v]) } + let splits = v.split(sizes: sizes, alongAxis: posAxis) + return Array.DifferentiableView(splits) + }) + } + @inlinable static func _vjpTiling( tiling tensor: Tensor, @@ -180,8 +196,10 @@ public extension Tensor where Scalar == Int32 { /// - shape: The dimensions of the tensor. /// - generator: Random number generator to use. /// - init(randomStandardUniform shape: TensorShape, - generator: inout G) { + init( + randomStandardUniform shape: TensorShape, + generator: inout G + ) { let dist = UniformIntegerDistribution() var scalars: [Scalar] = [] for _ in 0 ..< shape.contiguousSize { @@ -250,8 +268,10 @@ public extension Tensor where Scalar: BinaryFloatingPoint, /// - shape: The dimensions of the tensor. /// - generator: Random number generator to use. /// - init(randomUniform shape: TensorShape, - generator: inout G) { + init( + randomUniform shape: TensorShape, + generator: inout G + ) { let dist = UniformFloatingPointDistribution() var scalars: [Scalar] = [] for _ in 0 ..< shape.contiguousSize { @@ -269,10 +289,12 @@ public extension Tensor where Scalar: BinaryFloatingPoint, /// - stddev: The standard deviation of the distribution. /// - generator: Random number generator to use. /// - init(randomNormal shape: TensorShape, - mean: Scalar = 0, - stddev: Scalar = 1, - generator: inout G) { + init( + randomNormal shape: TensorShape, + mean: Scalar = 0, + stddev: Scalar = 1, + generator: inout G + ) { let dist = NormalDistribution(mean: mean, standardDeviation: stddev) var scalars: [Scalar] = [] for _ in 0 ..< shape.contiguousSize { @@ -306,9 +328,11 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameters: /// - shape: The dimensions of the tensor. /// - init(glorotUniform shape: TensorShape, - seed: (Int64, Int64) = (Int64.random(in: Int64.min.. Date: Thu, 18 Apr 2019 23:26:38 -0400 Subject: [PATCH 28/55] Added the gathering VJP. 
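The concatenation pullback above recovers per-input cotangents by splitting the incoming cotangent with the recorded sizes along the concatenation axis. A sketch with assumed shapes, using the `split(sizes:alongAxis:)` spelling from this patch series:

```swift
let a = Tensor<Float>([[1, 2, 3], [4, 5, 6]])             // shape [2, 3]
let b = Tensor<Float>([[7, 8, 9]])                        // shape [1, 3]
let joined = Tensor(concatenating: [a, b], alongAxis: 0)  // shape [3, 3]
let cotangent = Tensor<Float>([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
let pieces = cotangent.split(sizes: Tensor<Int32>([2, 1]), alongAxis: 0)
// pieces[0] has shape [2, 3] (cotangent of `a`); pieces[1] has shape [1, 3] (cotangent of `b`).
```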
--- Sources/DeepLearning/Initializers.swift | 2 +- Sources/DeepLearning/Operators/Basic.swift | 65 ++++++++++++++++++---- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index bca769299..dd9d01923 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -148,7 +148,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { return (result, { [shape = tensor.shapeTensor] v in let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() let axes = Tensor( - rangeFrom: 0, to: Int32(splitShape.shape.contiguousSize), stride: 2) + rangeFrom: 0, to: Int32(splitShape.scalarCount), stride: 2) return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) }) } diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 82c96fd2e..78acc9a2e 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -34,13 +34,13 @@ public extension Tensor { @inlinable @differentiable( wrt: self, - vjp: _vjpSplit(splitSizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) - func split(splitSizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { + vjp: _vjpSplit(sizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) + func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { return Raw.splitV( value: self, - sizeSplits: splitSizes, + sizeSplits: sizes, splitDim: Tensor(Int32(axis)), - numSplit: Int64(splitSizes.shape[0])) + numSplit: Int64(sizes.shape[0])) } /// Gathers slices of this tensor at `indices` along the `axis` dimension. @@ -89,7 +89,7 @@ public extension Tensor { /// /// - Returns: The gathered tensor. @inlinable - // @differentiable(vjp: _vjpGathering where Scalar: TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpGathering where Scalar : TensorFlowFloatingPoint) func gathering( atIndices indices: Tensor, alongAxis axis: Int = 0 @@ -114,6 +114,7 @@ public extension Tensor { /// /// - Returns: The gathered tensor. @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) func batchGathering( atIndices indices: Tensor, alongAxis axis: Int, @@ -214,6 +215,7 @@ public extension Tensor { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = axis < 0 ? 
axis + rank : axis @@ -261,9 +263,9 @@ public extension Tensor { } } -public extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar : TensorFlowFloatingPoint { @inlinable - internal func _vjpSplit( + func _vjpSplit( numSplits: Int, alongAxis axis: Int = 0 ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { @@ -272,11 +274,54 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { } @inlinable - internal func _vjpSplit( - splitSizes: Tensor, + func _vjpSplit( + sizes: Tensor, alongAxis axis: Int = 0 ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { - let result = split(splitSizes: splitSizes, alongAxis: axis) + let result = split(sizes: sizes, alongAxis: axis) return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) } + + @inlinable + func _vjpGathering( + atIndices indices: Tensor, + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Tensor) { + let result = gathering(atIndices: indices, alongAxis: axis) + let posAxis = axis < 0 ? axis + rank : axis + return (result, { [shape = shapeTensor] v in + let indicesSize = Tensor(Int32(indices.scalarCount)) + let outerShape = shape[..(rangeFrom: 0, to: Int32(outerSize), stride: 1) + let innerIndices = Tensor( + rangeFrom: Int32(outerSize) + 1, + to: Int32(outerSize) + 1 + Int32(innerSize), + stride: 1) + let valuesShape = Tensor(concatenating: [outerShape, indicesSize, innerShape]) + let values = v.reshaped(toShape: valuesShape) + let valueIndices = indices.reshaped(toShape: indicesSize) + + // We need to sum up every slice `values[..., i, ....]` corresponding to + // `tensor[..., indices[i], ...]`. Since `unsortedSegmentSum` does not support an axis + // parameter, we transpose the gather dimension to the front, then use + // `unsortedSegmentSum` to build a `[gatherAxis, outerAxes, innerAxes]` tensor with all + // the gradients affecting each index in `gatherAxis` summed up. + let permutations = Tensor(concatenating: [ + Tensor([Int32(outerSize)]), outerIndices, innerIndices]) + let transposedValues = values.transposed(withPermutations: permutations) + let gradient = Raw.unsortedSegmentSum( + data: transposedValues, + segmentIds: valueIndices, + numSegments: shape[posAxis]) + + // Finally, we invert the above transpose operation by moving dimension 0 back to its + // original position. + let inversePermutations = Tensor(concatenating: [ + outerIndices + 1, Tensor([0]), innerIndices]) + return gradient.transposed(withPermutations: inversePermutations) + }) + } } From 49bfe8df71057167a41a3cd25fea9940a47bd028 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 13:31:14 -0400 Subject: [PATCH 29/55] Bug fixes. 
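`_vjpGathering` above is effectively a scatter-add: the cotangent of every output slice must be accumulated back into the input row it was gathered from, which is why `Raw.unsortedSegmentSum` is used. A small numeric illustration, assuming the `gradient(at:in:)` helper from the TensorFlow module:

```swift
let params = Tensor<Float>([[1, 2], [3, 4], [5, 6]])   // shape [3, 2]
let indices = Tensor<Int32>([2, 0, 2])
let picked = params.gathering(atIndices: indices)      // [[5, 6], [1, 2], [5, 6]]
let grad = gradient(at: params) { $0.gathering(atIndices: indices).sum() }
// grad == [[1, 1], [0, 0], [2, 2]]: row 2 was gathered twice, so its cotangents add up.
```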
--- Sources/DeepLearning/Initializers.swift | 8 ++--- Sources/DeepLearning/Operators/Basic.swift | 36 ++++++++++------------ 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index dd9d01923..13384567b 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -113,18 +113,18 @@ public extension Tensor { } internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @inlinable + @usableFromInline static func _vjpStacking( stacking tensors: [Tensor], alongAxis axis: Int = 0 ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(stacking: tensors, alongAxis: axis) + let result = Tensor(stacking: tensors, alongAxis: axis) return (result, { v in return Array.DifferentiableView(v.unstack(alongAxis: axis)) }) } - @inlinable + @usableFromInline static func _vjpConcatenating( concatenating tensors: [Tensor], alongAxis axis: Int = 0 @@ -139,7 +139,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { }) } - @inlinable + @usableFromInline static func _vjpTiling( tiling tensor: Tensor, multiples: Tensor diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 78acc9a2e..052a2d368 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -24,8 +24,7 @@ public extension Tensor { } @inlinable - @differentiable( - vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { return Raw.split( splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) @@ -114,7 +113,7 @@ public extension Tensor { /// /// - Returns: The gathered tensor. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + // TODO: @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) func batchGathering( atIndices indices: Tensor, alongAxis axis: Int, @@ -136,7 +135,7 @@ public extension Tensor { // Move self[axis] up to self[batchDims]. let permutation = Tensor(concatenating: [ - Tensor(0 ..< Int32(batchDims)), + Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), Tensor(Int32(axis)).rankLifted(), Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) @@ -148,28 +147,27 @@ public extension Tensor { // the dimensions corresponding to indices[batchDims ...]. 
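The `rangeFrom`-based fix above builds the permutation handed to `transposed(withPermutations:)`. With assumed values `batchDims = 1`, `posAxis = 2`, and `rank = 4` (hypothetical sizes, for illustration only), the pieces evaluate as follows:

```swift
let batchDims = 1, posAxis = 2, rank = 4
let permutation = Tensor<Int32>(concatenating: [
    Tensor<Int32>(rangeFrom: 0, to: Int32(batchDims), stride: 1),              // [0]
    Tensor<Int32>(Int32(posAxis)).rankLifted(),                                // [2]
    Tensor<Int32>(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), // [1]
    Tensor<Int32>(rangeFrom: Int32(posAxis) + 1, to: Int32(rank), stride: 1)]) // [3]
// permutation == [0, 2, 1, 3]: the gather axis is moved up next to the batch dimensions.
```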
let start = indices.rank + posAxis - batchDims let resultPermutation = Tensor(concatenating: [ - Tensor(0 ..< Int32(batchDims)), + Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), - Tensor(Int32(batchDims) ..< Int32(indices.rank)), + Tensor(rangeFrom: Int32(batchDims), to: Int32(indices.rank), stride: 1), Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) return result.transposed(withPermutations: resultPermutation) } - let castedShape = Tensor(shapeTensor) var batchIndices = indices var accumulated = Tensor(ones: []) for d in (1...batchDims).reversed() { - accumulated *= castedShape[d] - let dValue = castedShape[d - 1] + accumulated *= shapeTensor[d] + let dValue = shapeTensor[d - 1] let dIndices = Tensor( rangeFrom: Tensor(zeros: []), to: dValue, stride: Tensor(ones: []) ) * accumulated let dShape = Tensor(concatenating: [ - Tensor([Int32](repeating: 1, count: Int(d - 1))), - Tensor([dValue]), - Tensor([Int32](repeating: 1, count: Int(indices.rank - 1)))]) + Tensor([Int32](repeating: 1, count: d - 1)), + dValue.rankLifted(), + Tensor([Int32](repeating: 1, count: indices.rank - 1))]) batchIndices += dIndices.reshaped(toShape: dShape) } @@ -215,7 +213,7 @@ public extension Tensor { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + // TODO: @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = axis < 0 ? axis + rank : axis @@ -264,25 +262,25 @@ public extension Tensor { } internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @inlinable + @usableFromInline func _vjpSplit( numSplits: Int, alongAxis axis: Int = 0 - ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { let result = split(numSplits: numSplits, alongAxis: axis) return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) } - @inlinable + @usableFromInline func _vjpSplit( sizes: Tensor, alongAxis axis: Int = 0 - ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { let result = split(sizes: sizes, alongAxis: axis) return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) } - @inlinable + @usableFromInline func _vjpGathering( atIndices indices: Tensor, alongAxis axis: Int = 0 @@ -316,7 +314,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { data: transposedValues, segmentIds: valueIndices, numSegments: shape[posAxis]) - + // Finally, we invert the above transpose operation by moving dimension 0 back to its // original position. let inversePermutations = Tensor(concatenating: [ From 10de441b10669039d08fa4a24679bf40a812f6e3 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 13:49:26 -0400 Subject: [PATCH 30/55] Added an 'Optimizable' protocol. 
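The boolean-mask overload `gathering(where:alongAxis:)` patched above behaves like the 2-D case in its doc comment. A short usage sketch:

```swift
let t = Tensor<Float>([[1, 2], [3, 4], [5, 6]])
let mask = Tensor<Bool>([true, false, true])
let kept = t.gathering(where: mask)   // [[1, 2], [5, 6]]
```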
--- Sources/DeepLearning/Layer.swift | 3 +-- Sources/DeepLearning/Optimizer.swift | 13 ++++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index db93adc19..870d55569 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -23,8 +23,7 @@ import TensorFlow /// /// `Layer` instances define a differentiable `applied(to:)` method for mapping inputs to /// outputs. -public protocol Layer: Differentiable & KeyPathIterable - where AllDifferentiableVariables: KeyPathIterable { +public protocol Layer: Optimizable { /// The input type of the layer. associatedtype Input: Differentiable /// The output type of the layer. diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 440a691e0..81e03bcbd 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -16,13 +16,16 @@ import TensorFlow #endif +public protocol Optimizable: Differentiable & KeyPathIterable + where AllDifferentiableVariables: KeyPathIterable { } + /// A machine learning optimizer. /// /// Optimizers apply an optimization algorithm to update the differentiable variables of a machine /// learning model. public protocol Optimizer { /// The type of the model whose parameters are optimized. - associatedtype Model: Differentiable + associatedtype Model: Optimizable /// The scalar parameter type. associatedtype Scalar: FloatingPoint /// The learning rate. @@ -45,7 +48,7 @@ fileprivate extension Tensor where Scalar: Numeric { /// /// Reference: ["Adam - A Method for Stochastic Optimization"]( /// https://arxiv.org/abs/1412.6980v8) -public class Adam: Optimizer +public class Adam: Optimizer where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float @@ -139,7 +142,7 @@ public class Adam: Optimizer /// /// Reference: ["rmsprop: Divide the gradient by a running average of its recent magnitude"]( /// http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) -public class RMSProp: Optimizer +public class RMSProp: Optimizer where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float @@ -203,7 +206,7 @@ public class RMSProp: Optimizer /// /// An optimizer that implements stochastic gradient descent, with support for momentum, learning /// rate decay, and Nesterov momentum. -public class SGD: Optimizer +public class SGD: Optimizer where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float @@ -275,7 +278,7 @@ public class SGD: Optimizer // MARK: - Manifold optimizers /// A Riemann manifold stochastic gradient descent (SGD) optimizer. -public class RiemannSGD: Optimizer +public class RiemannSGD: Optimizer where Model.TangentVector: VectorNumeric, Model.TangentVector.Scalar == Scalar { /// The learning rate. public var learningRate: Scalar From a6303962e0703ea73bd8451e5dd3685e3f40eed5 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 17:23:34 -0400 Subject: [PATCH 31/55] Moved some more activation functions from the stdlib. 
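With `Layer` now refining a standalone `Optimizable` protocol, code that drives an optimizer only needs a `Differentiable & KeyPathIterable` model. Below is a hedged sketch of a generic training step written against these protocols; it assumes the `Optimizer` protocol's usual `update(_:along:)` requirement and the `gradient(at:in:)` helper from the TensorFlow module, and `trainingStep`/`lossFunction` are illustrative names rather than part of the patch:

```swift
func trainingStep<Model: Optimizable, Opt: Optimizer>(
    model: inout Model,
    optimizer: inout Opt,
    loss lossFunction: @differentiable (Model) -> Tensor<Float>
) where Opt.Model == Model, Model.AllDifferentiableVariables == Model.CotangentVector {
    // Differentiate the loss with respect to the model, then apply the optimizer's update rule.
    let grads = gradient(at: model, in: lossFunction)
    optimizer.update(&model.allDifferentiableVariables, along: grads)
}
```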
--- Sources/DeepLearning/Operators/Math.swift | 84 +++++++++++++++++++---- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 883f0a2bb..4ffa1e800 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -19,50 +19,104 @@ import TensorFlow /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. @inlinable @differentiable(vjp: _vjpRound) -public func round(_ x: Tensor) -> Tensor { +public func round(_ x: Tensor) -> Tensor { return Raw.round(x) } @inlinable -internal func _vjpRound( +internal func _vjpRound( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { - return (round(x), { v in Tensor(zerosLike: v) }) + return (round(x), { v in Tensor(zerosLike: v) }) } /// Computes the sigmoid of the specified tensor element-wise. +/// Specifically, computes `1 / (1 + exp(-x))`. @inlinable @differentiable(vjp: _vjpSigmoid) -public func sigmoid(_ x: Tensor) -> Tensor { - return Raw.sigmoid(x) +public func sigmoid(_ x: Tensor) -> Tensor { + return Raw.sigmoid(x) } @inlinable -internal func _vjpSigmoid( - _ x: Tensor +internal func _vjpSigmoid( + _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { - return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) + return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) } /// Computes the log-sigmoid of the specified tensor element-wise. Specifically, /// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. @inlinable @differentiable -public func logSigmoid(_ x: Tensor) -> Tensor { - return -softplus(-x) +public func logSigmoid(_ x: Tensor) -> Tensor { + return -softplus(-x) } /// Computes the softplus function for the specified tensor element-wise. The softplus function is /// defined as `log(exp(x) + 1)`. @inlinable @differentiable(vjp: _vjpSoftplus) -public func softplus(_ x: Tensor) -> Tensor { - return Raw.softplus(features: x) +public func softplus(_ x: Tensor) -> Tensor { + return Raw.softplus(features: x) } @inlinable -internal func _vjpSoftplus( - _ x: Tensor +internal func _vjpSoftplus( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (softplus(x), { v in v * sigmoid(x) }) +} + + +/// Computes the softmax of the specified tensor along the last axis. +/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. +@inlinable +@differentiable(vjp: _vjpSoftmax(_:) where T : TensorFlowFloatingPoint) +public func softmax(_ x: Tensor) -> Tensor { + return Raw.softmax(logits: x) +} + +/// Computes the softmax of the specified tensor along the specified axis. +/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. +@inlinable +public func softmax( + _ x: Tensor, + alongAxis axis: Int +) -> Tensor { + let expx = exp(x) + return expx / expx.sum(alongAxes: axis) +} + +@inlinable +func _vjpSoftmax( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = softmax(x) + return (value, { v in + let sumChannels = (v * value).sum(alongAxes: -1) + return (v - sumChannels) * value + }) +} + +/// Computes the log-softmax of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpLogSoftmax(_:) where T : TensorFlowFloatingPoint) +public func logSoftmax(_ x: Tensor) -> Tensor { + return Raw.logSoftmax(logits: x) +} + +/// Computes `relu` of the specified tensor element-wise. +/// Specifically, computes `max(0, x)`. 
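A quick sanity sketch of the forward behavior of the softmax family above (illustrative values):

```swift
let logits = Tensor<Float>([[1, 2, 3], [1, 1, 1]])
let probs = softmax(logits)               // each row sums to 1
let rowSums = probs.sum(alongAxes: -1)    // ≈ [[1.0], [1.0]]
let logProbs = logSoftmax(logits)         // ≈ log(probs), computed more stably
```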
+@inlinable +@differentiable(vjp: _vjpRelu(_:) where T : TensorFlowFloatingPoint) +public func relu(_ x: Tensor) -> Tensor { + return max(0, x) +} + +@inlinable +func _vjpRelu( + _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { - return (softplus(x), { v in v * sigmoid(x) }) + return (relu(x), { v in Tensor(x .> 0) * v }) } From c3243f414a253694af8a1fb9a8eb3bf1af19d3b4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 17:30:34 -0400 Subject: [PATCH 32/55] Added log-softmax VJP. --- Sources/DeepLearning/Operators/Math.swift | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 4ffa1e800..1156f72b9 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -106,6 +106,16 @@ public func logSoftmax(_ x: Tensor) -> Tensor { return Raw.logSoftmax(logits: x) } +@inlinable +func _vjpLogSoftmax( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = logSoftmax(x) + return (value, { v in + v - v.sum(alongAxes: -1) * exp(value) + }) +} + /// Computes `relu` of the specified tensor element-wise. /// Specifically, computes `max(0, x)`. @inlinable From 4547a6dc617a9cdb9544980d477b7760672aeefc Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 20:20:49 -0400 Subject: [PATCH 33/55] Minor bug fix. --- Sources/DeepLearning/Operators/Math.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 1156f72b9..1169f774d 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -16,6 +16,10 @@ import TensorFlow #endif +#if COMPILING_TENSORFLOW_MODULE +infix operator .> : ComparisonPrecedence +#endif + /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. @inlinable @differentiable(vjp: _vjpRound) From 19cdbd9fd40dcb6ba24562608e345af67c999d53 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 12:09:27 -0400 Subject: [PATCH 34/55] Brought some initializers from stdlib. --- Sources/DeepLearning/Initializers.swift | 378 ++++++++++++++++------ Sources/DeepLearning/Operators/Math.swift | 46 +-- 2 files changed, 297 insertions(+), 127 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 13384567b..2b61a5c5b 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -17,46 +17,132 @@ import TensorFlow #endif public extension Tensor { - /// Creates a tensor from an array of tensors (which may themselves be scalars). + /// Creates a tensor with the specified shape and a single, repeated scalar + /// value. + /// + /// - Parameters: + /// - shape: The dimensions of the tensor. + /// - repeatedValue: The scalar value to repeat. @inlinable - @differentiable(where Scalar : TensorFlowFloatingPoint) - init(_ elements: [Tensor]) { - self = Tensor(stacking: elements) + @available(*, deprecated, renamed: "init(repeating:shape:)") + init(shape: TensorShape, repeating repeatedValue: Scalar) { + self.init(repeating: repeatedValue, shape: shape) } - /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than - /// the current tensor and each tensor in `tensors`. 
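The `init(repeating:shape:)` initializer above is differentiable with respect to the repeated scalar, and `_vjpInit` reduces the incoming cotangent back to a single value. A small sketch of what that implies, assuming the `gradient(at:in:)` helper from the TensorFlow module:

```swift
// Filling a [2, 3] tensor from one scalar and summing it uses the scalar six times,
// so the derivative with respect to that scalar is 6.
let dValue = gradient(at: Float(2.5)) { value in
    Tensor(repeating: value, shape: [2, 3]).sum()
}
// dValue == 6.0
```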
- /// - /// Given that `tensors` all have shape `[A, B, C]`, and `tensors.count = N`, then: - /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. - /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. - /// - etc. - /// - /// For example: - /// ``` - /// // 'x' is [1, 4] - /// // 'y' is [2, 5] - /// // 'z' is [3, 6] - /// Tensor(stacking: [x, y, z]) // is [[1, 4], [2, 5], [3, 6]] - /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] - /// ``` - /// - /// This is the opposite of `Tensor.unstacked`. + /// Creates a tensor with the specified shape and a single, repeated scalar value. /// /// - Parameters: - /// - tensors: Tensors to stack. - /// - axis: Dimension along which to stack. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same shape. - /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the - /// provided tensors. - /// - /// - Returns: The stacked tensor. + /// - repeatedValue: The scalar value to repeat. + /// - shape: The dimensions of the tensor. + @inlinable + @differentiable( + vjp: _vjpInit(repeating:shape:) where Scalar : TensorFlowFloatingPoint) + init(repeating repeatedValue: Scalar, shape: TensorShape) { + self = Raw.fill( + dims: Tensor(shape.dimensions.map(Int32.init)), + value: Tensor(repeatedValue)) + } + + /// Creates a tensor by broadcasting the given scalar to a given rank with + /// all dimensions being 1. + @inlinable + // @differentiable(where Scalar : TensorFlowFloatingPoint) + init(broadcasting scalar: Scalar, rank: Int) { + self = Tensor(scalar).reshaped(to: TensorShape(repeating: 1, count: rank)) + } + + /// Creates a tensor of shape `[4]` from a 4-tuple. + /// - Note: This is intended for internal use, for example, to initialize a + /// tensor attribute from `convolved2D`'s `strides` argument. + @inlinable + internal init(_ scalars: (Scalar, Scalar, Scalar, Scalar)) { + self.init([scalars.0, scalars.1, scalars.2, scalars.3]) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpInit( + repeating repeatedValue: Scalar, + shape: TensorShape + ) -> (Tensor, (Tensor) -> Scalar) { + return (Tensor(repeating: repeatedValue, shape: shape), { + $0.sum().scalarized() + }) + } +} + +//===------------------------------------------------------------------------------------------===// +// Casting +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar : Numeric { + /// Perform an element-wise type conversion from a `Bool` tensor. + @inlinable + init(_ other: Tensor) { + self = Raw.cast(other) + } + + /// Perform an element-wise conversion from another `Tensor`. + @inlinable + @differentiable( + vjp: _vjpCast where Scalar : TensorFlowFloatingPoint, + OtherScalar: TensorFlowFloatingPoint) + init(_ other: Tensor) { + self = Raw.cast(other) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { @inlinable - @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) - init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { - self = Raw.pack(tensors, axis: Int64(axis)) + static func _vjpCast( + _ other: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + return (Tensor(other), { v in Tensor(v) }) } +} + +public extension Tensor { + /// Creates a tensor from an array of tensors (which may themselves be scalars). 
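The casting initializer and `_vjpCast` above make element-wise dtype conversion differentiable by casting the cotangent back to the source scalar type. A short sketch:

```swift
let xd = Tensor<Double>([1.0, 2.0, 3.0])
let xf = Tensor<Float>(xd)                              // element-wise cast
let dxd = gradient(at: xd) { Tensor<Float>($0).sum() }
// dxd == Tensor<Double>([1, 1, 1]): the Float cotangent is cast back to Double.
```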
+ @inlinable + // @differentiable(where Scalar : TensorFlowFloatingPoint) + init(_ elements: [Tensor]) { + self = Tensor(stacking: elements) + } + + // /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than + // /// the current tensor and each tensor in `tensors`. + // /// + // /// Given that `tensors` all have shape `[A, B, C]`, and `tensors.count = N`, then: + // /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. + // /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. + // /// - etc. + // /// + // /// For example: + // /// ``` + // /// // 'x' is [1, 4] + // /// // 'y' is [2, 5] + // /// // 'z' is [3, 6] + // /// Tensor(stacking: [x, y, z]) // is [[1, 4], [2, 5], [3, 6]] + // /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + // /// ``` + // /// + // /// This is the opposite of `Tensor.unstacked`. + // /// + // /// - Parameters: + // /// - tensors: Tensors to stack. + // /// - axis: Dimension along which to stack. Negative values wrap around. + // /// + // /// - Precondition: All tensors must have the same shape. + // /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + // /// provided tensors. + // /// + // /// - Returns: The stacked tensor. + // @inlinable + // @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) + // init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { + // self = Raw.pack(tensors, axis: Int64(axis)) + // } /// Concatenates `tensors` along the `axis` dimension. /// @@ -91,100 +177,182 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. @inlinable - @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } - /// Returns a tiled tensor, constructed by tiling the provided tensor. - /// - /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The - /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the - /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For - /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. - /// - /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. - @inlinable - @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar : TensorFlowFloatingPoint) - init(tiling tensor: Tensor, multiples: Tensor) { - self = Raw.tile(tensor, multiples: multiples) - } + // /// Returns a tiled tensor, constructed by tiling the provided tensor. + // /// + // /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The + // /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the + // /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For + // /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. + // /// + // /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. 
+ // @inlinable + // @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar : TensorFlowFloatingPoint) + // init(tiling tensor: Tensor, multiples: Tensor) { + // self = Raw.tile(tensor, multiples: multiples) + // } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @usableFromInline - static func _vjpStacking( - stacking tensors: [Tensor], - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(stacking: tensors, alongAxis: axis) - return (result, { v in - return Array.DifferentiableView(v.unstack(alongAxis: axis)) - }) - } +// internal extension Tensor where Scalar : TensorFlowFloatingPoint { +// @inlinable +// static func _vjpStacking( +// stacking tensors: [Tensor], +// alongAxis axis: Int = 0 +// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { +// let result = Tensor(stacking: tensors, alongAxis: axis) +// return (result, { v in +// return Array.DifferentiableView(v.unstack(alongAxis: axis)) +// }) +// } - @usableFromInline - static func _vjpConcatenating( - concatenating tensors: [Tensor], - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(concatenating: tensors, alongAxis: axis) - let posAxis = axis < 0 ? axis + tensors[0].rank : axis - let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) - return (result, { [count = tensors.count] v in - if count == 1 { return Array.DifferentiableView([v]) } - let splits = v.split(sizes: sizes, alongAxis: posAxis) - return Array.DifferentiableView(splits) - }) - } +// @inlinable +// static func _vjpConcatenating( +// concatenating tensors: [Tensor], +// alongAxis axis: Int = 0 +// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { +// let result = Tensor(concatenating: tensors, alongAxis: axis) +// let posAxis = axis < 0 ? 
axis + tensors[0].rank : axis +// let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) +// return (result, { [count = tensors.count] v in +// if count == 1 { return Array.DifferentiableView([v]) } +// let splits = v.split(sizes: sizes, alongAxis: posAxis) +// return Array.DifferentiableView(splits) +// }) +// } + +// @inlinable +// static func _vjpTiling( +// tiling tensor: Tensor, +// multiples: Tensor +// ) -> (Tensor, (Tensor) -> Tensor) { +// let result = Tensor(tiling: tensor, multiples: multiples) +// return (result, { [shape = tensor.shapeTensor] v in +// let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() +// let axes = Tensor( +// rangeFrom: 0, to: Int32(splitShape.scalarCount), stride: 2) +// return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) +// }) +// } +// } - @usableFromInline - static func _vjpTiling( - tiling tensor: Tensor, - multiples: Tensor - ) -> (Tensor, (Tensor) -> Tensor) { - let result = Tensor(tiling: tensor, multiples: multiples) - return (result, { [shape = tensor.shapeTensor] v in - let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() - let axes = Tensor( - rangeFrom: 0, to: Int32(splitShape.scalarCount), stride: 2) - return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) - }) - } -} + +//===------------------------------------------------------------------------------------------===// +// Numeric Initialization +//===------------------------------------------------------------------------------------------===// public extension Tensor where Scalar : Numeric { - /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided - /// tensor. + /// Creates a tensor with all scalars set to zero. + /// + /// - Parameter shape: Shape of the tensor. + @inlinable + init(zeros shape: TensorShape) { + self.init(repeating: 0, shape: shape) + } + + /// Creates a tensor with all scalars set to one. /// - /// - Parameter other: Tensor whose shape and data type to use. + /// - Parameter shape: Shape of the tensor. @inlinable - init(zerosLike other: Tensor) { - self = Raw.zerosLike(other) + init(ones shape: TensorShape) { + self.init(repeating: 1, shape: shape) } - /// Creates a tensor with all scalars set to one that has the same shape and type as the provided - /// tensor. + // /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided + // /// tensor. + // /// + // /// - Parameter other: Tensor whose shape and data type to use. + // @inlinable + // init(zerosLike other: Tensor) { + // self = Raw.zerosLike(other) + // } + + // /// Creates a tensor with all scalars set to one that has the same shape and type as the provided + // /// tensor. + // /// + // /// - Parameter other: Tensor whose shape and data type to use. + // @inlinable + // init(onesLike other: Tensor) { + // self = Raw.onesLike(other) + // } + + + /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, + /// an end value, stepping by the specified amount. + /// + /// - Parameters: + /// - start: The starting value to use for the sequence. If the sequence + /// contains any values, the first one is `start`. + /// - end: An end value to limit the sequence. `end` is never an element of + /// the resulting sequence. + /// - stride: The amount to step by with each iteration. `stride` must be + /// positive. /// - /// - Parameter other: Tensor whose shape and data type to use. 
@inlinable - init(onesLike other: Tensor) { - self = Raw.onesLike(other) + init(rangeFrom start: Scalar, to end: Scalar, stride: Scalar) { + self = Raw.range(start: Tensor(start), limit: Tensor(end), delta: Tensor(stride)) } - /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an - /// end value, stepping by the specified amount. + // /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an + // /// end value, stepping by the specified amount. + // /// + // /// - Parameters: + // /// - start: The starting value to use for the sequence. If the sequence contains any values, + // /// the first one is `start`. + // /// - end: An end value to limit the sequence. `end` is never an element of the resulting + // /// sequence. + // /// - stride: The amount to step by with each iteration. `stride` must be positive. + // @inlinable + // init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { + // self = Raw.range(start: start, limit: end, delta: stride) + // } + + /// Creates a one-hot tensor at given indices. The locations represented by + /// `indices` take value `onValue` (`1` by default), while all other locations + /// take value `offValue` (`0` by default). If the input `indices` is rank + /// `n`, the new tensor will have rank `n+1`. The new axis is created at + /// dimension `axis` (by default, the new axis is appended at the end). + /// + /// If `indices` is a scalar, the new tensor's shape will be a vector of + /// length `depth`. + /// + /// If `indices` is a vector of length `features`, the output shape will be: + /// features x depth, if axis == -1 + /// depth x features, if axis == 0 + /// + /// If `indices` is a matrix (batch) with shape `[batch, features]`, the + /// output shape will be: + /// batch x features x depth, if axis == -1 + /// batch x depth x features, if axis == 1 + /// depth x batch x features, if axis == 0 /// /// - Parameters: - /// - start: The starting value to use for the sequence. If the sequence contains any values, - /// the first one is `start`. - /// - end: An end value to limit the sequence. `end` is never an element of the resulting - /// sequence. - /// - stride: The amount to step by with each iteration. `stride` must be positive. + /// - indices: A `Tensor` of indices. + /// - depth: A scalar defining the depth of the one hot dimension. + /// - onValue: A scalar defining the value at the location referred to by + /// some index in `indices`. + /// - offValue: A scalar defining the value at a location that is not + /// referred to by any index in `indices`. + /// - axis: The axis to fill. The default is `-1`, a new inner-most axis. + /// @inlinable - init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { - self = Raw.range(start: start, limit: end, delta: stride) + init( + oneHotAtIndices indices: Tensor, + depth: Int, + onValue: Scalar = 1, + offValue: Scalar = 0, + axis: Int = -1 + ) { + self = Raw.oneHot( + indices: indices, + depth: Tensor(Int32(depth)), + onValue: Tensor(onValue), + offValue: Tensor(offValue), + axis: Int64(axis)) } } diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 1169f774d..be927ef61 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -49,28 +49,28 @@ internal func _vjpSigmoid( return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) } -/// Computes the log-sigmoid of the specified tensor element-wise. 
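A usage sketch for the one-hot initializer defined above (illustrative values):

```swift
let labels = Tensor<Int32>([0, 2, 1])
let encoded = Tensor<Float>(oneHotAtIndices: labels, depth: 3)
// [[1, 0, 0],
//  [0, 0, 1],
//  [0, 1, 0]]
```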
Specifically, -/// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. -@inlinable -@differentiable -public func logSigmoid(_ x: Tensor) -> Tensor { - return -softplus(-x) -} - -/// Computes the softplus function for the specified tensor element-wise. The softplus function is -/// defined as `log(exp(x) + 1)`. -@inlinable -@differentiable(vjp: _vjpSoftplus) -public func softplus(_ x: Tensor) -> Tensor { - return Raw.softplus(features: x) -} - -@inlinable -internal func _vjpSoftplus( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - return (softplus(x), { v in v * sigmoid(x) }) -} +// /// Computes the log-sigmoid of the specified tensor element-wise. Specifically, +// /// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. +// @inlinable +// @differentiable +// public func logSigmoid(_ x: Tensor) -> Tensor { +// return -softplus(-x) +// } + +// /// Computes the softplus function for the specified tensor element-wise. The softplus function is +// /// defined as `log(exp(x) + 1)`. +// @inlinable +// @differentiable(vjp: _vjpSoftplus) +// public func softplus(_ x: Tensor) -> Tensor { +// return Raw.softplus(features: x) +// } + +// @inlinable +// internal func _vjpSoftplus( +// _ x: Tensor +// ) -> (Tensor, (Tensor) -> Tensor) { +// return (softplus(x), { v in v * sigmoid(x) }) +// } /// Computes the softmax of the specified tensor along the last axis. @@ -84,11 +84,13 @@ public func softmax(_ x: Tensor) -> Tensor { /// Computes the softmax of the specified tensor along the specified axis. /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. @inlinable +// TODO: [AD]. public func softmax( _ x: Tensor, alongAxis axis: Int ) -> Tensor { let expx = exp(x) + // TODO: [BUG] keepDims = true for the sum. return expx / expx.sum(alongAxes: axis) } From a16d911230cb9a19c3f2e495b40b40f09f5233b4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 12:26:50 -0400 Subject: [PATCH 35/55] Brought some more stuff from the stdlib. --- Sources/DeepLearning/Initializers.swift | 11 +- Sources/DeepLearning/Operators/Basic.swift | 635 ++++++++++++--------- Sources/DeepLearning/Tensors.swift | 109 ++++ 3 files changed, 482 insertions(+), 273 deletions(-) create mode 100644 Sources/DeepLearning/Tensors.swift diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 2b61a5c5b..7eb77250b 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -102,6 +102,10 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { } } +//===------------------------------------------------------------------------------------------===// +// Stacking / Concatenating / Tiling +//===------------------------------------------------------------------------------------------===// + public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). 
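The axis-wise `softmax(_:alongAxis:)` above normalizes over an arbitrary dimension instead of the last one. A usage sketch (illustrative values):

```swift
let scores = Tensor<Float>([[1, 2], [3, 4]])
let overRows = softmax(scores, alongAxis: 0)   // entries in each column sum to 1
let overLast = softmax(scores)                 // entries in each row sum to 1
```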
@inlinable @@ -240,9 +244,8 @@ public extension Tensor { // } // } - //===------------------------------------------------------------------------------------------===// -// Numeric Initialization +// Numeric //===------------------------------------------------------------------------------------------===// public extension Tensor where Scalar : Numeric { @@ -356,6 +359,10 @@ public extension Tensor where Scalar : Numeric { } } +//===------------------------------------------------------------------------------------------===// +// Random +//===------------------------------------------------------------------------------------------===// + public extension Tensor where Scalar == Int32 { /// Creates a tensor with the specified shape, randomly sampling scalar values /// from a discrete uniform distribution. diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 052a2d368..7df841cf1 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -16,310 +16,403 @@ import TensorFlow #endif +//===------------------------------------------------------------------------------------------===// +// Shape Transformations +//===------------------------------------------------------------------------------------------===// + public extension Tensor { + /// Convert to a tensor with the specified rank, with all dimensions equal to 1. + @inlinable + func makeTensor(rank: Int) -> Tensor { + return Tensor(repeating: self, shape: TensorShape(rank)) + } + + /// Reshape to the shape of the specified `Tensor`. + /// - Precondition: The number of scalars matches the new shape. @inlinable @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) - func unstack(alongAxis axis: Int = 0) -> [Tensor] { - return split(numSplits: shape[axis], alongAxis: axis) + func reshaped(like other: Tensor) -> Tensor { + return reshaped(toShape: other.shapeTensor) } + /// Reshape to the specified shape. + /// - Precondition: The number of scalars matches the new shape. @inlinable - @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) - func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { - return Raw.split( - splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func reshaped(to newShape: TensorShape) -> Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + return reshaped(toShape: Tensor({newShape.dimensions.map(Int32.init)}())) } + /// Reshape to the specified `Tensor` representing a shape. + /// - Precondition: The number of scalars matches the new shape. @inlinable @differentiable( wrt: self, - vjp: _vjpSplit(sizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) - func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { - return Raw.splitV( - value: self, - sizeSplits: sizes, - splitDim: Tensor(Int32(axis)), - numSplit: Int64(sizes.shape[0])) + vjp: _vjpReshaped(toShape:) where Scalar : TensorFlowFloatingPoint) + func reshaped(toShape newShape: Tensor) -> Tensor { + return Raw.reshape(self, shape: newShape) } - /// Gathers slices of this tensor at `indices` along the `axis` dimension. 
- /// - /// For 0-D (scalar) `indices`: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices, - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// - /// For 1-D (vector) `indices`: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// i, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices[i], - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// - /// In the general case, produces a resulting tensor where: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// i_{batch\_dims}, ..., i_{M-1}, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices[i_0, ..., i_{M-1}], - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// where `N = self.rank` and `M = indices.rank`. - /// - /// The shape of the resulting tensor is: - /// `self.shape[.., - alongAxis axis: Int = 0 - ) -> Tensor { - return Raw.gatherV2(params: self, indices: indices, axis: Tensor(Int32(axis))) + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func flattened() -> Tensor { + return reshaped(to: [-1]) } - /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the - /// first `batchDims` dimensions that correspond to batch dimensions. - /// - /// Performs similar functionality to `gathering`, except that the resulting tensor shape is now: - /// `self.shape[.., - alongAxis axis: Int, - numBatchDims batchDims: Int - ) -> Tensor { - precondition(batchDims >= 0 && batchDims < indices.rank, - "'numBatchDims' must be non-negative and less than 'indices.rank'.") - precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") + @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar : TensorFlowFloatingPoint) + func expandingShape(at shapeIndex: Int) -> Tensor { + return Raw.expandDims(self, dim: Tensor(Int32(shapeIndex))) + } - // Handle the axis argument by transposing the axis dimension so that it is the first - // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then - // transposing the result to put the pre-axis dimensions before the indices dimensions. - if axis != batchDims { - // Adjust axis to be positive. - let posAxis = axis < 0 ? axis + rank : axis + /// Returns a rank-lifted `Tensor` with a leading dimension of 1. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func rankLifted() -> Tensor { + return expandingShape(at: 0) + } - precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") - precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") + /// Remove the specified dimensions of size 1 from the shape of a tensor. If no dimensions are + /// specified, then all dimensions of size 1 will be removed. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func squeezingShape(at axes: Int...) -> Tensor { + return squeezingShape(at: axes) + } - // Move self[axis] up to self[batchDims]. - let permutation = Tensor(concatenating: [ - Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), - Tensor(Int32(axis)).rankLifted(), - Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), - Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) - let tensor = transposed(withPermutations: permutation) - let result = tensor.batchGathering( - atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) + /// Remove the specified dimensions of size 1 from the shape of a tensor. 
If no dimensions are + /// specified, then all dimensions of size 1 will be removed. + @inlinable + @differentiable(wrt: self, vjp: _vjpSqueezingShape(at:) where Scalar : TensorFlowFloatingPoint) + func squeezingShape(at axes: [Int]) -> Tensor { + return Raw.squeeze(self, squeezeDims: axes.map(Int32.init)) + } - // Move the result dimensions corresponding to self[batchDims ..< axis] to just before - // the dimensions corresponding to indices[batchDims ...]. - let start = indices.rank + posAxis - batchDims - let resultPermutation = Tensor(concatenating: [ - Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), - Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), - Tensor(rangeFrom: Int32(batchDims), to: Int32(indices.rank), stride: 1), - Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) - return result.transposed(withPermutations: resultPermutation) - } + // @inlinable + // @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + // func unstack(alongAxis axis: Int = 0) -> [Tensor] { + // return split(numSplits: shape[axis], alongAxis: axis) + // } - var batchIndices = indices - var accumulated = Tensor(ones: []) - for d in (1...batchDims).reversed() { - accumulated *= shapeTensor[d] - let dValue = shapeTensor[d - 1] - let dIndices = Tensor( - rangeFrom: Tensor(zeros: []), - to: dValue, - stride: Tensor(ones: []) - ) * accumulated - let dShape = Tensor(concatenating: [ - Tensor([Int32](repeating: 1, count: d - 1)), - dValue.rankLifted(), - Tensor([Int32](repeating: 1, count: indices.rank - 1))]) - batchIndices += dIndices.reshaped(toShape: dShape) - } + // @inlinable + // @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) + // func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { + // return Raw.split( + // splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) + // } - let flatIndices = batchIndices.flattened() - let outerShape = shapeTensor[Int(batchDims + 1)...] - let innerShape = shapeTensor[.., alongAxis axis: Int = 0) -> [Tensor] { + // return Raw.splitV( + // value: self, + // sizeSplits: sizes, + // splitDim: Tensor(Int32(axis)), + // numSplit: Int64(sizes.shape[0])) + // } - /// Gathers values from this tensor according to the provided boolean mask. - /// - /// For example: - /// ``` - /// // 1-D example - /// // tensor is [0, 1, 2, 3] - /// // mask is [true, false, true, false] - /// tensor.gathering(where: mask) // is [0, 2] - /// - /// // 2-D example - /// // tensor is [[1, 2], [3, 4], [5, 6]] - /// // mask is [true, false, true] - /// tensor.gathering(where: mask) // is [[1, 2], [5, 6]] - /// ``` - /// - /// In general, `0 < mask.rank = K <= tensor.rank`, and the `mask`'s shape must match the first - /// K dimensions of the `tensor`'s shape. We then have: - /// `tensor.gathering(where: mask)[i, j1, ..., jd] = tensor[i1, ..., iK, j1, ..., jd]`, where - /// `[i1, ..., iK]` is the `i`th `true` entry of `mask` (row-major order). - /// - /// The `axis` could be used with `mask` to indicate the axis to mask from. In that case, - /// `axis + mask.rank <= tensor.rank` and the `mask``'s shape must match the first - /// `axis + mask.rank` dimensions of the `tensor`'s shape. - /// - /// - Parameters: - /// - mask: K-D boolean tensor, where `K <= self.rank`. - /// - axis: 0-D integer tensor representing the axis in `self` to mask from, where - /// `K + axis <= self.rank`. - /// - /// - Precondition: The `mask` cannot be a scalar: `mask.rank != 0`. 
- /// - /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor - /// corresponding to `true` values in `mask`. - @inlinable - // TODO: @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) - func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { - precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - let posAxis = axis < 0 ? axis + rank : axis - let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() - let reshapedTensor = reshaped( - toShape: Tensor(concatenating: [ - shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) - return reshapedTensor.gathering(atIndices: indices, alongAxis: posAxis) - } + // /// Gathers slices of this tensor at `indices` along the `axis` dimension. + // /// + // /// For 0-D (scalar) `indices`: + // /// ``` + // /// result[p_0, ..., p_{axis-1}, + // /// p_{axis + 1}, ..., p_{N-1}] = + // /// self[p_0, ..., p_{axis-1}, + // /// indices, + // /// p_{axis + 1}, ..., p_{N-1}] + // /// ``` + // /// + // /// For 1-D (vector) `indices`: + // /// ``` + // /// result[p_0, ..., p_{axis-1}, + // /// i, + // /// p_{axis + 1}, ..., p_{N-1}] = + // /// self[p_0, ..., p_{axis-1}, + // /// indices[i], + // /// p_{axis + 1}, ..., p_{N-1}] + // /// ``` + // /// + // /// In the general case, produces a resulting tensor where: + // /// ``` + // /// result[p_0, ..., p_{axis-1}, + // /// i_{batch\_dims}, ..., i_{M-1}, + // /// p_{axis + 1}, ..., p_{N-1}] = + // /// self[p_0, ..., p_{axis-1}, + // /// indices[i_0, ..., i_{M-1}], + // /// p_{axis + 1}, ..., p_{N-1}] + // /// ``` + // /// where `N = self.rank` and `M = indices.rank`. + // /// + // /// The shape of the resulting tensor is: + // /// `self.shape[.., + // alongAxis axis: Int = 0 + // ) -> Tensor { + // return Raw.gatherV2(params: self, indices: indices, axis: Tensor(Int32(axis))) + // } + + // /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the + // /// first `batchDims` dimensions that correspond to batch dimensions. + // /// + // /// Performs similar functionality to `gathering`, except that the resulting tensor shape is now: + // /// `self.shape[.., + // alongAxis axis: Int, + // numBatchDims batchDims: Int + // ) -> Tensor { + // precondition(batchDims >= 0 && batchDims < indices.rank, + // "'numBatchDims' must be non-negative and less than 'indices.rank'.") + // precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") + + // // Handle the axis argument by transposing the axis dimension so that it is the first + // // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then + // // transposing the result to put the pre-axis dimensions before the indices dimensions. + // if axis != batchDims { + // // Adjust axis to be positive. + // let posAxis = axis < 0 ? axis + rank : axis + + // precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") + // precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") + + // // Move self[axis] up to self[batchDims]. 
+ // let permutation = Tensor(concatenating: [ + // Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), + // Tensor(Int32(axis)).rankLifted(), + // Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), + // Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) + // let tensor = transposed(withPermutations: permutation) + // let result = tensor.batchGathering( + // atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) + + // // Move the result dimensions corresponding to self[batchDims ..< axis] to just before + // // the dimensions corresponding to indices[batchDims ...]. + // let start = indices.rank + posAxis - batchDims + // let resultPermutation = Tensor(concatenating: [ + // Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), + // Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), + // Tensor(rangeFrom: Int32(batchDims), to: Int32(indices.rank), stride: 1), + // Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) + // return result.transposed(withPermutations: resultPermutation) + // } + + // var batchIndices = indices + // var accumulated = Tensor(ones: []) + // for d in (1...batchDims).reversed() { + // accumulated *= shapeTensor[d] + // let dValue = shapeTensor[d - 1] + // let dIndices = Tensor( + // rangeFrom: Tensor(zeros: []), + // to: dValue, + // stride: Tensor(ones: []) + // ) * accumulated + // let dShape = Tensor(concatenating: [ + // Tensor([Int32](repeating: 1, count: d - 1)), + // dValue.rankLifted(), + // Tensor([Int32](repeating: 1, count: indices.rank - 1))]) + // batchIndices += dIndices.reshaped(toShape: dShape) + // } + + // let flatIndices = batchIndices.flattened() + // let outerShape = shapeTensor[Int(batchDims + 1)...] + // let innerShape = shapeTensor[.., alongAxis axis: Int = 0) -> Tensor { + // precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") + // let posAxis = axis < 0 ? axis + rank : axis + // let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() + // let reshapedTensor = reshaped( + // toShape: Tensor(concatenating: [ + // shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) + // return reshapedTensor.gathering(atIndices: indices, alongAxis: posAxis) + // } } -public extension Tensor { - /// Returns the locations of non-zero / true values in this tensor. - /// - /// The coordinates are returned in a 2-D tensor where the first dimension (rows) represents the - /// number of non-zero elements, and the second dimension (columns) represents the coordinates - /// of the non-zero elements. Keep in mind that the shape of the output tensor can vary - /// depending on how many true values there are in this tensor. Indices are output in row-major - /// order. - /// - /// For example: - /// ``` - /// // 'input' is [[true, false], [true, false]] - /// // 'input' has 2 true values and so the output has 2 rows. - /// // 'input' has rank of 2, and so the second dimension of the output has size 2. - /// input.nonZeroIndices() // is [[0, 0], [1, 0]] - /// - /// // 'input' is [[[ true, false], [ true, false]], - /// // [[false, true], [false, true]], - /// // [[false, false], [false, true]]] - /// // 'input' has 5 true values and so the output has 5 rows. - /// // 'input' has rank 3, and so the second dimension of the output has size 3. 
- /// input.nonZeroIndices() // is [[0, 0, 0], - /// // [0, 1, 0], - /// // [1, 0, 1], - /// // [1, 1, 1], - /// // [2, 1, 1]] - /// ``` - /// - /// - Returns: A tensor with shape `(num_true, rank(condition))`. +internal extension Tensor where Scalar : TensorFlowFloatingPoint { @inlinable - func nonZeroIndices() -> Tensor { - return Raw.where_(self) + func _vjpReshaped(toShape newShape: Tensor) -> (Tensor, (Tensor) -> Tensor) { + let value = reshaped(toShape: newShape) + return (value, { [shape = shapeTensor] v in v.reshaped(toShape: shape) }) } -} -internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @usableFromInline - func _vjpSplit( - numSplits: Int, - alongAxis axis: Int = 0 - ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { - let result = split(numSplits: numSplits, alongAxis: axis) - return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + @inlinable + func _vjpExpandingShape(at shapeIndex: Int) -> (Tensor, (Tensor) -> Tensor) { + let value = expandingShape(at: shapeIndex) + return (value, { v in v.squeezingShape(at: shapeIndex) }) } - @usableFromInline - func _vjpSplit( - sizes: Tensor, - alongAxis axis: Int = 0 - ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { - let result = split(sizes: sizes, alongAxis: axis) - return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + @inlinable + func _vjpSqueezingShape(at axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { + let value = squeezingShape(at: axes) + return (value, { [shape = shapeTensor] v in v.reshaped(toShape: shape) }) } - @usableFromInline - func _vjpGathering( - atIndices indices: Tensor, - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Tensor) { - let result = gathering(atIndices: indices, alongAxis: axis) - let posAxis = axis < 0 ? axis + rank : axis - return (result, { [shape = shapeTensor] v in - let indicesSize = Tensor(Int32(indices.scalarCount)) - let outerShape = shape[..(rangeFrom: 0, to: Int32(outerSize), stride: 1) - let innerIndices = Tensor( - rangeFrom: Int32(outerSize) + 1, - to: Int32(outerSize) + 1 + Int32(innerSize), - stride: 1) - let valuesShape = Tensor(concatenating: [outerShape, indicesSize, innerShape]) - let values = v.reshaped(toShape: valuesShape) - let valueIndices = indices.reshaped(toShape: indicesSize) + // @inlinable + // func _vjpSplit( + // numSplits: Int, + // alongAxis axis: Int = 0 + // ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + // let result = split(numSplits: numSplits, alongAxis: axis) + // return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + // } - // We need to sum up every slice `values[..., i, ....]` corresponding to - // `tensor[..., indices[i], ...]`. Since `unsortedSegmentSum` does not support an axis - // parameter, we transpose the gather dimension to the front, then use - // `unsortedSegmentSum` to build a `[gatherAxis, outerAxes, innerAxes]` tensor with all - // the gradients affecting each index in `gatherAxis` summed up. 
- let permutations = Tensor(concatenating: [ - Tensor([Int32(outerSize)]), outerIndices, innerIndices]) - let transposedValues = values.transposed(withPermutations: permutations) - let gradient = Raw.unsortedSegmentSum( - data: transposedValues, - segmentIds: valueIndices, - numSegments: shape[posAxis]) + // @inlinable + // func _vjpSplit( + // sizes: Tensor, + // alongAxis axis: Int = 0 + // ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + // let result = split(sizes: sizes, alongAxis: axis) + // return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + // } + + // @inlinable + // func _vjpGathering( + // atIndices indices: Tensor, + // alongAxis axis: Int = 0 + // ) -> (Tensor, (Tensor) -> Tensor) { + // let result = gathering(atIndices: indices, alongAxis: axis) + // let posAxis = axis < 0 ? axis + rank : axis + // return (result, { [shape = shapeTensor] v in + // let indicesSize = Tensor(Int32(indices.scalarCount)) + // let outerShape = shape[..(rangeFrom: 0, to: Int32(outerSize), stride: 1) + // let innerIndices = Tensor( + // rangeFrom: Int32(outerSize) + 1, + // to: Int32(outerSize) + 1 + Int32(innerSize), + // stride: 1) + // let valuesShape = Tensor(concatenating: [outerShape, indicesSize, innerShape]) + // let values = v.reshaped(toShape: valuesShape) + // let valueIndices = indices.reshaped(toShape: indicesSize) + + // // We need to sum up every slice `values[..., i, ....]` corresponding to + // // `tensor[..., indices[i], ...]`. Since `unsortedSegmentSum` does not support an axis + // // parameter, we transpose the gather dimension to the front, then use + // // `unsortedSegmentSum` to build a `[gatherAxis, outerAxes, innerAxes]` tensor with all + // // the gradients affecting each index in `gatherAxis` summed up. + // let permutations = Tensor(concatenating: [ + // Tensor([Int32(outerSize)]), outerIndices, innerIndices]) + // let transposedValues = values.transposed(withPermutations: permutations) + // let gradient = Raw.unsortedSegmentSum( + // data: transposedValues, + // segmentIds: valueIndices, + // numSegments: shape[posAxis]) - // Finally, we invert the above transpose operation by moving dimension 0 back to its - // original position. - let inversePermutations = Tensor(concatenating: [ - outerIndices + 1, Tensor([0]), innerIndices]) - return gradient.transposed(withPermutations: inversePermutations) - }) - } + // // Finally, we invert the above transpose operation by moving dimension 0 back to its + // // original position. + // let inversePermutations = Tensor(concatenating: [ + // outerIndices + 1, Tensor([0]), innerIndices]) + // return gradient.transposed(withPermutations: inversePermutations) + // }) + // } } + +// public extension Tensor { +// /// Returns the locations of non-zero / true values in this tensor. +// /// +// /// The coordinates are returned in a 2-D tensor where the first dimension (rows) represents the +// /// number of non-zero elements, and the second dimension (columns) represents the coordinates +// /// of the non-zero elements. Keep in mind that the shape of the output tensor can vary +// /// depending on how many true values there are in this tensor. Indices are output in row-major +// /// order. +// /// +// /// For example: +// /// ``` +// /// // 'input' is [[true, false], [true, false]] +// /// // 'input' has 2 true values and so the output has 2 rows. +// /// // 'input' has rank of 2, and so the second dimension of the output has size 2. 
+// /// input.nonZeroIndices() // is [[0, 0], [1, 0]] +// /// +// /// // 'input' is [[[ true, false], [ true, false]], +// /// // [[false, true], [false, true]], +// /// // [[false, false], [false, true]]] +// /// // 'input' has 5 true values and so the output has 5 rows. +// /// // 'input' has rank 3, and so the second dimension of the output has size 3. +// /// input.nonZeroIndices() // is [[0, 0, 0], +// /// // [0, 1, 0], +// /// // [1, 0, 1], +// /// // [1, 1, 1], +// /// // [2, 1, 1]] +// /// ``` +// /// +// /// - Returns: A tensor with shape `(num_true, rank(condition))`. +// @inlinable +// func nonZeroIndices() -> Tensor { +// return Raw.where_(self) +// } +// } diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift new file mode 100644 index 000000000..d400bf935 --- /dev/null +++ b/Sources/DeepLearning/Tensors.swift @@ -0,0 +1,109 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +import TensorFlow +#endif + +//===------------------------------------------------------------------------------------------===// +// Description and Visualization +//===------------------------------------------------------------------------------------------===// + +// String conversion. +extension Tensor : CustomStringConvertible { + /// A textual representation of the tensor. + /// + /// - Note: use `fullDescription` for a non-pretty-printed description showing all scalars. + public var description: String { + return array.description + } +} + +public extension Tensor { + /// A textual representation of the tensor. Returns a summarized description if `summarize` is + /// true and the element count exceeds twice the `edgeElementCount`. + /// + /// - Parameters: + /// - lineWidth: The max line width for printing. Used to determine number of scalars to print + /// per line. + /// - edgeElementCount: The maximum number of elements to print before and after summarization + /// via ellipses (`...`). + /// - summarizing: If true, summarize description if element count exceeds twice + /// `edgeElementCount`. + func description( + lineWidth: Int = 80, + edgeElementCount: Int = 3, + summarizing: Bool = false + ) -> String { + return array.description( + lineWidth: lineWidth, + edgeElementCount: edgeElementCount, + summarizing: summarizing) + } + + /// A full, non-pretty-printed textual representation of the tensor, showing + /// all scalars. + var fullDescription: String { + return array.fullDescription + } +} + +// Xcode Playground display conversion. +extension Tensor : CustomPlaygroundDisplayConvertible { + public var playgroundDescription: Any { + return description + } +} + +// Mirror representation, used by debugger/REPL. 
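// Illustrative usage sketch (editorial addition, not part of the original patch; the tensor
// and printed output are hypothetical). `description(lineWidth:edgeElementCount:summarizing:)`
// elides the middle of large tensors, while `fullDescription` always prints every scalar.
let large = Tensor<Float>(ones: [1000])
print(large.description(summarizing: true))   // e.g. "[1.0, 1.0, 1.0, ..., 1.0, 1.0, 1.0]"
print(large.fullDescription)                  // prints all 1000 scalars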
+extension Tensor : CustomReflectable { + public var customMirror: Mirror { + return Mirror(self, children: [], displayStyle: .struct) + } +} + +//===------------------------------------------------------------------------------------------===// +// Codable Conformance +//===------------------------------------------------------------------------------------------===// + +extension Tensor : Codable where Scalar : Codable { + @inlinable + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + try container.encode(array) + } + + @inlinable + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + let array = try container.decode(ShapedArray.self) + self.init(array) + } +} + +//===------------------------------------------------------------------------------------------===// +// Equality +//===------------------------------------------------------------------------------------------===// + +extension Tensor : Equatable where Scalar : Equatable { + @inlinable + public static func == (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .== rhs).all() + } + + @inlinable + public static func != (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .== rhs).any() + } +} From 34b475acfdbd13d60885ce9b20f062f09c933e19 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 12:28:50 -0400 Subject: [PATCH 36/55] Minor edit. --- Sources/DeepLearning/Tensors.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift index d400bf935..daee815d8 100644 --- a/Sources/DeepLearning/Tensors.swift +++ b/Sources/DeepLearning/Tensors.swift @@ -16,6 +16,10 @@ import TensorFlow #endif +#if COMPILING_TENSORFLOW_MODULE +infix operator .== : ComparisonPrecedence +#endif + //===------------------------------------------------------------------------------------------===// // Description and Visualization //===------------------------------------------------------------------------------------------===// From 86072a49f0b65655dfdf24d549bb11f20aff67f8 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 12:56:34 -0400 Subject: [PATCH 37/55] Moved some more stuff to swift-apis. --- .../DeepLearning/Operators/Comparison.swift | 237 +++++++++++++ Sources/DeepLearning/Operators/Math.swift | 332 ++++++++++++++++++ Sources/DeepLearning/Tensors.swift | 16 - 3 files changed, 569 insertions(+), 16 deletions(-) create mode 100644 Sources/DeepLearning/Operators/Comparison.swift diff --git a/Sources/DeepLearning/Operators/Comparison.swift b/Sources/DeepLearning/Operators/Comparison.swift new file mode 100644 index 000000000..2bc7329be --- /dev/null +++ b/Sources/DeepLearning/Operators/Comparison.swift @@ -0,0 +1,237 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
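// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). The `Equatable` conformance in Tensors.swift above reduces the element-wise
// comparison with `.all()`, so `==` is true only when every pair of corresponding scalars
// matches, while `.==` keeps the per-element results.
let p = Tensor<Int32>([1, 2, 3])
let q = Tensor<Int32>([1, 2, 3])
let sameEverywhere = p == q     // true
let pointwise = p .== q         // [true, true, true]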
+ +#if !COMPILING_TENSORFLOW_MODULE +import TensorFlow +#endif + +infix operator .< : ComparisonPrecedence +infix operator .<= : ComparisonPrecedence +infix operator .>= : ComparisonPrecedence +infix operator .> : ComparisonPrecedence +infix operator .== : ComparisonPrecedence +infix operator .!= : ComparisonPrecedence + +public extension Tensor where Scalar : Numeric & Comparable { + /// Computes `lhs < rhs` element-wise and returns a `Tensor` of Boolean /// scalars. + @inlinable + static func .< (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.less(lhs, rhs) + } + + /// Computes `lhs <= rhs` element-wise and returns a `Tensor` of Boolean scalars. + @inlinable + static func .<= (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.lessEqual(lhs, rhs) + } + + /// Computes `lhs > rhs` element-wise and returns a `Tensor` of Boolean scalars. + @inlinable + static func .> (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.greater(lhs, rhs) + } + + /// Computes `lhs >= rhs` element-wise and returns a `Tensor` of Boolean scalars. + @inlinable + static func .>= (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.greaterEqual(lhs, rhs) + } + + /// Computes `lhs < rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.<` supports broadcasting. + @inlinable + static func .< (lhs: Scalar, rhs: Tensor) -> Tensor { + return Raw.less(Tensor(lhs), rhs) + } + + /// Computes `lhs <= rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.<=` supports broadcasting. + @inlinable + static func .<= (lhs: Scalar, rhs: Tensor) -> Tensor { + return Raw.lessEqual(Tensor(lhs), rhs) + } + + /// Computes `lhs > rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.>` supports broadcasting. + @inlinable + static func .> (lhs: Scalar, rhs: Tensor) -> Tensor { + return Raw.greater(Tensor(lhs), rhs) + } + + /// Computes `lhs >= rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.>=` supports broadcasting. + @inlinable + static func .>= (lhs: Scalar, rhs: Tensor) -> Tensor { + return Raw.greaterEqual(Tensor(lhs), rhs) + } + + /// Computes `lhs < rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.<` supports broadcasting. + @inlinable + static func .< (lhs: Tensor, rhs: Scalar) -> Tensor { + return Raw.less(lhs, Tensor(rhs)) + } + + /// Computes `lhs <= rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.<=` supports broadcasting. + @inlinable + static func .<= (lhs: Tensor, rhs: Scalar) -> Tensor { + return Raw.lessEqual(lhs, Tensor(rhs)) + } + + /// Computes `lhs > rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.>` supports broadcasting. + @inlinable + static func .> (lhs: Tensor, rhs: Scalar) -> Tensor { + return Raw.greater(lhs, Tensor(rhs)) + } + + /// Computes `lhs >= rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.>=` supports broadcasting. 
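// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). The pointwise comparison operators return Boolean tensors and broadcast a
// scalar on either side; the resulting masks can be combined with the element-wise logical
// helpers defined in Operators/Math.swift.
let v = Tensor<Float>([1, 2, 3, 4])
let big = v .> 2                                     // [false, false, true, true]
let inRange = (v .>= 2).elementsLogicalAnd(v .< 4)   // [false, true, true, false]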
+ @inlinable + static func .>= (lhs: Tensor, rhs: Scalar) -> Tensor { + return Raw.greaterEqual(lhs, Tensor(rhs)) + } +} + +extension Tensor : Equatable where Scalar : Equatable { + @inlinable + public static func == (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .== rhs).all() + } + + @inlinable + public static func != (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .== rhs).any() + } +} + +extension Tensor : Comparable where Scalar : Numeric & Comparable { + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically less than that of the second argument. + @inlinable + public static func < (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .< rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically less than or equal to that of the second argument. + @inlinable + public static func <= (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .<= rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically greater than that of the second argument. + @inlinable + public static func > (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .> rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically greater than or equal to that of the second argument. + @inlinable + public static func >= (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .>= rhs).all() + } +} + +public extension Tensor where Scalar : Numeric & Comparable { + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically less than that of the second argument. + @inlinable + static func < (lhs: Tensor, rhs: Scalar) -> Bool { + return (lhs .< rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically less than or equal to that of the second argument. + @inlinable + static func <= (lhs: Tensor, rhs: Scalar) -> Bool { + return (lhs .<= rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically greater than that of the second argument. + @inlinable + static func > (lhs: Tensor, rhs: Scalar) -> Bool { + return (lhs .> rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically greater than or equal to that of the second argument. + @inlinable + static func >= (lhs: Tensor, rhs: Scalar) -> Bool { + return (lhs .>= rhs).all() + } +} + +public extension Tensor where Scalar : Equatable { + /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.==` supports broadcasting. + @inlinable + static func .==(lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.equal(lhs, rhs) + } + + /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.!=` supports broadcasting. + @inlinable + static func .!=(lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.notEqual(lhs, rhs) + } + + /// Computes `lhs == rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.==` supports broadcasting. + @inlinable + static func .==(lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) .== rhs + } + + /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.!=` supports broadcasting. 
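// Editorial note (illustrative, not part of the original patch; values are hypothetical).
// The `Comparable` conformance above reduces the pointwise comparison with `.all()`, so
// `lhs < rhs` holds only when every element of `lhs` is less than the corresponding element
// of `rhs` (an element-wise rather than a strictly lexicographic ordering). Likewise, `!=`
// as written returns `(lhs .== rhs).any()`, which is true when at least one pair of elements
// matches; the logical negation of `==` would be `!(lhs .== rhs).all()`.
let r = Tensor<Float>([1, 2, 3])
let s = Tensor<Float>([2, 3, 4])
let strictlySmaller = r < s     // true, because 1 < 2, 2 < 3, and 3 < 4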
+ @inlinable + static func .!=(lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) .!= rhs + } + + /// Computes `lhs == rhs` element-wise and returns a `Tensor` of Boolean + /// scalars. + /// - Note: `.==` supports broadcasting. + @inlinable + static func .==(lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs .== Tensor(rhs) + } + + /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.!=` supports broadcasting. + @inlinable + static func .!=(lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs .!= Tensor(rhs) + } +} + +// TODO: infix operator ≈ : ComparisonPrecedence + +public extension Tensor where Scalar : FloatingPoint & Equatable { + /// Returns a `Tensor` of Boolean values indicating whether the elements of `self` are + /// approximately equal to those of `other`. + @inlinable + func elementsApproximatelyEqual( + _ other: Tensor, + tolerance: Double = 0.00001 + ) -> Tensor { + return Raw.approximateEqual(self, other, tolerance: tolerance) + } +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index be927ef61..56225be4a 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -20,6 +20,338 @@ import TensorFlow infix operator .> : ComparisonPrecedence #endif +// TODO: +// - Consider explicit broadcasting for elementwise binary ops when +// scalarization and rank getter are implemented. + +//===------------------------------------------------------------------------------------------===// +// Additive Group +//===------------------------------------------------------------------------------------------===// + +extension Tensor : AdditiveArithmetic where Scalar : Numeric { + /// A scalar zero tensor. + @inlinable + public static var zero: Tensor { + get { + return Tensor(zeros: []) + } + } + + /// Adds two tensors and produces their sum. + /// - Note: `+` supports broadcasting. + @inlinable + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + public static func + (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.add(lhs, rhs) + } + + /// Subtracts one tensor from another and produces their difference. + /// - Note: `-` supports broadcasting. + @inlinable + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + public static func - (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.sub(lhs, rhs) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpAdd(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (lhs + rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in + (v.unbroadcast(toShape: lhsShape), v.unbroadcast(toShape: rhsShape)) + }) + } + + @inlinable + static func _vjpSubtract(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (lhs - rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in + (v.unbroadcast(toShape: lhsShape), -v.unbroadcast(toShape: rhsShape)) + }) + } +} + +//===------------------------------------------------------------------------------------------===// +// Vector Space +//===------------------------------------------------------------------------------------------===// + +extension Tensor : VectorNumeric where Scalar : Numeric { + /// Multiplies the scalar with every scalar of the tensor and produces the product. 
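// Illustrative sketch (editorial addition, not part of the original patch; shapes and values
// are hypothetical). The VJPs above use `unbroadcast(toShape:)` to fold the incoming gradient
// back to each operand's original shape: dimensions that were broadcast during the forward
// pass are summed out in the backward pass.
let a = Tensor<Float>(ones: [2, 3])
let b = Tensor<Float>(ones: [3])
let upstream = Tensor<Float>(ones: [2, 3])                 // gradient flowing into `a + b`
let gradB = upstream.unbroadcast(toShape: b.shapeTensor)   // shape [3], every element 2.0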
+ @inlinable + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + public static func * (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) * rhs + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpMultiply(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (lhs * rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in + ((rhs * v).unbroadcast(toShape: lhsShape), (lhs * v).unbroadcast(toShape: rhsShape)) + }) + } +} + +extension Tensor : ShapedVectorNumeric where Scalar : Numeric {} + +extension Tensor : Differentiable where Scalar : TensorFlowFloatingPoint { + public typealias TangentVector = Tensor + public typealias CotangentVector = Tensor + public typealias AllDifferentiableVariables = Tensor + + @inlinable + public func tangentVector(from cotangent: CotangentVector) -> TangentVector { + return cotangent + } +} + +//===------------------------------------------------------------------------------------------===// +// Additional Element-wise Operators +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar : Numeric { + /// Adds the scalar to every scalar of the tensor and produces the sum. + @inlinable + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func + (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) + rhs + } + + /// Adds the scalar to every scalar of the tensor and produces the sum. + @inlinable + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func + (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs + Tensor(rhs) + } + + /// Subtracts the scalar from every scalar of the tensor and produces the difference. + @inlinable + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func - (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) - rhs + } + + /// Subtracts the scalar from every scalar of the tensor and produces the difference + @inlinable + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func - (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs - Tensor(rhs) + } + + /// Adds two tensors and stores the result in the left-hand-side variable. + /// - Note: `+=` supports broadcasting. + @inlinable + static func += (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs + rhs + } + + /// Adds the scalar to every scalar of the tensor and stores the result in the left-hand-side + /// variable. + @inlinable + static func += (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs + rhs + } + + /// Subtracts the second tensor from the first and stores the result in the left-hand-side + /// variable. + /// - Note: `-=` supports broadcasting. + @inlinable + static func -= (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs - rhs + } + + /// Subtracts the scalar from every scalar of the tensor and stores the result in the + /// left-hand-side variable. + @inlinable + static func -= (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs - rhs + } + + /// Multiplies two tensors and produces their product. + /// - Note: `*` supports broadcasting. + @inlinable + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func * (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.mul(lhs, rhs) + } + + /// Multiplies the scalar with every scalar of the tensor and produces the product. 
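// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). Scalar operands are promoted to tensors and broadcast, and the compound
// assignment operators simply rewrite to the binary forms defined above.
var t = Tensor<Float>([1, 2, 3])
t += 1                 // [2.0, 3.0, 4.0]
let scaled = 2 * t     // [4.0, 6.0, 8.0]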
+ @inlinable + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func * (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs * Tensor(rhs) + } + + /// Multiplies two tensors and stores the result in the left-hand-side variable. + /// - Note: `*=` supports broadcasting. + @inlinable + static func *= (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs * rhs + } + + @inlinable + static func *= (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs * rhs + } + + /// Returns the quotient of dividing the first tensor by the second. + /// - Note: `/` supports broadcasting. + @inlinable + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func / (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.div(lhs, rhs) + } + + /// Returns the quotient of dividing the scalar by the tensor, broadcasting the scalar. + @inlinable + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func / (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) / rhs + } + + /// Returns the quotient of dividing the tensor by the scalar, broadcasting the scalar. + @inlinable + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func / (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs / Tensor(rhs) + } + + /// Divides the first tensor by the second and stores the quotient in the left-hand-side + /// variable. + @inlinable + static func /= (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs / rhs + } + + /// Divides the tensor by the scalar, broadcasting the scalar, and stores the quotient in the + /// left-hand-side variable. + @inlinable + static func /= (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs / rhs + } + + /// Returns the remainder of dividing the first tensor by the second. + /// - Note: `%` supports broadcasting. + @inlinable + static func % (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.mod(lhs, rhs) + } + + /// Returns the remainder of dividing the tensor by the scalar, broadcasting the scalar. + @inlinable + static func % (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs % Tensor(rhs) + } + + /// Returns the remainder of dividing the scalar by the tensor, broadcasting the scalar. + @inlinable + static func % (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) % rhs + } + + /// Divides the first tensor by the second and stores the remainder in the left-hand-side + /// variable. + @inlinable + static func %= (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs % rhs + } + + /// Divides the tensor by the scalar and stores the remainder in the left-hand-side variable. 
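// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). Division and remainder broadcast a scalar operand, mirroring the
// tensor-tensor forms above.
let u = Tensor<Float>([3, 4, 5])
let halves = u / 2         // [1.5, 2.0, 2.5]
let remainders = u % 2     // [1.0, 0.0, 1.0]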
+ @inlinable + static func %= (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs % rhs + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpAdd(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { + return (lhs + rhs, { v in (v, v.sum().scalarized()) }) + } + + @inlinable + static func _vjpAdd(lhs: Scalar, rhs: Tensor) -> (Tensor, (Tensor) -> (Scalar, Tensor)) { + return (lhs + rhs, { v in (v.sum().scalarized(), v) }) + } + + @inlinable + static func _vjpSubtract(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { + return (lhs - rhs, { v in (v, 0 - v.sum().scalarized()) }) + } + + @inlinable + static func _vjpSubtract(lhs: Scalar, rhs: Tensor) -> (Tensor, (Tensor) -> (Scalar, Tensor)) { + return (lhs - rhs, { v in (v.sum().scalarized(), 0 - v) }) + } + + @inlinable + static func _vjpMultiply(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { + return (lhs * rhs, { v in (v * rhs, (v * lhs).sum().scalarized()) }) + } + + @inlinable + static func _vjpMultiply(lhs: Scalar, rhs: Tensor) -> (Tensor, (Tensor) -> (Scalar, Tensor)) { + return (lhs * rhs, { v in ((v * rhs).sum().scalarized(), v * lhs) }) + } + + @inlinable + static func _vjpDivide(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (lhs / rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in + ((v / rhs).unbroadcast(toShape: lhsShape), + ((-lhs) / rhs.squared() * v).unbroadcast(toShape: rhsShape)) + }) + } + + @inlinable + static func _vjpDivide(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { + return (lhs / rhs, { v in + (v / rhs, (v * (0 - lhs) / Tensor(rhs).squared()).sum().scalarized()) + }) + } + + @inlinable + static func _vjpDivide(lhs: Scalar, rhs: Tensor) -> (Tensor, (Tensor) -> (Scalar, Tensor)) { + return (lhs / rhs, { v in ((v / rhs).sum().scalarized(), v * -lhs / rhs.squared()) }) + } +} + +public extension Tensor where Scalar == Bool { + /// Computes `!self` element-wise. + @inlinable + func elementsLogicalNot() -> Tensor { + return Raw.logicalNot(self) + } + + /// Computes `self && other` element-wise. + /// - Note: `&&` supports broadcasting. + @inlinable + func elementsLogicalAnd(_ other: Tensor) -> Tensor { + return Raw.logicalAnd(self, other) + } + + /// Computes `self && other` element-wise, broadcasting `other`. + @inlinable + func elementsLogicalAnd(_ other: Scalar) -> Tensor { + return elementsLogicalAnd(Tensor(other)) + } + + /// Computes `self || other` element-wise. + @inlinable + func elementsLogicalOr(_ other: Tensor) -> Tensor { + return Raw.logicalOr(self, other) + } + + /// Computes `self || other` element-wise, broadcasting `other`. + @inlinable + func elementsLogicalOr(_ other: Scalar) -> Tensor { + return elementsLogicalOr(Tensor(other)) + } +} + +//===------------------------------------------------------------------------------------------===// +// Universal Functions +//===------------------------------------------------------------------------------------------===// + /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. 
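// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). The Boolean helpers above compose element-wise masks without relying on
// `&&`/`||`, which Swift defines for `Bool` operands.
let m1 = Tensor<Bool>([true, false, true])
let m2 = Tensor<Bool>([true, true, false])
let both = m1.elementsLogicalAnd(m2)        // [true, false, false]
let either = m1.elementsLogicalOr(m2)       // [true, true, true]
let neither = either.elementsLogicalNot()   // [false, false, false]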
@inlinable @differentiable(vjp: _vjpRound) diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift index daee815d8..e11e1f5d7 100644 --- a/Sources/DeepLearning/Tensors.swift +++ b/Sources/DeepLearning/Tensors.swift @@ -95,19 +95,3 @@ extension Tensor : Codable where Scalar : Codable { self.init(array) } } - -//===------------------------------------------------------------------------------------------===// -// Equality -//===------------------------------------------------------------------------------------------===// - -extension Tensor : Equatable where Scalar : Equatable { - @inlinable - public static func == (lhs: Tensor, rhs: Tensor) -> Bool { - return (lhs .== rhs).all() - } - - @inlinable - public static func != (lhs: Tensor, rhs: Tensor) -> Bool { - return (lhs .== rhs).any() - } -} From bc0a581166f6179d9a270403460190e46791e5e6 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 13:03:10 -0400 Subject: [PATCH 38/55] Removed all the newly-added ops. --- Sources/DeepLearning/Initializers.swift | 183 +++++-------- Sources/DeepLearning/Operators/Basic.swift | 304 --------------------- Sources/DeepLearning/Operators/Math.swift | 24 -- 3 files changed, 61 insertions(+), 450 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 7eb77250b..e644fd78a 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -109,44 +109,44 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - // @differentiable(where Scalar : TensorFlowFloatingPoint) + @differentiable(where Scalar : TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } - // /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than - // /// the current tensor and each tensor in `tensors`. - // /// - // /// Given that `tensors` all have shape `[A, B, C]`, and `tensors.count = N`, then: - // /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. - // /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. - // /// - etc. - // /// - // /// For example: - // /// ``` - // /// // 'x' is [1, 4] - // /// // 'y' is [2, 5] - // /// // 'z' is [3, 6] - // /// Tensor(stacking: [x, y, z]) // is [[1, 4], [2, 5], [3, 6]] - // /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] - // /// ``` - // /// - // /// This is the opposite of `Tensor.unstacked`. - // /// - // /// - Parameters: - // /// - tensors: Tensors to stack. - // /// - axis: Dimension along which to stack. Negative values wrap around. - // /// - // /// - Precondition: All tensors must have the same shape. - // /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the - // /// provided tensors. - // /// - // /// - Returns: The stacked tensor. - // @inlinable - // @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) - // init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { - // self = Raw.pack(tensors, axis: Int64(axis)) - // } + /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than + /// the current tensor and each tensor in `tensors`. 
+ /// + /// Given that `tensors` all have shape `[A, B, C]`, and `tensors.count = N`, then: + /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. + /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. + /// - etc. + /// + /// For example: + /// ``` + /// // 'x' is [1, 4] + /// // 'y' is [2, 5] + /// // 'z' is [3, 6] + /// Tensor(stacking: [x, y, z]) // is [[1, 4], [2, 5], [3, 6]] + /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + /// ``` + /// + /// This is the opposite of `Tensor.unstacked`. + /// + /// - Parameters: + /// - tensors: Tensors to stack. + /// - axis: Dimension along which to stack. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same shape. + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: The stacked tensor. + @inlinable + @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) + init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { + self = Raw.pack(tensors, axis: Int64(axis)) + } /// Concatenates `tensors` along the `axis` dimension. /// @@ -181,68 +181,40 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. @inlinable - // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } - - // /// Returns a tiled tensor, constructed by tiling the provided tensor. - // /// - // /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The - // /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the - // /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For - // /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. - // /// - // /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. - // @inlinable - // @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar : TensorFlowFloatingPoint) - // init(tiling tensor: Tensor, multiples: Tensor) { - // self = Raw.tile(tensor, multiples: multiples) - // } } -// internal extension Tensor where Scalar : TensorFlowFloatingPoint { -// @inlinable -// static func _vjpStacking( -// stacking tensors: [Tensor], -// alongAxis axis: Int = 0 -// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { -// let result = Tensor(stacking: tensors, alongAxis: axis) -// return (result, { v in -// return Array.DifferentiableView(v.unstack(alongAxis: axis)) -// }) -// } - -// @inlinable -// static func _vjpConcatenating( -// concatenating tensors: [Tensor], -// alongAxis axis: Int = 0 -// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { -// let result = Tensor(concatenating: tensors, alongAxis: axis) -// let posAxis = axis < 0 ? 
axis + tensors[0].rank : axis -// let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) -// return (result, { [count = tensors.count] v in -// if count == 1 { return Array.DifferentiableView([v]) } -// let splits = v.split(sizes: sizes, alongAxis: posAxis) -// return Array.DifferentiableView(splits) -// }) -// } +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpStacking( + stacking tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(stacking: tensors, alongAxis: axis) + return (result, { v in + return Array.DifferentiableView(v.unstack(alongAxis: axis)) + }) + } -// @inlinable -// static func _vjpTiling( -// tiling tensor: Tensor, -// multiples: Tensor -// ) -> (Tensor, (Tensor) -> Tensor) { -// let result = Tensor(tiling: tensor, multiples: multiples) -// return (result, { [shape = tensor.shapeTensor] v in -// let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() -// let axes = Tensor( -// rangeFrom: 0, to: Int32(splitShape.scalarCount), stride: 2) -// return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) -// }) -// } -// } + @inlinable + static func _vjpConcatenating( + concatenating tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(concatenating: tensors, alongAxis: axis) + let posAxis = axis < 0 ? axis + tensors[0].rank : axis + let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) + return (result, { [count = tensors.count] v in + if count == 1 { return Array.DifferentiableView([v]) } + let splits = v.split(sizes: sizes, alongAxis: posAxis) + return Array.DifferentiableView(splits) + }) + } +} //===------------------------------------------------------------------------------------------===// // Numeric @@ -265,25 +237,6 @@ public extension Tensor where Scalar : Numeric { self.init(repeating: 1, shape: shape) } - // /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided - // /// tensor. - // /// - // /// - Parameter other: Tensor whose shape and data type to use. - // @inlinable - // init(zerosLike other: Tensor) { - // self = Raw.zerosLike(other) - // } - - // /// Creates a tensor with all scalars set to one that has the same shape and type as the provided - // /// tensor. - // /// - // /// - Parameter other: Tensor whose shape and data type to use. - // @inlinable - // init(onesLike other: Tensor) { - // self = Raw.onesLike(other) - // } - - /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, /// an end value, stepping by the specified amount. /// @@ -300,20 +253,6 @@ public extension Tensor where Scalar : Numeric { self = Raw.range(start: Tensor(start), limit: Tensor(end), delta: Tensor(stride)) } - // /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an - // /// end value, stepping by the specified amount. - // /// - // /// - Parameters: - // /// - start: The starting value to use for the sequence. If the sequence contains any values, - // /// the first one is `start`. - // /// - end: An end value to limit the sequence. `end` is never an element of the resulting - // /// sequence. - // /// - stride: The amount to step by with each iteration. `stride` must be positive. 
- // @inlinable - // init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { - // self = Raw.range(start: start, limit: end, delta: stride) - // } - /// Creates a one-hot tensor at given indices. The locations represented by /// `indices` take value `onValue` (`1` by default), while all other locations /// take value `offValue` (`0` by default). If the input `indices` is rank diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 7df841cf1..ae0fe360a 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -91,214 +91,6 @@ public extension Tensor { func squeezingShape(at axes: [Int]) -> Tensor { return Raw.squeeze(self, squeezeDims: axes.map(Int32.init)) } - - // @inlinable - // @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) - // func unstack(alongAxis axis: Int = 0) -> [Tensor] { - // return split(numSplits: shape[axis], alongAxis: axis) - // } - - // @inlinable - // @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) - // func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { - // return Raw.split( - // splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) - // } - - // @inlinable - // @differentiable( - // wrt: self, - // vjp: _vjpSplit(sizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) - // func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { - // return Raw.splitV( - // value: self, - // sizeSplits: sizes, - // splitDim: Tensor(Int32(axis)), - // numSplit: Int64(sizes.shape[0])) - // } - - // /// Gathers slices of this tensor at `indices` along the `axis` dimension. - // /// - // /// For 0-D (scalar) `indices`: - // /// ``` - // /// result[p_0, ..., p_{axis-1}, - // /// p_{axis + 1}, ..., p_{N-1}] = - // /// self[p_0, ..., p_{axis-1}, - // /// indices, - // /// p_{axis + 1}, ..., p_{N-1}] - // /// ``` - // /// - // /// For 1-D (vector) `indices`: - // /// ``` - // /// result[p_0, ..., p_{axis-1}, - // /// i, - // /// p_{axis + 1}, ..., p_{N-1}] = - // /// self[p_0, ..., p_{axis-1}, - // /// indices[i], - // /// p_{axis + 1}, ..., p_{N-1}] - // /// ``` - // /// - // /// In the general case, produces a resulting tensor where: - // /// ``` - // /// result[p_0, ..., p_{axis-1}, - // /// i_{batch\_dims}, ..., i_{M-1}, - // /// p_{axis + 1}, ..., p_{N-1}] = - // /// self[p_0, ..., p_{axis-1}, - // /// indices[i_0, ..., i_{M-1}], - // /// p_{axis + 1}, ..., p_{N-1}] - // /// ``` - // /// where `N = self.rank` and `M = indices.rank`. - // /// - // /// The shape of the resulting tensor is: - // /// `self.shape[.., - // alongAxis axis: Int = 0 - // ) -> Tensor { - // return Raw.gatherV2(params: self, indices: indices, axis: Tensor(Int32(axis))) - // } - - // /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the - // /// first `batchDims` dimensions that correspond to batch dimensions. 
- // /// - // /// Performs similar functionality to `gathering`, except that the resulting tensor shape is now: - // /// `self.shape[.., - // alongAxis axis: Int, - // numBatchDims batchDims: Int - // ) -> Tensor { - // precondition(batchDims >= 0 && batchDims < indices.rank, - // "'numBatchDims' must be non-negative and less than 'indices.rank'.") - // precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") - - // // Handle the axis argument by transposing the axis dimension so that it is the first - // // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then - // // transposing the result to put the pre-axis dimensions before the indices dimensions. - // if axis != batchDims { - // // Adjust axis to be positive. - // let posAxis = axis < 0 ? axis + rank : axis - - // precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") - // precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") - - // // Move self[axis] up to self[batchDims]. - // let permutation = Tensor(concatenating: [ - // Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), - // Tensor(Int32(axis)).rankLifted(), - // Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), - // Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) - // let tensor = transposed(withPermutations: permutation) - // let result = tensor.batchGathering( - // atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) - - // // Move the result dimensions corresponding to self[batchDims ..< axis] to just before - // // the dimensions corresponding to indices[batchDims ...]. - // let start = indices.rank + posAxis - batchDims - // let resultPermutation = Tensor(concatenating: [ - // Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), - // Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), - // Tensor(rangeFrom: Int32(batchDims), to: Int32(indices.rank), stride: 1), - // Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) - // return result.transposed(withPermutations: resultPermutation) - // } - - // var batchIndices = indices - // var accumulated = Tensor(ones: []) - // for d in (1...batchDims).reversed() { - // accumulated *= shapeTensor[d] - // let dValue = shapeTensor[d - 1] - // let dIndices = Tensor( - // rangeFrom: Tensor(zeros: []), - // to: dValue, - // stride: Tensor(ones: []) - // ) * accumulated - // let dShape = Tensor(concatenating: [ - // Tensor([Int32](repeating: 1, count: d - 1)), - // dValue.rankLifted(), - // Tensor([Int32](repeating: 1, count: indices.rank - 1))]) - // batchIndices += dIndices.reshaped(toShape: dShape) - // } - - // let flatIndices = batchIndices.flattened() - // let outerShape = shapeTensor[Int(batchDims + 1)...] - // let innerShape = shapeTensor[.., alongAxis axis: Int = 0) -> Tensor { - // precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - // let posAxis = axis < 0 ? 
axis + rank : axis - // let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() - // let reshapedTensor = reshaped( - // toShape: Tensor(concatenating: [ - // shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) - // return reshapedTensor.gathering(atIndices: indices, alongAxis: posAxis) - // } } internal extension Tensor where Scalar : TensorFlowFloatingPoint { @@ -319,100 +111,4 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { let value = squeezingShape(at: axes) return (value, { [shape = shapeTensor] v in v.reshaped(toShape: shape) }) } - - // @inlinable - // func _vjpSplit( - // numSplits: Int, - // alongAxis axis: Int = 0 - // ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { - // let result = split(numSplits: numSplits, alongAxis: axis) - // return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) - // } - - // @inlinable - // func _vjpSplit( - // sizes: Tensor, - // alongAxis axis: Int = 0 - // ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { - // let result = split(sizes: sizes, alongAxis: axis) - // return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) - // } - - // @inlinable - // func _vjpGathering( - // atIndices indices: Tensor, - // alongAxis axis: Int = 0 - // ) -> (Tensor, (Tensor) -> Tensor) { - // let result = gathering(atIndices: indices, alongAxis: axis) - // let posAxis = axis < 0 ? axis + rank : axis - // return (result, { [shape = shapeTensor] v in - // let indicesSize = Tensor(Int32(indices.scalarCount)) - // let outerShape = shape[..(rangeFrom: 0, to: Int32(outerSize), stride: 1) - // let innerIndices = Tensor( - // rangeFrom: Int32(outerSize) + 1, - // to: Int32(outerSize) + 1 + Int32(innerSize), - // stride: 1) - // let valuesShape = Tensor(concatenating: [outerShape, indicesSize, innerShape]) - // let values = v.reshaped(toShape: valuesShape) - // let valueIndices = indices.reshaped(toShape: indicesSize) - - // // We need to sum up every slice `values[..., i, ....]` corresponding to - // // `tensor[..., indices[i], ...]`. Since `unsortedSegmentSum` does not support an axis - // // parameter, we transpose the gather dimension to the front, then use - // // `unsortedSegmentSum` to build a `[gatherAxis, outerAxes, innerAxes]` tensor with all - // // the gradients affecting each index in `gatherAxis` summed up. - // let permutations = Tensor(concatenating: [ - // Tensor([Int32(outerSize)]), outerIndices, innerIndices]) - // let transposedValues = values.transposed(withPermutations: permutations) - // let gradient = Raw.unsortedSegmentSum( - // data: transposedValues, - // segmentIds: valueIndices, - // numSegments: shape[posAxis]) - - // // Finally, we invert the above transpose operation by moving dimension 0 back to its - // // original position. - // let inversePermutations = Tensor(concatenating: [ - // outerIndices + 1, Tensor([0]), innerIndices]) - // return gradient.transposed(withPermutations: inversePermutations) - // }) - // } } - -// public extension Tensor { -// /// Returns the locations of non-zero / true values in this tensor. -// /// -// /// The coordinates are returned in a 2-D tensor where the first dimension (rows) represents the -// /// number of non-zero elements, and the second dimension (columns) represents the coordinates -// /// of the non-zero elements. Keep in mind that the shape of the output tensor can vary -// /// depending on how many true values there are in this tensor. Indices are output in row-major -// /// order. 
-// /// -// /// For example: -// /// ``` -// /// // 'input' is [[true, false], [true, false]] -// /// // 'input' has 2 true values and so the output has 2 rows. -// /// // 'input' has rank of 2, and so the second dimension of the output has size 2. -// /// input.nonZeroIndices() // is [[0, 0], [1, 0]] -// /// -// /// // 'input' is [[[ true, false], [ true, false]], -// /// // [[false, true], [false, true]], -// /// // [[false, false], [false, true]]] -// /// // 'input' has 5 true values and so the output has 5 rows. -// /// // 'input' has rank 3, and so the second dimension of the output has size 3. -// /// input.nonZeroIndices() // is [[0, 0, 0], -// /// // [0, 1, 0], -// /// // [1, 0, 1], -// /// // [1, 1, 1], -// /// // [2, 1, 1]] -// /// ``` -// /// -// /// - Returns: A tensor with shape `(num_true, rank(condition))`. -// @inlinable -// func nonZeroIndices() -> Tensor { -// return Raw.where_(self) -// } -// } diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 56225be4a..2799004cd 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -381,30 +381,6 @@ internal func _vjpSigmoid( return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) } -// /// Computes the log-sigmoid of the specified tensor element-wise. Specifically, -// /// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. -// @inlinable -// @differentiable -// public func logSigmoid(_ x: Tensor) -> Tensor { -// return -softplus(-x) -// } - -// /// Computes the softplus function for the specified tensor element-wise. The softplus function is -// /// defined as `log(exp(x) + 1)`. -// @inlinable -// @differentiable(vjp: _vjpSoftplus) -// public func softplus(_ x: Tensor) -> Tensor { -// return Raw.softplus(features: x) -// } - -// @inlinable -// internal func _vjpSoftplus( -// _ x: Tensor -// ) -> (Tensor, (Tensor) -> Tensor) { -// return (softplus(x), { v in v * sigmoid(x) }) -// } - - /// Computes the softmax of the specified tensor along the last axis. /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. @inlinable From a91c00a26d65c86952c3f3e2a4895eb4bcc01a5b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 13:31:49 -0400 Subject: [PATCH 39/55] Moved some more stuff to swift-apis. --- Sources/DeepLearning/Operators/Basic.swift | 90 +++++ Sources/DeepLearning/Operators/Math.swift | 438 ++++++++++++++++++++- 2 files changed, 521 insertions(+), 7 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index ae0fe360a..6327d1390 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -112,3 +112,93 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { return (value, { [shape = shapeTensor] v in v.reshaped(toShape: shape) }) } } + +//===------------------------------------------------------------------------------------------===// +// Other Tensor Transformations +//===------------------------------------------------------------------------------------------===// + +public extension Tensor { + /// Returns a transposed tensor, with dimensions permuted in the specified order. 
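// Illustrative usage sketch (editorial addition, not part of the original patch; shapes are
// hypothetical). `transposed(withPermutations:)`, defined just below, reorders the dimensions
// according to the given permutation, and the no-argument `transposed()` reverses them.
let w = Tensor<Float>(ones: [2, 3, 4])
let swapped = w.transposed(withPermutations: 0, 2, 1)   // shape [2, 4, 3]
let reversed = w.transposed()                           // shape [4, 3, 2]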
+ @inlinable + @differentiable( + wrt: self, + vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + func transposed(withPermutations permutations: Tensor) -> Tensor { + return Raw.transpose(self, perm: permutations) + } + + /// Returns a transposed tensor, with dimensions permuted in the specified order. + @inlinable + @differentiable( + wrt: self, + vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + func transposed(withPermutations permutations: [Int]) -> Tensor { + let permutations = permutations.map(Int32.init) + return transposed(withPermutations: Tensor(permutations)) + } + + /// Returns a transposed tensor, with dimensions permuted in the specified order. + @inlinable + @differentiable( + wrt: self, vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + func transposed(withPermutations permutations: Int...) -> Tensor { + return transposed(withPermutations: permutations) + } + + /// Returns a transposed tensor, with dimensions permuted in reverse order. + @inlinable + @differentiable(wrt: self, vjp: _vjpTransposed() where Scalar : TensorFlowFloatingPoint) + func transposed() -> Tensor { + let defaultPermutations = rankTensor - 1 - Tensor( + rangeFrom: 0, to: Int32(rank), stride: 1) + return transposed(withPermutations: Tensor(defaultPermutations)) + } + + /// Concatenates tensors along the specified axis. + /// - Precondition: The tensors must have the same dimensions, except for the + /// specified axis. + /// - Precondition: The axis must be in the range `-rank.. Tensor { + return Tensor(concatenating: [self, other], alongAxis: axis) + } + + /// Concatenation operator. + /// - Note: `++` is a custom operator that does not exist in Swift, but does + /// in Haskell/Scala. Its addition is not an insignificant language change + /// and may be controversial. The existence/naming of `++` will be discussed + /// during a later API design phase. + @inlinable + @differentiable(where Scalar : TensorFlowFloatingPoint) + static func ++ (lhs: Tensor, rhs: Tensor) -> Tensor { + return lhs.concatenated(with: rhs) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + func _vjpTransposed( + withPermutations permutations: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + let value = transposed(withPermutations: permutations) + return (value, { $0.transposed(withPermutations: permutations) }) + } + + @inlinable + func _vjpTransposed(withPermutations permutations: [Int]) -> (Tensor, (Tensor) -> Tensor) { + let value = transposed(withPermutations: permutations) + return (value, { $0.transposed(withPermutations: permutations) }) + } + + @inlinable + func _vjpTransposed(withPermutations permutations: Int...) 
-> (Tensor, (Tensor) -> Tensor) { + let value = transposed(withPermutations: permutations) + return (value, { $0.transposed(withPermutations: permutations) }) + } + + @inlinable + func _vjpTransposed() -> (Tensor, (Tensor) -> Tensor) { + return (transposed(), { $0.transposed() }) + } +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 2799004cd..7b295912d 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -349,21 +349,445 @@ public extension Tensor where Scalar == Bool { } //===------------------------------------------------------------------------------------------===// -// Universal Functions +// Element-wise Unary Math Functions //===------------------------------------------------------------------------------------------===// -/// Returns the values of the specified tensor rounded to the nearest integer, element-wise. +// Export Glibc/Darwin math functions. We should not require users to import +// Foundation/Darwin/Glibc in order to use scalar math functions. +// +#if os(macOS) || os(iOS) || os(watchOS) || os(tvOS) +@_exported import Darwin.C +#else +@_exported import Glibc +#endif +// +// FIXME(rxwei): Scoped imports are not yet supported in parseable module +// interfaces, so `@_exported import` won't work. When that becomes supported, +// switch to `@_exported import` by removing `import Darwin.C/Glibc` above and +// uncommenting the following lines. In the meantime, consider using indirect +// wrappers for each function so that random libc symbols won't be leaked to +// users' code completion. +// +// #if os(macOS) || os(iOS) || os(watchOS) || os(tvOS) +// @_exported import func Darwin.C.sin +// @_exported import func Darwin.C.cos +// @_exported import func Darwin.C.tan +// @_exported import func Darwin.C.sinf +// @_exported import func Darwin.C.cosf +// @_exported import func Darwin.C.tanf +// @_exported import func Darwin.C.sinh +// @_exported import func Darwin.C.cosh +// @_exported import func Darwin.C.tanh +// @_exported import func Darwin.C.sinhf +// @_exported import func Darwin.C.coshf +// @_exported import func Darwin.C.tanhf +// @_exported import func Darwin.C.log +// @_exported import func Darwin.C.logf +// @_exported import func Darwin.C.exp +// @_exported import func Darwin.C.expf +// @_exported import func Darwin.C.pow +// @_exported import func Darwin.C.powf +// #else +// @_exported import func Glibc.sin +// @_exported import func Glibc.cos +// @_exported import func Glibc.tan +// @_exported import func Glibc.sinf +// @_exported import func Glibc.cosf +// @_exported import func Glibc.tanf +// @_exported import func Glibc.sinh +// @_exported import func Glibc.cosh +// @_exported import func Glibc.tanh +// @_exported import func Glibc.sinhf +// @_exported import func Glibc.coshf +// @_exported import func Glibc.tanhf +// @_exported import func Glibc.log +// @_exported import func Glibc.logf +// @_exported import func Glibc.exp +// @_exported import func Glibc.expf +// @_exported import func Glibc.pow +// @_exported import func Glibc.powf +// #endif + +public extension Tensor where Scalar : SignedNumeric { + /// Computes the negation of the specified tensor element-wise. 
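+  ///
+  /// For example (illustrative), `-Tensor<Float>([1, -2, 3])` evaluates to `[-1, 2, -3]`.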
+ @inlinable + @differentiable(vjp: _vjpNegate(_:) where Scalar : TensorFlowFloatingPoint) + static prefix func - (rhs: Tensor) -> Tensor { + return Raw.neg(rhs) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpNegate(_ x: Tensor) -> (Tensor, (Tensor) -> Tensor) { + return (-x, { v in -v }) + } +} + +/// Computes the absolute value of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpRound) -public func round(_ x: Tensor) -> Tensor { - return Raw.round(x) +@differentiable(vjp: _vjpAbs(_:) where T : TensorFlowFloatingPoint) +public func abs(_ x: Tensor) -> Tensor { + return Raw.abs(x) } @inlinable -internal func _vjpRound( +internal func _vjpAbs( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { - return (round(x), { v in Tensor(zerosLike: v) }) + let sign = Raw.sign(x) + return (abs(x), { v in v * sign }) +} + +/// Computes the natural logarithm of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpLog(_:) where T : TensorFlowFloatingPoint) +public func log(_ x: Tensor) -> Tensor { + return Raw.log(x) +} + +@inlinable +internal func _vjpLog( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (log(x), { v in v / x }) +} + +/// Computes `sin` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpSin(_:) where T : TensorFlowFloatingPoint) +public func sin(_ x: Tensor) -> Tensor { + return Raw.sin(x) +} + +@inlinable +internal func _vjpSin( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (sin(x), { v in v * cos(x) }) +} + +/// Computes `cos` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpCos(_:) where T : TensorFlowFloatingPoint) +public func cos(_ x: Tensor) -> Tensor { + return Raw.cos(x) +} + +@inlinable +internal func _vjpCos( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (cos(x), { v in -v * sin(x) }) +} + +/// Computes `tan` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpTan(_:) where T : TensorFlowFloatingPoint) +public func tan(_ x: Tensor) -> Tensor { + return Raw.tan(x) +} + +@inlinable +internal func _vjpTan( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = tan(x) + return (value, { v in v * (1 + value.squared()) }) +} + +/// Computes `sinh` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpSinh(_:) where T : TensorFlowFloatingPoint) +public func sinh(_ x: Tensor) -> Tensor { + return Raw.sinh(x) +} + +@inlinable +internal func _vjpSinh( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (sinh(x), { v in v * cosh(x) }) +} + +/// Computes `cosh` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpCosh(_:) where T : TensorFlowFloatingPoint) +public func cosh(_ x: Tensor) -> Tensor { + return Raw.cosh(x) +} + +@inlinable +internal func _vjpCosh( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (cosh(x), { v in v * sinh(x) }) +} + +/// Computes `tanh` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpTanh(_:) where T : TensorFlowFloatingPoint) +public func tanh(_ x: Tensor) -> Tensor { + return Raw.tanh(x) +} + +@inlinable +internal func _vjpTanh( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = tanh(x) + return (value, { v in v * (1 - value.squared()) }) +} + +/// Computes the square of the tensor. 
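+///
+/// For example (illustrative), `Tensor<Float>([2, -3]).squared()` evaluates to `[4, 9]`.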
+public extension Tensor where Scalar : Numeric { + @inlinable + @differentiable(wrt: self, vjp: _vjpSquared() where Scalar : TensorFlowFloatingPoint) + func squared() -> Tensor { + return Raw.square(self) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + func _vjpSquared() -> (Tensor, (Tensor) -> Tensor) { + return (squared(), { 2 * self * $0 }) + } +} + +/// Computes the square root of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpSqrt(_:) where T : TensorFlowFloatingPoint) +public func sqrt(_ x: Tensor) -> Tensor { + return Raw.sqrt(x) +} + +@inlinable +internal func _vjpSqrt( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = sqrt(x) + return (value, { v in v / (2 * value) }) +} + +/// Computes the inverse square root of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpRsqrt(_:) where T : TensorFlowFloatingPoint) +public func rsqrt(_ x: Tensor) -> Tensor { + return Raw.rsqrt(x) +} + +@inlinable +internal func _vjpRsqrt( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = rsqrt(x) + return (value, { v in -v / 2 * value }) +} + +/// Computes `exp` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpExp(_:) where T : TensorFlowFloatingPoint) +public func exp(_ x: Tensor) -> Tensor { + return Raw.exp(x) +} + +@inlinable +internal func _vjpExp( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = exp(x) + return (value, { v in value * v }) +} + +/// Computes the ceiling of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpCeil(_:) where T : TensorFlowFloatingPoint) +public func ceil(_ x: Tensor) -> Tensor { + return Raw.ceil(x) +} + +@inlinable +internal func _vjpCeil( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (ceil(x), { _ in Tensor(0).broadcast(like: x) }) +} + +/// Computes the floor of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpFloor(_:) where T : TensorFlowFloatingPoint) +public func floor(_ x: Tensor) -> Tensor { + return Raw.floor(x) +} + +@inlinable +internal func _vjpFloor( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (floor(x), { _ in Tensor(0).broadcast(like: x) }) +} + +//===------------------------------------------------------------------------------------------===// +// Element-wise Binary Math Functions +//===------------------------------------------------------------------------------------------===// + +/// Computes the power of the first tensor to the second tensor. +@inlinable +@differentiable(vjp: _vjpPow(_:_:) where T : TensorFlowFloatingPoint) +public func pow(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : FloatingPoint { + return Raw.pow(lhs, rhs) +} + +@inlinable +internal func _vjpPow( + _ x: Tensor, _ y: Tensor +) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = pow(x, y) + return (value, { v in + ((v * y * pow(x, y-1)).unbroadcast(like: x), + (v * log(x) * value).unbroadcast(like: y)) + }) +} + +/// Computes the power of the scalar to the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func pow(_ lhs: T, _ rhs: Tensor) -> Tensor where T : FloatingPoint { + return pow(Tensor(lhs), rhs) +} + +/// Computes the power of the tensor to the scalar, broadcasting the scalar. 
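+///
+/// For example (illustrative), `pow(Tensor<Float>([2, 3]), 2)` evaluates to `[4, 9]`.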
+@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func pow(_ lhs: Tensor, _ rhs: T) -> Tensor where T : FloatingPoint { + return pow(lhs, Tensor(rhs)) +} + +/// Computes the element-wise maximum of two tensors. +/// - Note: `max` supports broadcasting. +@inlinable +@differentiable(vjp: _vjpMax(_:_:) where T : TensorFlowFloatingPoint) +public func max(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { + return Raw.maximum(lhs, rhs) +} + +@inlinable +internal func _vjpMax( + _ x: Tensor, _ y: Tensor +) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = max(x, y) + return (value, { v in _vjpMinMaxHelper(x, y, originalValue: value, vector: v) }) +} + +/// Computes the element-wise maximum of the scalar and the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func max(_ lhs: T, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { + return max(Tensor(lhs), rhs) +} + +/// Computes the element-wise maximum of the scalar and the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func max(_ lhs: Tensor, _ rhs: T) -> Tensor where T : Numeric & Comparable { + return max(lhs, Tensor(rhs)) +} + +/// Computes the element-wise minimum of two tensors. +/// - Note: `min` supports broadcasting. +@inlinable +@differentiable(vjp: _vjpMin(_:_:) where T : TensorFlowFloatingPoint) +public func min(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { + return Raw.minimum(lhs, rhs) +} + +@inlinable +internal func _vjpMin( + _ x: Tensor, _ y: Tensor +) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = min(x, y) + return (value, { v in _vjpMinMaxHelper(x, y, originalValue: value, vector: v) }) +} + +/// Computes the element-wise minimum of the scalar and the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func min(_ lhs: T, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { + return min(Tensor(lhs), rhs) +} + +/// Computes the element-wise minimum of the scalar and the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func min(_ lhs: Tensor, _ rhs: T) -> Tensor where T : Numeric & Comparable { + return min(lhs, Tensor(rhs)) +} + +@inlinable +internal func _vjpMinMaxHelper( + _ x: Tensor, + _ y: Tensor, + originalValue: Tensor, + vector: Tensor +) -> (Tensor, Tensor) { + let denom = 1 + Tensor(x .== y) + let dfdx = vector * Tensor(x .== originalValue) / denom + let dfdy = vector * Tensor(y .== originalValue) / denom + return (dfdx.unbroadcast(like: x), dfdy.unbroadcast(like: y)) +} + +//===------------------------------------------------------------------------------------------===// +// Selection Functions +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar == Bool { + /// Returns a new tensor containing elements from either `left` or `right`, + /// depending on the elements of `self`. + /// + /// `self` acts as a mask that chooses, based on the value at each scalar, + /// whether the corresponding scalar in the output should be taken from + /// `left` (if `true`) or `right` (if `false`). + /// + /// - Precondition: `left` and `right` must have the same shape. If + /// `left` and `right` are scalar, then `self` must also be scalar. 
If + /// `left` and `right` have rank greater than or equal to 1, then `self` + /// must be either have the same shape as `left` or be a 1-D `Tensor` such + /// that `self.scalarCount == left[0]`. + @available(*, deprecated, message: "Use '.replacing(with:mask:)' instead") + @inlinable + func selecting(_ left: Tensor, _ right: Tensor) -> Tensor { + return left.replacing(with: right, where: self) + } +} + +public extension Tensor { + /// Replaces elements of this tensor with `other` in the lanes where `mask` is + /// `true`. + /// + /// - Precondition: `self` and `other` must have the same shape. If + /// `self` and `other` are scalar, then `mask` must also be scalar. If + /// `self` and `other` have rank greater than or equal to `1`, then `mask` + /// must be either have the same shape as `self` or be a 1-D `Tensor` such + /// that `mask.scalarCount == self.shape[0]`. + @inlinable + @differentiable(wrt: (self, other), vjp: _vjpReplacing where Scalar : TensorFlowFloatingPoint) + func replacing(with other: Tensor, where mask: Tensor) -> Tensor { + return Raw.select(condition: mask, t: self, e: other) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + func _vjpReplacing( + with other: Tensor, + where mask: Tensor + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (replacing(with: other, where: mask), { v in + let zeros = Tensor(zeros: v.shape) + return (v.replacing(with: zeros, where: mask), zeros.replacing(with: v, where: mask)) + }) + } } /// Computes the sigmoid of the specified tensor element-wise. From 0ad98436d047da18ff3bcd5c6c92edcb60886ca4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 14:07:26 -0400 Subject: [PATCH 40/55] Moved some more stuff to swift-apis. --- Sources/DeepLearning/Operators/Basic.swift | 368 ++++++++++ Sources/DeepLearning/Operators/Math.swift | 755 +++++++++++++++++++-- Sources/DeepLearning/Tensors.swift | 30 + 3 files changed, 1094 insertions(+), 59 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 6327d1390..42479481f 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -202,3 +202,371 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { return (transposed(), { $0.transposed() }) } } + +//===------------------------------------------------------------------------------------------===// +// Broadcasting +//===------------------------------------------------------------------------------------------===// + +// TODO: What about precedence? Also, why is this operator meaningful for broadcasting? +infix operator .= + +public extension Tensor { + @inlinable + func broadcast(toShape shape: Tensor) -> Tensor { + return Raw.broadcastTo(self, shape: shape) + } + + @inlinable + func broadcast(to shape: TensorShape) -> Tensor { + return broadcast(toShape: Tensor(shape.dimensions.map(Int32.init))) + } + + /// Broadcast to the same shape as the specified `Tensor`. + /// - Precondition: The specified shape must be compatible for broadcasting. + @inlinable + func broadcast(like other: Tensor) -> Tensor { + return broadcast(toShape: other.shapeTensor) + } + + @inlinable + static func .= (lhs: inout Tensor, rhs: Tensor) { + lhs = rhs.broadcast(like: lhs) + } +} + +// TODO: Why is this limited only to numeric data types whereas `broadcast` is not? 
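+// `unbroadcast` is the dual of `broadcast`: it reduces, by summation, the dimensions that
+// broadcasting expanded. For example (illustrative), assuming `x` has shape `[3, 1]`:
+//
+//     let y = x.broadcast(to: [3, 4])     // shape is [3, 4]
+//     let z = y.unbroadcast(to: [3, 1])   // sums over the broadcast axis; shape is [3, 1]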
+public extension Tensor where Scalar : Numeric { + @inlinable + func unbroadcast(toShape otherShape: Tensor) -> Tensor { + let rankDiff = (rankTensor - otherShape.scalarCountTensor).rankLifted() + let ones: Tensor = Raw.fill(dims: rankDiff, value: Tensor(1)) + let paddedShape = ones ++ otherShape + let nonEqualIndices = paddedShape .!= shapeTensor + let broadcastIndices = Raw.where_(nonEqualIndices).flattened() + let unbroadcasted: Tensor = Raw.sum( + self, reductionIndices: Tensor(broadcastIndices), keepDims: false) + return Raw.reshape(unbroadcasted, shape: otherShape) + } + + @inlinable + func unbroadcast(like other: Tensor) -> Tensor { + return unbroadcast(toShape: other.shapeTensor) + } + + @inlinable + func unbroadcast(to shape: TensorShape) -> Tensor { + return unbroadcast(toShape: Tensor(shape.dimensions.map(Int32.init))) + } +} + +//===------------------------------------------------------------------------------------------===// +// Padding +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar : Numeric { + /// Returns a padded tensor according to the specified padding sizes. + @inlinable + func padded(forSizes sizes: [(before: Int, after: Int)], with value: Scalar = 0) -> Tensor { + let paddings = Tensor( + shape: [sizes.count, 2], + scalars: sizes.flatMap { [Int32($0.before), Int32($0.after)] }) + return Raw.padV2(self, paddings: paddings, constantValues: Tensor(value)) + } +} + +//===------------------------------------------------------------------------------------------===// +// Indexing and Slicing +//===------------------------------------------------------------------------------------------===// + +// TODO: Negative indexing and strides syntax. + +public extension Tensor { + /// Extracts a slice from the tensor defined by lower and upper bounds for + /// each dimension. + /// + /// - Parameter lowerBounds: The lower bounds at each dimension. + /// - Parameter upperBounds: The upper bounds at each dimension. + @inlinable + @differentiable(wrt: self) + func slice(lowerBounds: [Int], upperBounds: [Int]) -> Tensor { + // TODO: Precondition `lowerBounds.count == upperBounds.count`, + // preferably in graph. + // TODO: Differentiating control flow is not supported yet, thus the thunks. 
+ let lowerBoundsTensor = Tensor({lowerBounds.map(Int32.init)}()) + let upperBoundsTensor = Tensor({upperBounds.map(Int32.init)}()) + return slice(lowerBounds: lowerBoundsTensor, sizes: upperBoundsTensor - lowerBoundsTensor) + } + + @inlinable + @differentiable(wrt: self, vjp: _vjpSlice) + func slice(lowerBounds: Tensor, sizes: Tensor) -> Tensor { + return Raw.slice(self, begin: lowerBounds, size: sizes) + } + + @inlinable + internal func _vjpSlice( + lowerBounds: Tensor, + sizes: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + let value = slice(lowerBounds: lowerBounds, sizes: sizes) + let afterPaddings = shapeTensor - value.shapeTensor - lowerBounds + return (value, { [after = afterPaddings] v in + let beforePaddings = lowerBounds.expandingShape(at: 1) + let afterPaddings = after.expandingShape(at: 1) + let paddings = Tensor( + concatenating: [beforePaddings, afterPaddings], alongAxis: 1) + return Raw.pad(v, paddings: paddings) + }) + } +} + +public enum TensorRange : TensorRangeExpression { + case ellipsis + case newAxis + case squeezeAxis + case index(Int) + case range(Range, stride: Int) + case closedRange(ClosedRange, stride: Int) + case partialRangeFrom(PartialRangeFrom, stride: Int) + case partialRangeUpTo(PartialRangeUpTo, stride: Int) + case partialRangeThrough(PartialRangeThrough, stride: Int) + + public var tensorRange: TensorRange { return self } +} + +extension TensorRange : Equatable { + public static func == (lhs: TensorRange, rhs: TensorRange) -> Bool { + switch (lhs, rhs) { + case (.ellipsis, .ellipsis), + (.newAxis, .newAxis), + (.squeezeAxis, .squeezeAxis): + return true + case (let .index(i1), let .index(i2)): return i1 == i2 + case (let .range(r1, s1), let .range(r2, s2)): return r1 == r2 && s1 == s2 + case (let .closedRange(r1, s1), let .closedRange(r2, s2)): + return r1 == r2 && s1 == s2 + case (let .partialRangeFrom(r1, s1), let .partialRangeFrom(r2, s2)): + return r1.lowerBound == r2.lowerBound && s1 == s2 + case (let .partialRangeUpTo(r1, s1), let .partialRangeUpTo(r2, s2)): + return r1.upperBound == r2.upperBound && s1 == s2 + case (let .partialRangeThrough(r1, s1), let .partialRangeThrough(r2, s2)): + return r1.upperBound == r2.upperBound && s1 == s2 + default: return false + } + } +} + +public protocol TensorRangeExpression { + var tensorRange: TensorRange { get } +} + +// TODO: Cannot extend non-nominal type 'UnboundedRange'. +// extension UnboundedRange : TensorRangeExpression { +// public var tensorRange: TensorRange { return .ellipsis } +// } + +extension Int : TensorRangeExpression { + public var tensorRange: TensorRange { return .index(self) } +} + +extension Range : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .range(self, stride: 1) + } +} + +extension ClosedRange : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .closedRange(self, stride: 1) + } +} + +extension PartialRangeFrom : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .partialRangeFrom(self, stride: 1) + } +} + +extension PartialRangeUpTo : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .partialRangeUpTo(self, stride: 1) + } +} + +extension PartialRangeThrough : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .partialRangeThrough(self, stride: 1) + } +} + +infix operator .. 
: StridedRangeFormationPrecedence +precedencegroup StridedRangeFormationPrecedence { + associativity: left + higherThan: CastingPrecedence + lowerThan: RangeFormationPrecedence +} + +public extension Range where Bound == Int { + static func .. (range: Range, stride: Int) -> TensorRange { + return .range(range, stride: stride) + } +} + +public extension ClosedRange where Bound == Int { + static func .. (range: ClosedRange, stride: Int) -> TensorRange { + return .closedRange(range, stride: stride) + } +} + +public extension PartialRangeFrom where Bound == Int { + static func .. (range: PartialRangeFrom, stride: Int) -> TensorRange { + return .partialRangeFrom(range, stride: stride) + } +} + +public extension PartialRangeUpTo where Bound == Int { + static func .. (range: PartialRangeUpTo, stride: Int) -> TensorRange { + return .partialRangeUpTo(range, stride: stride) + } +} + +public extension PartialRangeThrough where Bound == Int { + static func .. (range: PartialRangeThrough, stride: Int) -> TensorRange { + return .partialRangeThrough(range, stride: stride) + } +} + +public extension Tensor { + @_fixed_layout @usableFromInline + internal struct IndexPath { + @usableFromInline + let begin, end, strides: Tensor + + @usableFromInline + let beginMask, endMask, ellipsisMask, newAxisMask, squeezeAxisMask: Int64 + + @inlinable + public init( + begin: Tensor, end: Tensor, strides: Tensor, + beginMask: Int64, endMask: Int64, ellipsisMask: Int64, newAxisMask: Int64, + squeezeAxisMask: Int64 + ) { + self.begin = begin + self.end = end + self.strides = strides + self.beginMask = beginMask + self.endMask = endMask + self.ellipsisMask = ellipsisMask + self.newAxisMask = newAxisMask + self.squeezeAxisMask = squeezeAxisMask + } + } + + @inlinable + @differentiable(wrt: self, vjp: _vjpSubscript) + internal subscript(_ indexPath: IndexPath) -> Tensor { + get { + return Raw.stridedSlice( + self, begin: indexPath.begin, end: indexPath.end, + strides: indexPath.strides, beginMask: indexPath.beginMask, + endMask: indexPath.endMask, ellipsisMask: indexPath.ellipsisMask, + newAxisMask: indexPath.newAxisMask, + shrinkAxisMask: indexPath.squeezeAxisMask) + } + set { + self = Raw.tensorStridedSliceUpdate( + self, begin: indexPath.begin, end: indexPath.end, + strides: indexPath.strides, value: newValue, + beginMask: indexPath.beginMask, endMask: indexPath.endMask, + ellipsisMask: indexPath.ellipsisMask, + newAxisMask: indexPath.newAxisMask, + shrinkAxisMask: indexPath.squeezeAxisMask) + } + } + + @inlinable + // TODO: @differentiable(wrt: self) + subscript(_ ranges: TensorRangeExpression...) 
-> Tensor { + get { + return self[IndexPath(ranges.map { $0.tensorRange })] + } + set { + self[IndexPath(ranges.map { $0.tensorRange })] = newValue + } + } + + @usableFromInline + internal func _vjpSubscript( + _ indexPath: IndexPath + ) -> (Tensor, (Tensor) -> Tensor) { + return (self[indexPath], { [shape = shapeTensor] v in + Raw.stridedSliceGrad( + shape: shape, begin: indexPath.begin, end: indexPath.end, + strides: indexPath.strides, dy: v, beginMask: indexPath.beginMask, + endMask: indexPath.endMask, ellipsisMask: indexPath.ellipsisMask, + newAxisMask: indexPath.newAxisMask, + shrinkAxisMask: indexPath.squeezeAxisMask) + }) + } +} + +internal extension Tensor.IndexPath { + @inlinable + init(_ ranges: [TensorRange]) { + precondition(!ranges.isEmpty, "The tensor range collection cannot be empty.") + precondition(ranges.count { $0 == TensorRange.ellipsis } < 2, + "Only one ellipsis is allowed per tensor range collection.") + + var begin = [Int32](repeating: 0, count: ranges.count) + var end = [Int32](repeating: 0, count: ranges.count) + var strides = [Int32](repeating: 1, count: ranges.count) + var beginMask: Int64 = 0 + var endMask: Int64 = 0 + var ellipsisMask: Int64 = 0 + var newAxisMask: Int64 = 0 + var squeezeAxisMask: Int64 = 0 + for (i, index) in ranges.enumerated() { + switch index { + case .ellipsis: ellipsisMask |= 1 << i + case .newAxis: newAxisMask |= 1 << i + case .squeezeAxis: squeezeAxisMask |= 1 << i + case .index(let index): + begin[i] = Int32(index) + end[i] = Int32(index) + 1 + squeezeAxisMask |= 1 << i + case .range(let range, let stride): + begin[i] = Int32(range.lowerBound) + end[i] = Int32(range.upperBound) + strides[i] = Int32(stride) + case .closedRange(let range, let stride): + begin[i] = Int32(range.lowerBound) + switch Int32(range.upperBound) { + case -1: endMask |= 1 << i + case let u: end[i] = u + 1 + } + strides[i] = Int32(stride) + case .partialRangeFrom(let range, let stride): + begin[i] = Int32(range.lowerBound) + strides[i] = Int32(stride) + endMask |= 1 << i + case .partialRangeUpTo(let range, let stride): + end[i] = Int32(range.upperBound) + strides[i] = Int32(stride) + beginMask |= 1 << i + case .partialRangeThrough(let range, let stride): + end[i] = Int32(range.upperBound) + 1 + strides[i] = Int32(stride) + beginMask |= 1 << i + } + } + + self.begin = Tensor(begin) + self.end = Tensor(end) + self.strides = Tensor(strides) + self.beginMask = beginMask + self.endMask = endMask + self.ellipsisMask = ellipsisMask + self.newAxisMask = newAxisMask + self.squeezeAxisMask = squeezeAxisMask + } +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 7b295912d..9f858a98b 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -628,6 +628,80 @@ internal func _vjpFloor( return (floor(x), { _ in Tensor(0).broadcast(like: x) }) } +/// Computes the sigmoid of the specified tensor element-wise. +/// Specifically, computes `1 / (1 + exp(-x))`. +@inlinable +@differentiable(vjp: _vjpSigmoid) +public func sigmoid(_ x: Tensor) -> Tensor { + return Raw.sigmoid(x) +} + +@inlinable +internal func _vjpSigmoid( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) +} + +/// Computes the softmax of the specified tensor along the last axis. +/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. 
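+///
+/// For example (illustrative), `softmax(Tensor<Float>([1, 1, 1, 1]))` evaluates to
+/// `[0.25, 0.25, 0.25, 0.25]`.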
+@inlinable +@differentiable(vjp: _vjpSoftmax(_:) where T : TensorFlowFloatingPoint) +public func softmax(_ x: Tensor) -> Tensor { + return Raw.softmax(logits: x) +} + +/// Computes the softmax of the specified tensor along the specified axis. +/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. +@inlinable +// TODO: [AD]. +public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { + let xExp = exp(x) + let xExpSum = Raw.sum(xExp, reductionIndices: Tensor(axis), keepDims: true) + return xExp / xExpSum +} + +@inlinable +func _vjpSoftmax( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = softmax(x) + return (value, { v in + let sumChannels = (v * value).sum(alongAxes: -1) + return (v - sumChannels) * value + }) +} + +/// Computes the log-softmax of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpLogSoftmax(_:) where T : TensorFlowFloatingPoint) +public func logSoftmax(_ x: Tensor) -> Tensor { + return Raw.logSoftmax(logits: x) +} + +@inlinable +func _vjpLogSoftmax( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = logSoftmax(x) + return (value, { v in v - v.sum(alongAxes: -1) * exp(value) }) +} + +/// Computes `relu` of the specified tensor element-wise. +/// Specifically, computes `max(0, x)`. +@inlinable +@differentiable(vjp: _vjpRelu(_:) where T : TensorFlowFloatingPoint) +public func relu(_ x: Tensor) -> Tensor { + return max(0, x) +} + +@inlinable +func _vjpRelu( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (relu(x), { v in Tensor(x .> 0) * v }) +} + //===------------------------------------------------------------------------------------------===// // Element-wise Binary Math Functions //===------------------------------------------------------------------------------------------===// @@ -790,81 +864,644 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { } } -/// Computes the sigmoid of the specified tensor element-wise. -/// Specifically, computes `1 / (1 + exp(-x))`. -@inlinable -@differentiable(vjp: _vjpSigmoid) -public func sigmoid(_ x: Tensor) -> Tensor { - return Raw.sigmoid(x) +//===------------------------------------------------------------------------------------------===// +// Reduction Functions +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar == Bool { + /// Returns `true` if all scalars are equal to `true`. Otherwise, returns `false`. + // NOTE: This overload is necessary, otherwise `all()` would refer to the variadic method + // `all(squeezingAxes:)` with zero indices. + @inlinable + func all() -> Bool { + let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1) + return _TFGetScalarOrDie(Raw.all(self, reductionIndices: axes).handle) + } + + /// Returns `true` if any scalars are equal to `true`. Otherwise, returns `false`. + // NOTE: This overload is necessary, otherwise `any()` would refer to the variadic method + // `any(squeezingAxes:)` with zero indices. + @inlinable + func any() -> Bool { + let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1) + return _TFGetScalarOrDie(Raw.any(self, reductionIndices: axes).handle) + } + + /// Performs a logical AND operation along the specified axes. The reduced dimensions are + /// removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. 
Tensor { + let axes = axes.map(Int32.init) + return Raw.all(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Performs a logical AND operation along the specified axes. The reduced dimensions are + /// removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.any(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Performs a logical AND operation along the specified axes. The reduced dimensions are + /// retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.all(self, reductionIndices: Tensor(axes), keepDims: true) + } + + /// Performs a logical OR operation along the specified axes. The reduced + /// dimensions are retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.any(self, reductionIndices: Tensor(axes), keepDims: true) + } } -@inlinable -internal func _vjpSigmoid( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) +public extension Tensor where Scalar : Numeric & Comparable { + // NOTE: This overload is necessary, otherwise `min()` would refer to the variadic method + // `min(squeezingAxes:)` with zero indices. + @inlinable + func min() -> Tensor { + let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1) + return Raw.min(self, reductionIndices: axes) + } + + // NOTE: This overload is necessary, otherwise `max()` would refer to the variadic method + // `max(squeezingAxes:)` with zero indices. + @inlinable + func max() -> Tensor { + let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1) + return Raw.max(self, reductionIndices: axes) + } + + /// Returns the maximum values along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.max(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Returns the maximum values along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return max(squeezingAxes: axes) + } + + /// Returns the minimum values along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.min(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Returns the minimum values along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return min(squeezingAxes: axes) + } + + /// Returns the indices of the maximum values along the specified axes. The reduced dimensions + /// are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. 
Tensor { + return Raw.argMax(self, dimension: Tensor(Int32(axis))) + } + + /// Returns the indices of the minimum values along the specified axes. The reduced dimensions + /// are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return Raw.argMin(self, dimension: Tensor(Int32(axis))) + } + + /// Returns the minimum along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.min(self, reductionIndices: Tensor(axes), keepDims: true) + } + + /// Returns the minimum along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return min(alongAxes: axes) + } + + /// Returns the minimum along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.max(self, reductionIndices: Tensor(axes), keepDims: true) + } + + /// Returns the minimum along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return max(alongAxes: axes) + } + + /// Returns the index of the maximum value of the flattened scalars. + @inlinable + func argmax() -> Tensor { + return flattened().argmax(squeezingAxis: 0) + } + + /// Returns the index of the minimum value of the flattened scalars. + @inlinable + func argmin() -> Tensor { + return flattened().argmin(squeezingAxis: 0) + } } -/// Computes the softmax of the specified tensor along the last axis. -/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. -@inlinable -@differentiable(vjp: _vjpSoftmax(_:) where T : TensorFlowFloatingPoint) -public func softmax(_ x: Tensor) -> Tensor { - return Raw.softmax(logits: x) +// MARK: - Numeric Reductions + +public extension Tensor where Scalar : Numeric { + // MARK: - Sum + + /// Returns the sum along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self, vjp: _vjpSum(squeezingAxes:) where Scalar : TensorFlowFloatingPoint) + func sum(squeezingAxes axes: Tensor) -> Tensor { + return Raw.sum(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Returns the sum along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func sum(squeezingAxes axes: [Int]) -> Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return sum(squeezingAxes: Tensor(axes)) + } + + /// Returns the sum along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. 
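+  ///
+  /// For example (illustrative), summing `Tensor<Float>(shape: [2, 2], scalars: [1, 2, 3, 4])`
+  /// with `squeezingAxes: 1` evaluates to `[3, 7]`, a tensor of shape `[2]`.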
+ @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func sum(squeezingAxes axes: Int...) -> Tensor { + return sum(squeezingAxes: axes) + } + + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func sum() -> Tensor { + return flattened().sum(squeezingAxes: 0) + } + + /// Returns the sum along the specified axes. The reduced dimensions are retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + return Raw.sum(self, reductionIndices: axes, keepDims: true) + } + + /// Returns the sum along the specified axes. The reduced dimensions are retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return sum(alongAxes: Tensor(axes)) + } + + /// Returns the sum along the specified axes. The reduced dimensions are retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return sum(alongAxes: axes) + } + + // MARK: - Product + + /// Returns the product along the specified axes. The reduced dimensions are removed. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + // TODO: Make this @differentiable. + @inlinable + func product(squeezingAxes axes: Tensor) -> Tensor { + return Raw.prod(self, reductionIndices: axes, keepDims: false) + } + + /// Returns the product along the specified axes. The reduced dimensions are removed. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + func product(squeezingAxes axes: [Int]) -> Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return product(squeezingAxes: Tensor(axes)) + } + + /// Returns the product along the specified axes. The reduced dimensions are removed. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + func product(squeezingAxes axes: Int...) -> Tensor { + return product(squeezingAxes: axes) + } + + @inlinable + func product() -> Tensor { + return flattened().product(squeezingAxes: 0) + } + + /// Returns the product along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + return Raw.prod(self, reductionIndices: axes, keepDims: true) + } + + /// Returns the product along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return product(alongAxes: Tensor(axes)) + } + + /// Returns the product along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. 
Tensor { + return product(alongAxes: axes) + } + + // MARK: - Mean + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self, vjp: _vjpMean(squeezingAxes:) where Scalar : TensorFlowFloatingPoint) + func mean(squeezingAxes axes: Tensor) -> Tensor { + return Raw.mean(self, reductionIndices: axes, keepDims: false) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func mean(squeezingAxes axes: [Int]) -> Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return mean(squeezingAxes: Tensor(axes)) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func mean(squeezingAxes axes: Int...) -> Tensor { + return mean(squeezingAxes: axes) + } + + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func mean() -> Tensor { + return flattened().mean(squeezingAxes: [0]) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are retained + /// with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + return Raw.mean(self, reductionIndices: axes, keepDims: true) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are retained + /// with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return mean(alongAxes: Tensor(axes)) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are retained + /// with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return mean(alongAxes: axes) + } + + // MARK: - Variance + + /// Returns the variance along the specified axes. The reduced dimensions are removed. Does not + /// apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + let squaredDiff = (self - mean(alongAxes: axes)).squared() + return squaredDiff.mean(squeezingAxes: axes) + } + + /// Returns the variance along the specified axes. The reduced dimensions are removed. Does not + /// apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return variance(squeezingAxes: Tensor(axes)) + } + + /// Returns the variance along the specified axes. The reduced dimensions are retained with + /// value 1. 
Does not apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return variance(squeezingAxes: axes) + } + + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @inlinable + func variance() -> Tensor { + let mean = self.mean() + let squaredDiff = (self - mean).squared() + return squaredDiff.mean() + } + + /// Returns the variance along the specified axes. The reduced dimensions are retained with + /// value 1. Does not apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + let squaredDiff = (self - mean(alongAxes: axes)).squared() + return squaredDiff.mean(alongAxes: axes) + } + + /// Returns the variance along the specified axes. The reduced dimensions are retained with + /// value 1. Does not apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return variance(alongAxes: Tensor(axes)) + } + + /// Returns the variance along the specified axes. The reduced dimensions are retained with + /// value 1. Does not apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return variance(alongAxes: axes) + } } -/// Computes the softmax of the specified tensor along the specified axis. -/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. -@inlinable -// TODO: [AD]. -public func softmax( - _ x: Tensor, - alongAxis axis: Int -) -> Tensor { - let expx = exp(x) - // TODO: [BUG] keepDims = true for the sum. 
- return expx / expx.sum(alongAxes: axis) +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + func _vjpSum(alongAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { + let value = sum(alongAxes: axes) + return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) }) + } + + @inlinable + func _vjpSum(squeezingAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { + let value = sum(squeezingAxes: axes) + return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) }) + } + + @inlinable + func _vjpMean(alongAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { + let value = mean(alongAxes: axes) + let count = Raw.gather(params: shapeTensor, indices: axes).product() + return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) / Tensor(count) }) + } + + @inlinable + func _vjpMean(squeezingAxes axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { + let value = mean(squeezingAxes: axes) + return (value, { [shape = shapeTensor, count = axes.map { shape[$0] }.reduce(1, *)] in + $0.broadcast(toShape: shape) / Tensor(Scalar(count)) + }) + } + + @inlinable + func _vjpMean( + squeezingAxes axes: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + let value = mean(squeezingAxes: axes) + let count = Raw.gather(params: shapeTensor, indices: axes).product() + return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) / Tensor(count) }) + } } -@inlinable -func _vjpSoftmax( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - let value = softmax(x) - return (value, { v in - let sumChannels = (v * value).sum(alongAxes: -1) - return (v - sumChannels) * value - }) +// TODO: Consider making the return type be generic over `FloatingPoint` types +// so that `self`'s scalar type can be any `Numeric` type. +public extension Tensor where Scalar : TensorFlowFloatingPoint { + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + return sqrt(variance(squeezingAxes: axes)) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return sqrt(variance(squeezingAxes: axes)) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return standardDeviation(squeezingAxes: axes) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // Reduce along all dimensions. + return standardDeviation(squeezingAxes: Array(0..) -> Tensor { + return sqrt(variance(alongAxes: axes)) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. 
Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return standardDeviation(alongAxes: Tensor(axes)) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return sqrt(variance(alongAxes: axes)) + } } -/// Computes the log-softmax of the specified tensor element-wise. +//===------------------------------------------------------------------------------------------===// +// Linear Algebra +//===------------------------------------------------------------------------------------------===// + +/// Performs matrix multiplication with another tensor and produces the result. @inlinable -@differentiable(vjp: _vjpLogSoftmax(_:) where T : TensorFlowFloatingPoint) -public func logSoftmax(_ x: Tensor) -> Tensor { - return Raw.logSoftmax(logits: x) +@differentiable(vjp: _vjpMatmul(_:_:) where Scalar : TensorFlowFloatingPoint) +public func matmul( + _ lhs: Tensor, + _ rhs: Tensor +) -> Tensor { + // Default arguments specified explicitly to avoid "external declarations of SILFunctions with + // shared visibility is not allowed" SILVerifier error in + // "tests/AutoDiff/tensor_autodiff_runtime.swift". + return Raw.matMul(lhs, rhs, transposeA: false, transposeB: false) } @inlinable -func _vjpLogSoftmax( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - let value = logSoftmax(x) - return (value, { v in - v - v.sum(alongAxes: -1) * exp(value) - }) +internal func _vjpMatmul( + _ lhs: Tensor, + _ rhs: Tensor +) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = matmul(lhs, rhs) + return (value, { v in + (matmul(v, rhs.transposed()), matmul(lhs.transposed(), v)) + }) } -/// Computes `relu` of the specified tensor element-wise. -/// Specifically, computes `max(0, x)`. -@inlinable -@differentiable(vjp: _vjpRelu(_:) where T : TensorFlowFloatingPoint) -public func relu(_ x: Tensor) -> Tensor { - return max(0, x) +infix operator • : MultiplicationPrecedence + +public extension Tensor where Scalar : Numeric { + // TODO: We have to define a custom VJP on • because AD can't yet differentiate generic methods. + // After AD can differentiate generic methods, remove the custom VJP. + + /// Performs matrix multiplication between two tensors and produces the result. + @inlinable + @differentiable(vjp: _vjpMatmulOperator(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func • (lhs: Tensor, rhs: Tensor) -> Tensor { + return matmul(lhs, rhs) + } } -@inlinable -func _vjpRelu( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - return (relu(x), { v in Tensor(x .> 0) * v }) +// TODO: We have to define a custom VJP on • because AD can't yet +// differentiate generic methods. After AD can differentiate generic methods, +// remove the custom VJP. 
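+// For example (illustrative), both spellings compute the same matrix product:
+//
+//     let a = Tensor<Float>(shape: [2, 3], scalars: [1, 2, 3, 4, 5, 6])
+//     let b = Tensor<Float>(shape: [3, 2], scalars: [1, 2, 3, 4, 5, 6])
+//     let c = matmul(a, b)  // shape is [2, 2]
+//     let d = a • b         // same result, using the infix operator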
+internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpMatmulOperator( + lhs: Tensor, + rhs: Tensor + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return _vjpMatmul(lhs, rhs) + } } diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift index e11e1f5d7..2d4e3b32d 100644 --- a/Sources/DeepLearning/Tensors.swift +++ b/Sources/DeepLearning/Tensors.swift @@ -20,6 +20,36 @@ import TensorFlow infix operator .== : ComparisonPrecedence #endif +//===------------------------------------------------------------------------------------------===// +// Tensor Properties +//===------------------------------------------------------------------------------------------===// + +public extension Tensor { + /// The rank of the tensor, represented as a `Tensor`. + @inlinable + var rankTensor: Tensor { + get { + return Raw.rank(self) + } + } + + /// The dimensions of the tensor, represented as a `Tensor`. + @inlinable + var shapeTensor: Tensor { + get { + return Raw.shape(self) + } + } + + /// The number of scalars in the tensor, represented as a `Tensor`. + @inlinable + var scalarCountTensor: Tensor { + get { + return Raw.size(self) + } + } +} + //===------------------------------------------------------------------------------------------===// // Description and Visualization //===------------------------------------------------------------------------------------------===// From 1120692fcde6a35a669da18938de4b52f87e2f89 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 14:12:44 -0400 Subject: [PATCH 41/55] Added a README file to the 'Operators' source directory. --- Sources/DeepLearning/Operators/Basic.swift | 2 ++ Sources/DeepLearning/Operators/README.md | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 Sources/DeepLearning/Operators/README.md diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 42479481f..5e5a6e594 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -117,6 +117,8 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // Other Tensor Transformations //===------------------------------------------------------------------------------------------===// +infix operator ++ : AdditionPrecedence + public extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @inlinable diff --git a/Sources/DeepLearning/Operators/README.md b/Sources/DeepLearning/Operators/README.md new file mode 100644 index 000000000..c2f0d5e22 --- /dev/null +++ b/Sources/DeepLearning/Operators/README.md @@ -0,0 +1,22 @@ +# Ops and Convenience Methods + +The majority of the Tensor API is implemented in terms of 'ops' that are +partitioned out to the TensorFlow graph when the compiler runs. These +ops are intentionally designed to reflect TensorFlow ops, but provide nicer +Swift syntax for accessing them. In addition to the core ops themselves, +we also define some helper function wrappers, e.g. to make things symmetric +and generally feel nice to use. + +The ops themselves are defined by the primitive `#tfop(...)` syntax, here +are some examples: +``` +result = #tfop("Add", lhs, rhs) +result = #tfop("Const", dtype: Float.self, value$tensor: 4.0) +``` + +The first parameter to this syntax is the TensorFlow op name as a string. +After that, the inputs are specified, and then attributes are specified +with their name as the keyword argument. 
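+
+As an illustration of the helper-wrapper layer, a convenience function
+typically just forwards to a raw op with a nicer Swift signature. The
+following is only a sketch (the `squaredDifference` wrapper shown here is
+hypothetical; only the underlying `Raw.squaredDifference` op is assumed):
+```
+/// Hypothetical wrapper, for illustration only.
+public func squaredDifference<Scalar : TensorFlowFloatingPoint>(
+  _ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>
+) -> Tensor<Scalar> {
+  return Raw.squaredDifference(lhs, rhs)
+}
+```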
+ +Inputs and outputs must be of TensorHandle, ResourceHandle, or VariantHandle +type. These are magic types known to the compiler. From e7a04d2ad44ae311c6c41b77a79f59de3529a630 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 14:22:59 -0400 Subject: [PATCH 42/55] Brought the gradient helper functions from the stdlib. --- Sources/DeepLearning/Gradients.swift | 178 +++++++++++++++++++++++ Sources/DeepLearning/Operators/README.md | 19 +++ 2 files changed, 197 insertions(+) create mode 100644 Sources/DeepLearning/Gradients.swift diff --git a/Sources/DeepLearning/Gradients.swift b/Sources/DeepLearning/Gradients.swift new file mode 100644 index 000000000..04a37fe8b --- /dev/null +++ b/Sources/DeepLearning/Gradients.swift @@ -0,0 +1,178 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +import TensorFlow +#endif + +//===------------------------------------------------------------------------------------------===// +// Method-style Differential Operators +//===------------------------------------------------------------------------------------------===// + +public extension Differentiable { + @inlinable + func gradient( + in f: @differentiable (Self) -> Tensor + ) -> CotangentVector { + return self.pullback(in: f)(Tensor(1)) + } + + @inlinable + func valueWithGradient( + in f: @differentiable (Self) -> Tensor + ) -> (value: Tensor, gradient: CotangentVector) { + let (y, pb) = self.valueWithPullback(in: f) + return (y, pb(Tensor(1))) + } + + @inlinable + func gradient( + at x: T, + in f: @differentiable (Self, T) -> Tensor + ) -> (CotangentVector, T.CotangentVector) { + return self.pullback(at: x, in: f)(Tensor(1)) + } + + @inlinable + func valueWithGradient( + at x: T, + in f: @differentiable (Self, T) -> Tensor + ) -> (value: Tensor, gradient: (CotangentVector, T.CotangentVector)) { + let (y, pb) = self.valueWithPullback(at: x, in: f) + return (y, pb(Tensor(1))) + } +} + +//===------------------------------------------------------------------------------------------===// +// Free-Function-Style Differential Operators +//===------------------------------------------------------------------------------------------===// + +// Value with gradient + +@inlinable +public func valueWithGradient( + at x: T, + in f: @differentiable (T) -> Tensor +) -> (value: Tensor, gradient: T.CotangentVector) +where T : Differentiable, R : TensorFlowFloatingPoint { + let (y, pullback) = valueWithPullback(at: x, in: f) + return (y, pullback(Tensor(1))) +} + +@inlinable +public func valueWithGradient( + at x: T, + _ y: U, + in f: @differentiable (T, U) -> Tensor +) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector)) + where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + let (y, pullback) = valueWithPullback(at: x, y, in: f) + return (y, pullback(Tensor(1))) +} + +@inlinable +public func valueWithGradient( + at x: T, + _ y: U, + _ z: V, + in f: 
@differentiable (T, U, V) -> Tensor +) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector, V.CotangentVector)) + where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + let (y, pullback) = valueWithPullback(at: x, y, z, in: f) + return (y, pullback(Tensor(1))) +} + +// Value with gradient (curried) + +@inlinable +public func valueWithGradient( + of f: @escaping @differentiable (T) -> Tensor +) -> (T) -> (value: Tensor, gradient: T.CotangentVector) + where T : Differentiable, R : TensorFlowFloatingPoint { + return { x in valueWithGradient(at: x, in: f) } +} + +@inlinable +public func valueWithGradient( + of f: @escaping @differentiable (T, U) -> Tensor +) -> (T, U) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector)) + where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + return { x, y in valueWithGradient(at: x, y, in: f) } +} + +@inlinable +public func valueWithGradient( + of f: @escaping @differentiable (T, U, V) -> Tensor +) -> (T, U, V) -> ( + value: Tensor, + gradient: (T.CotangentVector, U.CotangentVector, V.CotangentVector)) + where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + return { x, y, z in valueWithGradient(at: x, y, z, in: f) } +} + +// Gradient + +@inlinable +public func gradient( + at x: T, + in f: @differentiable (T) -> Tensor +) -> T.CotangentVector where T : Differentiable, R : TensorFlowFloatingPoint { + return pullback(at: x, in: f)(Tensor(1)) +} + +@inlinable +public func gradient( + at x: T, + _ y: U, + in f: @differentiable (T, U) -> Tensor +) -> (T.CotangentVector, U.CotangentVector) + where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + return pullback(at: x, y, in: f)(Tensor(1)) +} + +@inlinable +public func gradient( + at x: T, + _ y: U, + _ z: V, + in f: @differentiable (T, U, V) -> Tensor +) -> (T.CotangentVector, U.CotangentVector, V.CotangentVector) + where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + return pullback(at: x, y, z, in: f)(Tensor(1)) +} + +// Gradient (curried) + +@inlinable +public func gradient( + of f: @escaping @differentiable (T) -> Tensor +) -> (T) -> T.CotangentVector where T : Differentiable, R : TensorFlowFloatingPoint { + return { x in gradient(at: x, in: f) } +} + +@inlinable +public func gradient( + of f: @escaping @differentiable (T, U) -> Tensor +) -> (T, U) -> (T.CotangentVector, U.CotangentVector) + where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + return { x, y in gradient(at: x, y, in: f) } +} + +@inlinable +public func gradient( + of f: @escaping @differentiable (T, U, V) -> Tensor +) -> (T, U, V) -> (T.CotangentVector, U.CotangentVector, V.CotangentVector) + where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + return { x, y, z in gradient(at: x, y, z, in: f) } +} diff --git a/Sources/DeepLearning/Operators/README.md b/Sources/DeepLearning/Operators/README.md index c2f0d5e22..76e7a7e69 100644 --- a/Sources/DeepLearning/Operators/README.md +++ b/Sources/DeepLearning/Operators/README.md @@ -20,3 +20,22 @@ with their name as the keyword argument. Inputs and outputs must be of TensorHandle, ResourceHandle, or VariantHandle type. These are magic types known to the compiler. + +## Auto-Differentiation Support + +We also provide vector-Jacobian product (VJP) definitions for some of the +convenience methods. 
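+
+As a minimal sketch of how such a definition looks, assuming a hypothetical
+`squared` function (the real definitions live in the `Operators` sources):
+```
+@differentiable(vjp: _vjpSquared(_:))
+public func squared<T : TensorFlowFloatingPoint>(_ x: Tensor<T>) -> Tensor<T> {
+  // Hypothetical function, for illustration only.
+  return x * x
+}
+
+func _vjpSquared<T : TensorFlowFloatingPoint>(
+  _ x: Tensor<T>
+) -> (Tensor<T>, (Tensor<T>) -> Tensor<T>) {
+  // Returns the value along with a pullback that maps an incoming cotangent
+  // `v` to `v * 2 * x`, the vector-Jacobian product of `x * x`.
+  return (x * x, { v in v * 2 * x })
+}
+```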
+ +Terminology: +- originalValue (f): The function being differentiated, or the result of that + function. +- VJP (f'): The function as the result of differentiation, computing + the vector-Jacobian products with respect to all arguments, or the result + of that function. + +For more information, visit: +https://en.wikipedia.org/wiki/Automatic_differentiation + +The attribute '@differentiable(vjp: ...)' is used to register a function's VJP. +The automatic differentiation pass identifies these VJPs and chains them +together to produce arbitrary differentiable programs. From 3ee21ffaaf48dbee0237d796618fc736a4f7812f Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 15:26:57 -0400 Subject: [PATCH 43/55] Bug fixes. --- Sources/DeepLearning/Initializers.swift | 56 +++++++++++----------- Sources/DeepLearning/Operators/Basic.swift | 4 +- Sources/DeepLearning/Operators/Math.swift | 3 +- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index e644fd78a..5e90be2b7 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -143,7 +143,7 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) + // @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -181,40 +181,40 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. @inlinable - @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @inlinable - static func _vjpStacking( - stacking tensors: [Tensor], - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(stacking: tensors, alongAxis: axis) - return (result, { v in - return Array.DifferentiableView(v.unstack(alongAxis: axis)) - }) - } +// internal extension Tensor where Scalar : TensorFlowFloatingPoint { +// @inlinable +// static func _vjpStacking( +// stacking tensors: [Tensor], +// alongAxis axis: Int = 0 +// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { +// let result = Tensor(stacking: tensors, alongAxis: axis) +// return (result, { v in +// return Array.DifferentiableView(v.unstack(alongAxis: axis)) +// }) +// } - @inlinable - static func _vjpConcatenating( - concatenating tensors: [Tensor], - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(concatenating: tensors, alongAxis: axis) - let posAxis = axis < 0 ? 
axis + tensors[0].rank : axis - let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) - return (result, { [count = tensors.count] v in - if count == 1 { return Array.DifferentiableView([v]) } - let splits = v.split(sizes: sizes, alongAxis: posAxis) - return Array.DifferentiableView(splits) - }) - } -} +// @inlinable +// static func _vjpConcatenating( +// concatenating tensors: [Tensor], +// alongAxis axis: Int = 0 +// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { +// let result = Tensor(concatenating: tensors, alongAxis: axis) +// let posAxis = axis < 0 ? axis + tensors[0].rank : axis +// let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) +// return (result, { [count = tensors.count] v in +// if count == 1 { return Array.DifferentiableView([v]) } +// let splits = v.split(sizes: sizes, alongAxis: posAxis) +// return Array.DifferentiableView(splits) +// }) +// } +// } //===------------------------------------------------------------------------------------------===// // Numeric diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 5e5a6e594..428a4ca78 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -20,13 +20,15 @@ import TensorFlow // Shape Transformations //===------------------------------------------------------------------------------------------===// -public extension Tensor { +public extension TensorFlowScalar { /// Convert to a tensor with the specified rank, with all dimensions equal to 1. @inlinable func makeTensor(rank: Int) -> Tensor { return Tensor(repeating: self, shape: TensorShape(rank)) } +} +public extension Tensor { /// Reshape to the shape of the specified `Tensor`. /// - Precondition: The number of scalars matches the new shape. @inlinable diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 9f858a98b..8851080ec 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -18,6 +18,7 @@ import TensorFlow #if COMPILING_TENSORFLOW_MODULE infix operator .> : ComparisonPrecedence +infix operator .== : ComparisonPrecedence #endif // TODO: @@ -657,7 +658,7 @@ public func softmax(_ x: Tensor) -> Tensor { // TODO: [AD]. public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { let xExp = exp(x) - let xExpSum = Raw.sum(xExp, reductionIndices: Tensor(axis), keepDims: true) + let xExpSum = Raw.sum(xExp, reductionIndices: Tensor(Int32(axis)), keepDims: true) return xExp / xExpSum } From ef1c73bc6744cbb7e375d5d9e99463d5b71b7a38 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 16:38:00 -0400 Subject: [PATCH 44/55] Brought the tensor tests from the stdlib. 
--- Sources/DeepLearning/Helpers.swift | 4 + Sources/DeepLearning/Initializers.swift | 4 +- Sources/DeepLearning/Operators/Basic.swift | 19 +- Sources/DeepLearning/Operators/NN.swift | 95 ++-- Sources/DeepLearning/PythonConversion.swift | 174 +++++++ .../DeepLearningTests/InitializerTests.swift | 97 ++++ .../OperatorTests/BasicTests.swift | 452 ++++++++++++++++++ .../OperatorTests/ComparisonTests.swift | 30 ++ .../OperatorTests/MathTests.swift | 199 ++++++++ Tests/DeepLearningTests/TensorTests.swift | 81 ++++ Tests/DeepLearningTests/XCTestManifests.swift | 4 + 11 files changed, 1100 insertions(+), 59 deletions(-) create mode 100644 Sources/DeepLearning/PythonConversion.swift create mode 100644 Tests/DeepLearningTests/InitializerTests.swift create mode 100644 Tests/DeepLearningTests/OperatorTests/BasicTests.swift create mode 100644 Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift create mode 100644 Tests/DeepLearningTests/OperatorTests/MathTests.swift create mode 100644 Tests/DeepLearningTests/TensorTests.swift diff --git a/Sources/DeepLearning/Helpers.swift b/Sources/DeepLearning/Helpers.swift index 4d9c0217b..39bcf1e1d 100644 --- a/Sources/DeepLearning/Helpers.swift +++ b/Sources/DeepLearning/Helpers.swift @@ -30,3 +30,7 @@ public func identity(_ x: Tensor) -> Tensor { func pow(_ x: T, _ y: T) -> T { return T(pow(Double(x), Double(y))) } + +extension Array where Element : Differentiable { + +} diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 5e90be2b7..204ca8a26 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -109,7 +109,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - @differentiable(where Scalar : TensorFlowFloatingPoint) + // @differentiable(where Scalar : TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } @@ -196,7 +196,7 @@ public extension Tensor { // ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { // let result = Tensor(stacking: tensors, alongAxis: axis) // return (result, { v in -// return Array.DifferentiableView(v.unstack(alongAxis: axis)) +// Array.DifferentiableView(v.unstack(alongAxis: axis)) // }) // } diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 428a4ca78..71a8970a0 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -163,7 +163,7 @@ public extension Tensor { /// specified axis. /// - Precondition: The axis must be in the range `-rank.. Tensor { return Tensor(concatenating: [self, other], alongAxis: axis) } @@ -205,6 +205,23 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { func _vjpTransposed() -> (Tensor, (Tensor) -> Tensor) { return (transposed(), { $0.transposed() }) } + + @inlinable + func _vjpConcatenated( + with other: Tensor, + alongAxis axis: Int + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let idx = axis < 0 ? 
axis + rank : axis + let splits = Tensor([shapeTensor[idx], other.shapeTensor[idx]]) + return (concatenated(with: other, alongAxis: axis), { result in + let gradients = Raw.splitV( + value: result, + sizeSplits: splits, + splitDim: Tensor(Int32(axis)), + numSplit: Int64(2)) + return (gradients[0], gradients[1]) + }) + } } //===------------------------------------------------------------------------------------------===// diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index f1401af20..1664b1954 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -20,7 +20,7 @@ import TensorFlow // Normalization //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar: TensorFlowFloatingPoint { +public extension Tensor where Scalar : TensorFlowFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are @@ -32,9 +32,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { /// - scale: The scale, also known as gamma. /// - epsilon: A small value added to the denominator for numerical stability. @inlinable - @differentiable( - wrt: (self, offset, scale), - vjp: _vjpBatchNormalized) + @differentiable(wrt: (self, offset, scale), vjp: _vjpBatchNormalized) func batchNormalized( alongAxis axis: Int, offset: Tensor = Tensor(0), @@ -56,8 +54,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { scale: Tensor, epsilon: Scalar ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { - let value = batchNormalized( - alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) + let value = batchNormalized(alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) return (value, { v in let mean = self.mean(alongAxes: axis) let squaredDiff: Tensor = Raw.squaredDifference(self, mean) @@ -79,7 +76,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { } } -public extension Tensor where Scalar: BinaryFloatingPoint { +public extension Tensor where Scalar : BinaryFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// /// Specifically, returns `(self - mu)/(var + epsilon) * gamma + beta` where @@ -94,9 +91,8 @@ public extension Tensor where Scalar: BinaryFloatingPoint { /// stability. @inlinable @differentiable( - wrt: (self, offset, scale), vjp: _vjpBatchNormalized - where Scalar : TensorFlowFloatingPoint - ) + wrt: (self, offset, scale), + vjp: _vjpBatchNormalized where Scalar : TensorFlowFloatingPoint) func batchNormalized( alongAxis axis: Int, offset: Tensor = Tensor(0), @@ -142,7 +138,7 @@ public extension Padding { } } -public extension Tensor where Scalar: TensorFlowFloatingPoint { +public extension Tensor where Scalar : TensorFlowFloatingPoint { /// TensorFlow builtin conv2d gradient helper for the input. 
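+  /// That is, given the gradient of a convolution's output (`self`), the original input `shape`,
+  /// and the `filter`, it returns the gradient with respect to the convolution's input.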
@inlinable @differentiable(wrt: (self, filter), vjp: _vjpConv2DBackpropInput) @@ -186,14 +182,13 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { _ strides: (Int, Int, Int, Int), _ padding: Padding ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let value = conv2DBackpropInput(shape: shape, filter: filter, strides: strides, - padding: padding) + let value = conv2DBackpropInput( + shape: shape, filter: filter, strides: strides, padding: padding) return (value, { v in - return ( - self.conv2DBackpropFilter(input: v, filterSizes: shape, strides: strides, - padding: padding), - v.convolved2D(withFilter: filter, strides: strides, padding: padding) - ) + ( + self.conv2DBackpropFilter( + input: v, filterSizes: shape, strides: strides, padding: padding), + v.convolved2D(withFilter: filter, strides: strides, padding: padding)) }) } @@ -204,14 +199,13 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { _ strides: (Int, Int, Int, Int), _ padding: Padding ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let value = conv2DBackpropFilter(input: input, filterSizes: filterSizes, - strides: strides, padding: padding) + let value = conv2DBackpropFilter( + input: input, filterSizes: filterSizes, strides: strides, padding: padding) return (value, { v in - return ( - self.conv2DBackpropInput(shape: filterSizes, filter: v, strides: strides, - padding: padding), - input.convolved2D(withFilter: v, strides: strides, padding: padding) - ) + ( + self.conv2DBackpropInput( + shape: filterSizes, filter: v, strides: strides, padding: padding), + input.convolved2D(withFilter: v, strides: strides, padding: padding)) }) } @@ -221,19 +215,15 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { strides: (Int, Int, Int, Int), padding: Padding ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let value = convolved2D(withFilter: filter, strides: strides, - padding: padding) + let value = convolved2D(withFilter: filter, strides: strides, padding: padding) return (value, { v in - return ( + ( v.conv2DBackpropInput( shape: self.shapeTensor, filter: filter, - strides: strides, padding: padding - ), + strides: strides, padding: padding), v.conv2DBackpropFilter( input: self, filterSizes: filter.shapeTensor, - strides: strides, padding: padding - ) - ) + strides: strides, padding: padding)) }) } @@ -245,10 +235,9 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { ) -> (Tensor, (Tensor) -> Tensor) { // TODO: Currently this is not higher order differentiable. Redefine in // closed form. - let value = maxPooled(kernelSize: kernelSize, strides: strides, - padding: padding) + let value = maxPooled(kernelSize: kernelSize, strides: strides, padding: padding) return (value, { v in - return Raw.maxPoolGradV2( + Raw.maxPoolGradV2( origInput: self, origOutput: value, grad: v, @@ -256,8 +245,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { Int32(kernelSize.2), Int32(kernelSize.3)]), strides: Tensor([Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)]), - padding: padding.raw - ) + padding: padding.raw) }) } @@ -269,22 +257,20 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { ) -> (Tensor, (Tensor) -> Tensor) { // TODO: Currently this is not higher order differentiable. Redefine in // closed form. 
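+    // Note: `Raw.avgPoolGrad` distributes each incoming gradient value evenly over the
+    // corresponding pooling window (dividing by the window size), which is the derivative of
+    // taking the window average.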
- let value = averagePooled(kernelSize: kernelSize, strides: strides, - padding: padding) + let value = averagePooled(kernelSize: kernelSize, strides: strides, padding: padding) return (value, { v in - return Raw.avgPoolGrad( + Raw.avgPoolGrad( origInputShape: self.shapeTensor, grad: v, ksize: [Int32(kernelSize.0), Int32(kernelSize.1), Int32(kernelSize.2), Int32(kernelSize.3)], strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)], - padding: padding.raw - ) + padding: padding.raw) }) } } -public extension Tensor where Scalar: FloatingPoint { +public extension Tensor where Scalar : FloatingPoint { /// Computes a 2-D convolution using `self` as input, with the specified /// filter, strides, and padding. /// @@ -295,11 +281,10 @@ public extension Tensor where Scalar: FloatingPoint { /// - padding: The padding for the operation. /// - Precondition: `self` must have rank 4. /// - Precondition: `filter` must have rank 4. - @inlinable @inline(__always) + @inlinable @differentiable( - wrt: (self, filter), vjp: _vjpConvolved2D - where Scalar: TensorFlowFloatingPoint - ) + wrt: (self, filter), + vjp: _vjpConvolved2D where Scalar: TensorFlowFloatingPoint) func convolved2D( withFilter filter: Tensor, strides: (Int, Int, Int, Int), @@ -321,11 +306,10 @@ public extension Tensor where Scalar: FloatingPoint { /// - strides: The strides of the sliding filter for each dimension of the /// input. /// - padding: The padding for the operation. - @inlinable @inline(__always) + @inlinable @differentiable( - wrt: self, vjp: _vjpMaxPooled(kernelSize:strides:padding:) - where Scalar : TensorFlowFloatingPoint - ) + wrt: self, + vjp: _vjpMaxPooled(kernelSize:strides:padding:) where Scalar : TensorFlowFloatingPoint) func maxPooled( kernelSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), @@ -348,11 +332,10 @@ public extension Tensor where Scalar: FloatingPoint { /// - strides: The strides of the sliding filter for each dimension of the /// input. /// - padding: The padding for the operation. - @inlinable @inline(__always) + @inlinable @differentiable( - wrt: self, vjp: _vjpAveragePooled(kernelSize:strides:padding:) - where Scalar : TensorFlowFloatingPoint - ) + wrt: self, + vjp: _vjpAveragePooled(kernelSize:strides:padding:) where Scalar : TensorFlowFloatingPoint) func averagePooled( kernelSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), diff --git a/Sources/DeepLearning/PythonConversion.swift b/Sources/DeepLearning/PythonConversion.swift new file mode 100644 index 000000000..a1b10d30a --- /dev/null +++ b/Sources/DeepLearning/PythonConversion.swift @@ -0,0 +1,174 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +import TensorFlow +#endif + +#if canImport(Python) +import Python + +/// The `numpy` Python module. +/// Note: Global variables are lazy, so the following declaration won't produce +// a Python import error until it is first used. 
+private let np = Python.import("numpy") + +private func debugLogNumpyError(_ message: String) { + debugLog("NumPy conversion error: " + message) +} + +extension ShapedArray : ConvertibleFromNumpyArray + where Scalar : NumpyScalarCompatible { + /// Creates a `ShapedArray` with the same shape and scalars as the specified + /// `numpy.ndarray` instance. + /// + /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. + /// - Precondition: The `numpy` Python package must be installed. + /// - Precondition: `numpyArray` must have a compatible scalar `dtype`. + public init?(numpy numpyArray: PythonObject) { + // Check if input is a `numpy.ndarray` instance. + guard Python.isinstance(numpyArray, np.ndarray) == true else { + debugLogNumpyError(""" + PythonObject input has type '\(Python.type(numpyArray))' and is not \ + an instance of 'numpy.ndarray'. + """) + return nil + } + // Check if the dtype of the `ndarray` is compatible with the `Scalar` + // type. + guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { + debugLogNumpyError(""" + 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ + Swift type '\(Scalar.self)'. + """) + return nil + } + + let pyShape = numpyArray.__array_interface__["shape"] + guard let shape = [Int](pyShape) else { + debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") + return nil + } + + // Make sure that the array is contiguous in memory. This does a copy if + // the array is not already contiguous in memory. + let contiguousNumpyArray = np.ascontiguousarray(numpyArray) + + guard let ptrVal = + UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { + debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") + return nil + } + // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape + // of `(0,)`). + guard let ptr = UnsafePointer(bitPattern: ptrVal) else { + fatalError("'numpy.ndarray' data pointer was nil") + } + // This code avoids calling `init(shape: [Int], scalars: S)`, + // which inefficiently copies scalars one by one. Instead, + // `init(shape: [Int], scalars: [Scalar])` is called, which efficiently + // does a `memcpy` of the entire `scalars` array. + // Unecessary copying is minimized. + let dummyPointer = UnsafeMutablePointer.allocate(capacity: 1) + let scalarCount = shape.reduce(1, *) + var scalars: [Scalar] = Array(repeating: dummyPointer.move(), + count: scalarCount) + dummyPointer.deallocate() + scalars.withUnsafeMutableBufferPointer { buffPtr in + buffPtr.baseAddress!.assign(from: ptr, count: scalarCount) + } + self.init(shape: shape, scalars: scalars) + } +} + +extension Tensor : ConvertibleFromNumpyArray + where Scalar : NumpyScalarCompatible { + /// Creates a tensor with the same shape and scalars as the specified + /// `numpy.ndarray` instance. + /// + /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. + /// - Precondition: The `numpy` Python package must be installed. + /// - Returns: `numpyArray` converted to an `Array`. Returns `nil` if + /// `numpyArray` does not have a compatible scalar `dtype`. + public init?(numpy numpyArray: PythonObject) { + // Check if input is a `numpy.ndarray` instance. + guard Python.isinstance(numpyArray, np.ndarray) == true else { + debugLogNumpyError(""" + PythonObject input has type '\(Python.type(numpyArray))' and is not \ + an instance of 'numpy.ndarray'. + """) + return nil + } + // Check if the dtype of the `ndarray` is compatible with the `Scalar` + // type. 
+ guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { + debugLogNumpyError(""" + 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ + Swift type '\(Scalar.self)'. + """) + return nil + } + + let pyShape = numpyArray.__array_interface__["shape"] + guard let dimensions = [Int](pyShape) else { + debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") + return nil + } + let shape = TensorShape(dimensions) + + // Make sure that the array is contiguous in memory. This does a copy if + // the array is not already contiguous in memory. + let contiguousNumpyArray = np.ascontiguousarray(numpyArray) + + guard let ptrVal = + UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { + debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") + return nil + } + // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape + // of `(0,)`). + guard let ptr = UnsafePointer(bitPattern: ptrVal) else { + fatalError("'numpy.ndarray' data pointer was nil") + } + let buffPtr = UnsafeBufferPointer(start: ptr, + count: Int(shape.contiguousSize)) + self.init(shape: shape, scalars: buffPtr) + } +} + +extension ShapedArray where Scalar : NumpyScalarCompatible { + /// Creates a `numpy.ndarray` instance with the same shape and scalars as + /// this `ShapedArray`. + /// + /// - Precondition: The `numpy` Python package must be installed. + public func makeNumpyArray() -> PythonObject { + return scalars.makeNumpyArray().reshape(shape) + } +} + +extension Tensor where Scalar : NumpyScalarCompatible { + /// Creates a `numpy.ndarray` instance with the same shape and scalars as + /// this tensor. + /// + /// - Precondition: The `numpy` Python package must be installed. + public func makeNumpyArray() -> PythonObject { return array.makeNumpyArray() } +} + +extension TensorShape : PythonConvertible { + public var pythonObject: PythonObject { + return dimensions.pythonObject + } +} + +#endif // canImport(Python) diff --git a/Tests/DeepLearningTests/InitializerTests.swift b/Tests/DeepLearningTests/InitializerTests.swift new file mode 100644 index 000000000..f91109065 --- /dev/null +++ b/Tests/DeepLearningTests/InitializerTests.swift @@ -0,0 +1,97 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import XCTest +@testable import DeepLearning + +final class InitializerTests: XCTestCase { + func testInitializers() { + let scalar = Tensor(1) + let matrix: Tensor = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] + let broadcastScalar = Tensor(broadcasting: 10, rank: 3) + let some4d = Tensor( + shape: [2, 1, 2, 1], + scalars: AnyRandomAccessCollection([2, 3, 4, 5])) + XCTAssertEqual(ShapedArray(shape: [2, 1, 2, 1], scalars: [2, 3, 4, 5]), some4d.array) + XCTAssertEqual(ShapedArray(shape: [], scalars: [1]), scalar.array) + XCTAssertEqual(ShapedArray(shape: [2, 3], scalars: [1, 2, 3, 4, 5, 6]), matrix.array) + XCTAssertEqual(ShapedArray(shape: [1, 1, 1], scalars: [10]), broadcastScalar.array) + } + + func testFactoryInitializers() { + let x = Tensor(ones: [1, 10]) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [1, 10]), x.array) + } + + func testNumericInitializers() { + let x = Tensor(oneHotAtIndices: [0, 2, -1, 1], depth: 3) + XCTAssertEqual(ShapedArray( + shape: [4, 3], + scalars: [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]), x.array) + } + + func testScalarToTensorConversion() { + let tensor = Tensor(broadcasting: 42, rank: 4) + XCTAssertEqual([1, 1, 1, 1], tensor.shape) + XCTAssertEqual([42], tensor.scalars) + } + + func testArrayConversion() { + let array3D = ShapedArray(repeating: 1.0, shape: [2, 3, 4]) + let tensor3D = Tensor(array3D) + XCTAssertEqual(array3D, tensor3D.array) + } + + func testNonTPUDataTypeCast() { + // TPU does not support Int8 or 16 casting. + guard !_RuntimeConfig.executionMode.isTPU else { return } + + let x = Tensor(ones: [5, 5]) + let ints = Tensor(x) + let floats = Tensor(x) + let i8s = Tensor(floats) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), ints.array) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), floats.array) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), i8s.array) + } + + func testTPUDataTypeCast() { + // Non-TPU mode (e.g. eager) does not support Uint32 casting. + guard _RuntimeConfig.executionMode.isTPU else { return } + + let x = Tensor(ones: [5, 5]) + let ints = Tensor(x) + let floats = Tensor(x) + let u32s = Tensor(floats) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), ints.array) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), floats.array) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), u32s.array) + } + + func testNonTPUBoolToNumericCast() { + // TPU does not support Int8 or 16 casting. + // + // When changing to UInt32, got another TPU/XLA compilation error when + // converting from bools to Uint32 (different from missing kernel error). + if _RuntimeConfig.executionMode.isTPU { return } + + let bools = Tensor(shape: [2, 2], scalars: [true, false, true, false]) + let ints = Tensor(bools) + let floats = Tensor(bools) + let i8s = Tensor(bools) + XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), ints.array) + XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), floats.array) + XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), i8s.array) + } +} diff --git a/Tests/DeepLearningTests/OperatorTests/BasicTests.swift b/Tests/DeepLearningTests/OperatorTests/BasicTests.swift new file mode 100644 index 000000000..112430984 --- /dev/null +++ b/Tests/DeepLearningTests/OperatorTests/BasicTests.swift @@ -0,0 +1,452 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest +@testable import DeepLearning + +final class BasicOperatorTests: XCTestCase { + func testElementIndexing() { + // NOTE: cannot test multiple `Tensor.shape` or `Tensor.scalars` directly + // until send and receive are implemented (without writing a bunch of mini + // tests). Instead, `Tensor.array` is called to make a ShapedArray host copy + // and the ShapedArray is tested. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let element2D = tensor3D[2] + let element1D = tensor3D[1][3] + let element0D = tensor3D[2][0][3] + + let array2D = element2D.array + let array1D = element1D.array + let array0D = element0D.array + + /// Test shapes + XCTAssertEqual([4, 5], array2D.shape) + XCTAssertEqual([5], array1D.shape) + XCTAssertEqual([], array0D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 35.0, to: 40, by: 1)), array1D.scalars) + XCTAssertEqual([43], array0D.scalars) + } + + func testElementIndexingAssignment() { + // NOTE: cannot test multiple `Tensor.shape` or `Tensor.scalars` directly + // until send and receive are implemented (without writing a bunch of mini + // tests). Instead, `Tensor.array` is called to make a ShapedArray host copy + // and the ShapedArray is tested. + var tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + tensor3D[2] = Tensor( + shape: [4, 5], scalars: Array(stride(from: 20.0, to: 40, by: 1))) + let element2D = tensor3D[2] + let element1D = tensor3D[1][3] + let element0D = tensor3D[2][0][3] + + let array2D = element2D.array + let array1D = element1D.array + let array0D = element0D.array + + /// Test shapes + XCTAssertEqual([4, 5], array2D.shape) + XCTAssertEqual([5], array1D.shape) + XCTAssertEqual([], array0D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 20.0, to: 40, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 35.0, to: 40, by: 1)), array1D.scalars) + XCTAssertEqual([23], array0D.scalars) + } + + func testNestedElementIndexing() { + // NOTE: This test could use a clearer name, along with other "indexing" + // tests. Note to update corresponding test names in other files + // (shaped_array.test) as well. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let element1D = tensor3D[1, 3] + let element0D = tensor3D[2, 0, 3] + + let array1D = element1D.array + let array0D = element0D.array + + /// Test shapes + XCTAssertEqual([5], array1D.shape) + XCTAssertEqual([], array0D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 35.0, to: 40, by: 1)), array1D.scalars) + XCTAssertEqual([43], array0D.scalars) + } + + func testSliceIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. 
+ let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let slice3D = tensor3D[2...] + let slice2D = tensor3D[1][0..<2] + let slice1D = tensor3D[0][0][3..<5] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testSliceIndexingAssignment() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. + var tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + tensor3D[2, 0..<5, 0..<6] = Tensor( + shape: [4, 5], scalars: Array(stride(from: 20.0, to: 40, by: 1))) + let slice3D = tensor3D[2...] + let slice2D = tensor3D[1][0..<2] + let slice1D = tensor3D[0][0][3..<5] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 20.0, to: 40, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testEllipsisIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. + var tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + tensor3D[2, TensorRange.ellipsis] = Tensor( + shape: [4, 5], scalars: Array(stride(from: 20.0, to: 40, by: 1))) + let slice3D = tensor3D[2..., TensorRange.ellipsis] + let slice2D = tensor3D[1][0..<2] + let slice1D = tensor3D[0][0][3..<5] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 20.0, to: 40, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testNewAxisIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. 
+ let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let newAxis = TensorRange.newAxis + let ellipsis = TensorRange.ellipsis + let slice3D = tensor3D[2..., newAxis, ellipsis] + let slice2D = tensor3D[1, newAxis][0..<1, 0..<2] + let slice1D = tensor3D[0][newAxis, 0][0..<1, 3..<5, newAxis] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 1, 4, 5], array3D.shape) + XCTAssertEqual([1, 2, 5], array2D.shape) + XCTAssertEqual([1, 2, 1], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testSqueezeAxisIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let newAxis = TensorRange.newAxis + let ellipsis = TensorRange.ellipsis + let squeezeAxis = TensorRange.squeezeAxis + let slice3D = tensor3D[2..., newAxis, ellipsis][squeezeAxis, squeezeAxis] + let slice2D = tensor3D[1, newAxis][squeezeAxis, 0..<2] + let slice1D = tensor3D[0..<1, 0, 3..<5, newAxis][ + squeezeAxis, ellipsis, squeezeAxis] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testStridedSliceIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let slice3D = tensor3D[2...] + let slice2D = tensor3D[1][0..<3..2] + let slice1D = tensor3D[0][0][1..<5..2] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual( + Array(stride(from: 20.0, to: 25, by: 1)) + + Array(stride(from: 30.0, to: 35, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 1.0, to: 5, by: 2)), array1D.scalars) + } + + func testStridedSliceIndexingAssignment() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. 
+ var tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + tensor3D[2, 0..<5..2, 0..<6] = Tensor( + shape: [2, 5], scalars: Array(stride(from: 20.0, to: 40, by: 2))) + let slice3D = tensor3D[2...] + let slice2D = tensor3D[1][0..<2] + let slice1D = tensor3D[0][0][3..<5] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual( + Array(stride(from: 20.0, to: 30, by: 2)) + + Array(stride(from: 45.0, to: 50, by: 1)) + + Array(stride(from: 30.0, to: 40, by: 2)) + + Array(stride(from: 55.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testWholeTensorSlicing() { + let t: Tensor = [[[1, 1, 1], [2, 2, 2]], + [[3, 3, 3], [4, 4, 4]], + [[5, 5, 5], [6, 6, 6]]] + let slice2 = t.slice(lowerBounds: [1, 0, 0], upperBounds: [2, 1, 3]) + XCTAssertEqual(ShapedArray(shape: [1, 1, 3], scalars: [3, 3, 3]), slice2.array) + } + + func testAdvancedIndexing() { + // NOTE: cannot test multiple `Tensor.shape` or `Tensor.scalars` directly + // until send and receive are implemented (without writing a bunch of mini + // tests). Instead, `Tensor.array` is called to make a ShapedArray host copy + // and the ShapedArray is tested. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let element2D = tensor3D[1..<3, 0, 3...] + let array2D = element2D.array + + // Test shape + XCTAssertEqual([2, 2], array2D.shape) + + // Test scalars + XCTAssertEqual(Array([23.0, 24.0, 43.0, 44.0]), array2D.scalars) + } + + func testConcatenation() { + // 2 x 3 + let t1 = Tensor([[0, 1, 2], [3, 4, 5]]) + // 2 x 3 + let t2 = Tensor([[6, 7, 8], [9, 10, 11]]) + let concatenated = t1 ++ t2 + let concatenated0 = t1.concatenated(with: t2) + let concatenated1 = t1.concatenated(with: t2, alongAxis: 1) + XCTAssertEqual(ShapedArray(shape: [4, 3], scalars: Array(0..<12)), concatenated.array) + XCTAssertEqual(ShapedArray(shape: [4, 3], scalars: Array(0..<12)), concatenated0.array) + XCTAssertEqual( + ShapedArray(shape: [2, 6], scalars: [0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11]), + concatenated1.array) + } + + func testVJPConcatenation() { + let a1 = Tensor([1,2,3,4]) + let b1 = Tensor([5,6,7,8,9,10]) + + let a2 = Tensor([1,1,1,1]) + let b2 = Tensor([1,1,1,1,1,1]) + + let grads = gradient(at: a2, b2) { a, b in + return ((a1 * a) ++ (b1 * b)).sum() + } + + XCTAssertEqual(a1, grads.0) + XCTAssertEqual(b1, grads.1) + } + + func testVJPConcatenationNegativeAxis() { + let a1 = Tensor([1,2,3,4]) + let b1 = Tensor([5,6,7,8,9,10]) + + let a2 = Tensor([1,1,1,1]) + let b2 = Tensor([1,1,1,1,1,1]) + + let grads = gradient(at: a2, b2) { a, b in + return (a1 * a).concatenated(with: b1 * b, alongAxis: -1).sum() + } + + XCTAssertEqual(a1, grads.0) + XCTAssertEqual(b1, grads.1) + } + + func testTranspose() { + // 3 x 2 -> 2 x 3 + let xT = Tensor([[1, 2], [3, 4], [5, 6]]).transposed() + let xTArray = xT.array + XCTAssertEqual(2, xTArray.rank) + XCTAssertEqual([2, 3], xTArray.shape) + XCTAssertEqual([1, 3, 5, 2, 4, 6], xTArray.scalars) + } + + func testReshape() { + // 2 x 3 -> 1 x 3 x 1 x 2 x 1 + let matrix = Tensor([[0, 1, 2], [3, 4, 5]]) + let reshaped = matrix.reshaped(to: [1, 3, 1, 2, 1]) + + XCTAssertEqual([1, 3, 1, 2, 1], 
reshaped.shape) + XCTAssertEqual(Array(0..<6), reshaped.scalars) + } + + func testFlatten() { + // 2 x 3 -> 6 + let matrix = Tensor([[0, 1, 2], [3, 4, 5]]) + let flattened = matrix.flattened() + + XCTAssertEqual([6], flattened.shape) + XCTAssertEqual(Array(0..<6), flattened.scalars) + } + + func testFlatten0D() { + let scalar = Tensor(5) + let flattened = scalar.flattened() + XCTAssertEqual([1], flattened.shape) + XCTAssertEqual([5], flattened.scalars) + } + + func testReshapeToScalar() { + // 1 x 1 -> scalar + let z = Tensor([[10]]).reshaped(to: []) + XCTAssertEqual([], z.shape) + } + + func testReshapeTensor() { + // 2 x 3 -> 1 x 3 x 1 x 2 x 1 + let x = Tensor(repeating: 0.0, shape: [2, 3]) + let y = Tensor(repeating: 0.0, shape: [1, 3, 1, 2, 1]) + let result = x.reshaped(like: y) + XCTAssertEqual([1, 3, 1, 2, 1], result.shape) + } + + func testUnbroadcast1() { + let x = Tensor(repeating: 1, shape: [2, 3, 4, 5]) + let y = Tensor(repeating: 1, shape: [4, 5]) + let z = x.unbroadcast(like: y) + XCTAssertEqual(ShapedArray(repeating: 6, shape: [4, 5]), z.array) + } + + func testUnbroadcast2() { + let x = Tensor(repeating: 1, shape: [2, 3, 4, 5]) + let y = Tensor(repeating: 1, shape: [3, 1, 5]) + let z = x.unbroadcast(like: y) + XCTAssertEqual(ShapedArray(repeating: 8, shape: [3, 1, 5]), z.array) + } + + func testSliceUpdate() { + guard !_RuntimeConfig.executionMode.isTPU else { return } + var t1 = Tensor([[1, 2, 3], [4, 5, 6]]) + t1[0] = Tensor(zeros: [3]) + XCTAssertEqual(ShapedArray(shape:[2, 3], scalars: [0, 0, 0, 4, 5, 6]), t1.array) + var t2 = t1 + t2[0][2] = Tensor(3) + XCTAssertEqual(ShapedArray(shape:[2, 3], scalars: [0, 0, 3, 4, 5, 6]), t2.array) + var t3 = Tensor([[true, true, true], [false, false, false]]) + t3[0][1] = Tensor(false) + XCTAssertEqual(ShapedArray( + shape:[2, 3], scalars: [true, false, true, false, false, false]), t3.array) + var t4 = Tensor([[true, true, true], [false, false, false]]) + t4[0] = Tensor(repeating: false, shape: [3]) + XCTAssertEqual(ShapedArray(repeating: false, shape: [2, 3]), t4.array) + } + + func testBroadcastTensor() { + // 1 -> 2 x 3 x 4 + let one = Tensor(1) + var target = Tensor(repeating: 0.0, shape: [2, 3, 4]) + let broadcasted = one.broadcast(like: target) + XCTAssertEqual(Tensor(repeating: 1, shape: [2, 3, 4]), broadcasted) + target .= Tensor(repeating: 1, shape: [1, 3, 1]) + XCTAssertEqual(Tensor(repeating: 1, shape: [2, 3, 4]), target) + } +} diff --git a/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift b/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift new file mode 100644 index 000000000..f667dbbcc --- /dev/null +++ b/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift @@ -0,0 +1,30 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import XCTest +@testable import DeepLearning + +final class ComparisonOperatorTests: XCTestCase { + func testElementwiseComparison() { + let x = Tensor([0, 1, 2]) + let y = Tensor([2, 1, 3]) + XCTAssertEqual((x .< y).scalars, [true, false, true]) + } + + func testLexicographicalComparison() { + let x = Tensor([0, 1, 2, 3, 4]) + let y = Tensor([2, 3, 4, 5, 6]) + XCTAssertTrue(x < y) + } +} diff --git a/Tests/DeepLearningTests/OperatorTests/MathTests.swift b/Tests/DeepLearningTests/OperatorTests/MathTests.swift new file mode 100644 index 000000000..8c1898fbb --- /dev/null +++ b/Tests/DeepLearningTests/OperatorTests/MathTests.swift @@ -0,0 +1,199 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest +@testable import DeepLearning + +final class MathOperatorTests: XCTestCase { + func testReduction() { + // 2 x 5 + let x = Tensor([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]) + XCTAssertEqual(Tensor(30), x.sum().toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [2, 4, 6, 8, 10]), + x.sum(squeezingAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [1, 5], scalars: [2, 4, 6, 8, 10]), + x.sum(alongAxes: 0).toHost(shape: [])) + + XCTAssertEqual(Tensor(14400), x.product().toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [1, 4, 9, 16, 25]), + x.product(squeezingAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [1, 5], scalars: [1, 4, 9, 16, 25]), + x.product(alongAxes: 0).toHost(shape: [])) + + XCTAssertEqual(Tensor(3), x.mean().toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [1, 2, 3, 4, 5]), + x.mean(squeezingAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [1, 2, 3, 4, 5]), + x.mean(alongAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [2], scalars: [3, 3]), + x.mean(squeezingAxes: 1).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [1, 2], scalars: [3, 3]), + x.mean(alongAxes: 1).toHost(shape: [])) + + XCTAssertEqual(Tensor(2), x.variance().toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [0, 0, 0, 0, 0]), + x.variance(squeezingAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [0, 0, 0, 0, 0]), + x.variance(alongAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [2], scalars: [2, 2]), + x.variance(squeezingAxes: 1).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [1, 2], scalars: [2, 2]), + x.variance(alongAxes: 1).toHost(shape: [])) + } + + func testArgmax() { + // 2 x 3 + let x = Tensor([[0, 1, 2], [3, 4, 5]]) + let argmax0 = x.argmax(squeezingAxis: 0) + let argmax1 = x.argmax(squeezingAxis: 1) + let scalarsArgmax = x.argmax() + XCTAssertEqual(ShapedArray(shape: [3], scalars: [1, 1, 1]), argmax0.array) + XCTAssertEqual(ShapedArray(shape: [2], scalars: [2, 2]), argmax1.array) + XCTAssertEqual(ShapedArray(shape: [], scalars: [5]), scalarsArgmax.array) + } + + func testCeilAndFloor() { + let x = Tensor([-1.3, -0.4, 0.5, 1.6]) + let xFloor = 
floor(x) + let xCeil = ceil(x) + XCTAssertEqual(ShapedArray(shape: [4], scalars: [-2, -1, 0, 1]), xFloor.array) + XCTAssertEqual(ShapedArray(shape: [4], scalars: [-1, 0, 1, 2]), xCeil.array) + } + + func testSimpleMath() { + let x = Tensor([1.2, 1.2]) + let y = tanh(x) + let array = y.array + XCTAssertEqual([2], array.shape) + XCTAssertEqual([0.833655, 0.833655], array.scalars, accuracy: 0.0001) + } + + func testStandardDeviation() { + XCTAssertEqual(Tensor(0), Tensor([1]).standardDeviation()) + XCTAssertEqual(Tensor(0.5), Tensor([0, 1]).standardDeviation(alongAxes: 0)) + XCTAssertEqual(Tensor(0.5), Tensor([0, 1]).standardDeviation()) + XCTAssertEqual( + 2.87228132, + Tensor(rangeFrom: 0, to: 10, stride: 1).standardDeviation().scalarized(), + accuracy: 0.001) + let matrix = Tensor(rangeFrom: 0, to: 10, stride: 1).reshaped(to: [2, 5]) + XCTAssertEqual(2.87228132, matrix.standardDeviation().scalarized(), accuracy: 0.001) + XCTAssertEqual( + [1.4142, 1.4142], + matrix.standardDeviation(alongAxes: 1).array.scalars, + accuracy: 0.001) + } + + func test3Adds() { + let a = Tensor([1]) + let b = Tensor([2]) + let c = Tensor([3]) + + let o = a + b + c + XCTAssertEqual([6], o.scalars) + } + + func testMultiOpMath() { + let x = Tensor([1.2, 1.2]) + let y = Tensor([2.4, 2.4]) + let t1 = x + y + let t2 = t1 * t1 + let t3 = sqrt(t2) + + let array1 = t1.array + let array2 = t2.array + let array3 = t3.array + XCTAssertEqual([2], array1.shape) + XCTAssertEqual([2], array2.shape) + XCTAssertEqual([2], array3.shape) + XCTAssertEqual([3.6, 3.6], array1.scalars, accuracy: 0.001) + XCTAssertEqual([12.96, 12.96], array2.scalars, accuracy: 0.001) + XCTAssertEqual([3.6, 3.6], array3.scalars, accuracy: 0.001) + } + + func testXWPlusB() { + // Shape: 1 x 4 + let x = Tensor([[1.0, 2.0, 2.0, 1.0]]) + // Shape: 4 x 2 + let w = Tensor([[1.0, 0.0], [3.0, 0.0], [2.0, 3.0], [1.0, 0.0]]) + // Shape: 2 + let b = Tensor([0.5, 0.5]) + // Shape: 1 x 2 (broadcasted) + let result = matmul(x, w) + b + XCTAssertEqual([1, 2], result.shape) + XCTAssertEqual([12.5, 6.5], result.scalars) + } + + @inline(never) + func testXORInference() { + func xor(_ x: Float, _ y: Float) -> Float { + let x = Tensor([x, y]).reshaped(to: [1, 2]) + + // FIXME: If params are declared outside of `xor`, it would crash. 
+ // 2 x 4 + let w1 = Tensor( + [[-1.83586664, -0.20809225, 0.47667537, 1.90780607], + [-1.83523219, -0.51167348, 0.15490439, 1.91018065]]) + // 1 x 4 + let b1 = Tensor([[2.54353216, 0.25132703, -0.16503136, -0.85754058]]) + // 4 x 1 + let w2 = Tensor([[3.04350065], [0.35590511], [-0.3252157], [3.49349223]]) + // 1 x 1 + let b2 = Tensor([[-0.74635993]]) + + let o1 = tanh(matmul(x, w1) + b1) + let y = tanh(matmul(o1, w2) + b2) + return y.array.scalars[0] // TODO: use better scalar getter + } + + XCTAssertEqual(0.0, xor(0.0, 0.0), accuracy: 0.1) + XCTAssertEqual(1.0, xor(0.0, 1.0), accuracy: 0.1) + XCTAssertEqual(1.0, xor(1.0, 0.0), accuracy: 0.1) + XCTAssertEqual(0.0, xor(1.0, 1.0), accuracy: 0.1) + } + + func testMLPClassifierStruct() { + struct MLPClassifier { + // 2 x 4 + var w1 = Tensor([[1.0, 0.8, 0.4, 0.4], + [0.4, 0.3, 0.2, 0.1]]) + // 4 x 1 + var w2 = Tensor([[0.4], [0.4], [0.3], [0.9]]) + var b1 = Tensor(zeros: [1, 4]) + var b2 = Tensor(zeros: [1, 1]) + + func prediction(for x: Tensor) -> Tensor { + let o1 = tanh(matmul(x, w1) + b1) + return tanh(matmul(o1, w2) + b2) + } + } + + let input = Tensor([[1, 0.5]]) + let classifier = MLPClassifier() + let prediction = classifier.prediction(for: input) + XCTAssertEqual([0.816997], prediction.scalars, accuracy: 0.001) + } +} diff --git a/Tests/DeepLearningTests/TensorTests.swift b/Tests/DeepLearningTests/TensorTests.swift new file mode 100644 index 000000000..01e18fda8 --- /dev/null +++ b/Tests/DeepLearningTests/TensorTests.swift @@ -0,0 +1,81 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest +@testable import DeepLearning + +final class TensorTests: XCTestCase { + func testSimpleCond() { + func selectValue(_ pred: Bool) -> Tensor { + let a = Tensor(0) + let b = Tensor(1) + if pred { + return a + } + return b + } + + XCTAssertEqual(0, selectValue(true).scalar) + } + + @inline(never) + func testRankGetter() { + let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) + XCTAssertEqual(3, tensor.rank) + } + + // TODO: Merge all rank/shape getter tests into one when we support code motion to avoid sends. 
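  // A possible merged form once that is supported (an illustrative sketch only; the separate
  // getter tests below remain authoritative):
  //
  //   @inline(never)
  //   func testRankAndShapeGetters() {
  //     let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60))
  //     XCTAssertEqual(3, tensor.rank)
  //     XCTAssertEqual([3, 4, 5], tensor.shape)
  //   }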
+ + @inline(never) + func testRankGetter2() { + let vector = Tensor([1]) + XCTAssertEqual(1, vector.rank) + } + + @inline(never) + func testRankGetter3() { + let matrix = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + XCTAssertEqual(2, matrix.rank) + } + + @inline(never) + func testRankGetter4() { + let ones = Tensor(ones: [1, 2, 2, 2, 2, 2, 1]) + XCTAssertEqual(7, ones.rank) + } + + @inline(never) + func testShapeGetter() { + let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) + XCTAssertEqual([3, 4, 5], tensor.shape) + } + + @inline(never) + func testShapeGetter2() { + let vector = Tensor([1]) + XCTAssertEqual([1], vector.shape) + } + + @inline(never) + func testShapeGetter3() { + let matrix = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + XCTAssertEqual([2, 3], matrix.shape) + } + + @inline(never) + func testShapeGetter4() { + let ones = Tensor(ones: [1, 2, 2, 2, 2, 2, 1]) + XCTAssertEqual([1, 2, 2, 2, 2, 2, 1], ones.shape) + } +} diff --git a/Tests/DeepLearningTests/XCTestManifests.swift b/Tests/DeepLearningTests/XCTestManifests.swift index 96a9048a5..e75c25298 100644 --- a/Tests/DeepLearningTests/XCTestManifests.swift +++ b/Tests/DeepLearningTests/XCTestManifests.swift @@ -22,6 +22,10 @@ public func allTests() -> [XCTestCaseEntry] { testCase(TrivialModelTests.allTests), testCase(SequentialTests.allTests), testCase(LayerTests.allTests), + testCase(TensorTests.allTests), + testCase(BasicOperatorTests.allTests), + testCase(ComparisonOperatorTests.allTests), + testCase(MathOperatorTests.allTests), ] } #endif From 0e06843857b629f9643febf818fdf39032b5daf5 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 18:27:41 -0400 Subject: [PATCH 45/55] Minor bug fix. --- Sources/DeepLearning/Operators/Math.swift | 31 ++++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 8851080ec..7ab4fb6ba 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -658,8 +658,7 @@ public func softmax(_ x: Tensor) -> Tensor { // TODO: [AD]. 
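// Note: the implementation below exponentiates `x` directly, which can overflow for large
// inputs. A numerically stable sketch (assuming a `max(alongAxes:)` reduction is available for
// these scalars) would shift by the per-axis maximum before exponentiating:
//
//     let shifted = x - x.max(alongAxes: axis)
//     return exp(shifted) / exp(shifted).sum(alongAxes: axis)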
public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { let xExp = exp(x) - let xExpSum = Raw.sum(xExp, reductionIndices: Tensor(Int32(axis)), keepDims: true) - return xExp / xExpSum + return xExp / xExp.sum(alongAxes: Tensor(Int32(axis))) } @inlinable @@ -1340,7 +1339,13 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { @inlinable func _vjpSum(squeezingAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = sum(squeezingAxes: axes) - return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) }) + return (value, { [shape = shapeTensor] in + var result = $0 + for i in axes.array.scalars { + result = result.expandingShape(at: Int(i)) + } + return result.broadcast(toShape: shape) + }) } @inlinable @@ -1351,20 +1356,16 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { } @inlinable - func _vjpMean(squeezingAxes axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { - let value = mean(squeezingAxes: axes) - return (value, { [shape = shapeTensor, count = axes.map { shape[$0] }.reduce(1, *)] in - $0.broadcast(toShape: shape) / Tensor(Scalar(count)) - }) - } - - @inlinable - func _vjpMean( - squeezingAxes axes: Tensor - ) -> (Tensor, (Tensor) -> Tensor) { + func _vjpMean(squeezingAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = mean(squeezingAxes: axes) let count = Raw.gather(params: shapeTensor, indices: axes).product() - return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) / Tensor(count) }) + return (value, { [shape = shapeTensor] in + var result = $0 + for i in axes.array.scalars { + result = result.expandingShape(at: Int(i)) + } + return result.broadcast(toShape: shape) / Tensor(count) + }) } } From eb407cf8d13655f5d10a0eaadaf6340a687995dd Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 18:59:47 -0400 Subject: [PATCH 46/55] Addressed Richard's comments. 
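Much of this change normalizes whitespace around colons in generic constraints, protocol
conformances, and operator declarations, dropping the space before the colon. A representative
before/after, shown here only for illustration:

    // Before
    extension Tensor : AdditiveArithmetic where Scalar : Numeric { ... }
    // After
    extension Tensor: AdditiveArithmetic where Scalar: Numeric { ... }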
--- ...ents.swift => DifferentialOperators.swift} | 32 +- Sources/DeepLearning/Helpers.swift | 4 - Sources/DeepLearning/Initializers.swift | 32 +- Sources/DeepLearning/Layer.swift | 2 +- Sources/DeepLearning/Operators/Basic.swift | 87 ++++-- .../DeepLearning/Operators/Comparison.swift | 28 +- Sources/DeepLearning/Operators/Math.swift | 274 +++++++++--------- Sources/DeepLearning/Operators/NN.swift | 39 ++- Sources/DeepLearning/Operators/README.md | 41 --- Sources/DeepLearning/Optimizer.swift | 27 +- Sources/DeepLearning/PythonConversion.swift | 16 +- Sources/DeepLearning/Random.swift | 12 +- Sources/DeepLearning/Tensors.swift | 22 +- 13 files changed, 295 insertions(+), 321 deletions(-) rename Sources/DeepLearning/{Gradients.swift => DifferentialOperators.swift} (79%) delete mode 100644 Sources/DeepLearning/Operators/README.md diff --git a/Sources/DeepLearning/Gradients.swift b/Sources/DeepLearning/DifferentialOperators.swift similarity index 79% rename from Sources/DeepLearning/Gradients.swift rename to Sources/DeepLearning/DifferentialOperators.swift index 04a37fe8b..bfb53db77 100644 --- a/Sources/DeepLearning/Gradients.swift +++ b/Sources/DeepLearning/DifferentialOperators.swift @@ -22,14 +22,14 @@ import TensorFlow public extension Differentiable { @inlinable - func gradient( + func gradient( in f: @differentiable (Self) -> Tensor ) -> CotangentVector { return self.pullback(in: f)(Tensor(1)) } @inlinable - func valueWithGradient( + func valueWithGradient( in f: @differentiable (Self) -> Tensor ) -> (value: Tensor, gradient: CotangentVector) { let (y, pb) = self.valueWithPullback(in: f) @@ -37,7 +37,7 @@ public extension Differentiable { } @inlinable - func gradient( + func gradient( at x: T, in f: @differentiable (Self, T) -> Tensor ) -> (CotangentVector, T.CotangentVector) { @@ -45,7 +45,7 @@ public extension Differentiable { } @inlinable - func valueWithGradient( + func valueWithGradient( at x: T, in f: @differentiable (Self, T) -> Tensor ) -> (value: Tensor, gradient: (CotangentVector, T.CotangentVector)) { @@ -65,7 +65,7 @@ public func valueWithGradient( at x: T, in f: @differentiable (T) -> Tensor ) -> (value: Tensor, gradient: T.CotangentVector) -where T : Differentiable, R : TensorFlowFloatingPoint { +where T: Differentiable, R: TensorFlowFloatingPoint { let (y, pullback) = valueWithPullback(at: x, in: f) return (y, pullback(Tensor(1))) } @@ -76,7 +76,7 @@ public func valueWithGradient( _ y: U, in f: @differentiable (T, U) -> Tensor ) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector)) - where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { let (y, pullback) = valueWithPullback(at: x, y, in: f) return (y, pullback(Tensor(1))) } @@ -88,7 +88,7 @@ public func valueWithGradient( _ z: V, in f: @differentiable (T, U, V) -> Tensor ) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector, V.CotangentVector)) - where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { let (y, pullback) = valueWithPullback(at: x, y, z, in: f) return (y, pullback(Tensor(1))) } @@ -99,7 +99,7 @@ public func valueWithGradient( public func valueWithGradient( of f: @escaping @differentiable (T) -> Tensor ) -> (T) -> (value: Tensor, gradient: T.CotangentVector) - where T : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, R: 
TensorFlowFloatingPoint { return { x in valueWithGradient(at: x, in: f) } } @@ -107,7 +107,7 @@ public func valueWithGradient( public func valueWithGradient( of f: @escaping @differentiable (T, U) -> Tensor ) -> (T, U) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector)) - where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return { x, y in valueWithGradient(at: x, y, in: f) } } @@ -117,7 +117,7 @@ public func valueWithGradient( ) -> (T, U, V) -> ( value: Tensor, gradient: (T.CotangentVector, U.CotangentVector, V.CotangentVector)) - where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { return { x, y, z in valueWithGradient(at: x, y, z, in: f) } } @@ -127,7 +127,7 @@ public func valueWithGradient( public func gradient( at x: T, in f: @differentiable (T) -> Tensor -) -> T.CotangentVector where T : Differentiable, R : TensorFlowFloatingPoint { +) -> T.CotangentVector where T: Differentiable, R: TensorFlowFloatingPoint { return pullback(at: x, in: f)(Tensor(1)) } @@ -137,7 +137,7 @@ public func gradient( _ y: U, in f: @differentiable (T, U) -> Tensor ) -> (T.CotangentVector, U.CotangentVector) - where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return pullback(at: x, y, in: f)(Tensor(1)) } @@ -148,7 +148,7 @@ public func gradient( _ z: V, in f: @differentiable (T, U, V) -> Tensor ) -> (T.CotangentVector, U.CotangentVector, V.CotangentVector) - where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { return pullback(at: x, y, z, in: f)(Tensor(1)) } @@ -157,7 +157,7 @@ public func gradient( @inlinable public func gradient( of f: @escaping @differentiable (T) -> Tensor -) -> (T) -> T.CotangentVector where T : Differentiable, R : TensorFlowFloatingPoint { +) -> (T) -> T.CotangentVector where T: Differentiable, R: TensorFlowFloatingPoint { return { x in gradient(at: x, in: f) } } @@ -165,7 +165,7 @@ public func gradient( public func gradient( of f: @escaping @differentiable (T, U) -> Tensor ) -> (T, U) -> (T.CotangentVector, U.CotangentVector) - where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return { x, y in gradient(at: x, y, in: f) } } @@ -173,6 +173,6 @@ public func gradient( public func gradient( of f: @escaping @differentiable (T, U, V) -> Tensor ) -> (T, U, V) -> (T.CotangentVector, U.CotangentVector, V.CotangentVector) - where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { return { x, y, z in gradient(at: x, y, z, in: f) } } diff --git a/Sources/DeepLearning/Helpers.swift b/Sources/DeepLearning/Helpers.swift index 39bcf1e1d..4d9c0217b 100644 --- a/Sources/DeepLearning/Helpers.swift +++ b/Sources/DeepLearning/Helpers.swift @@ -30,7 +30,3 @@ public func identity(_ x: Tensor) -> Tensor { func pow(_ x: T, _ y: T) -> T { return T(pow(Double(x), Double(y))) } - -extension Array where Element : Differentiable { - -} diff --git a/Sources/DeepLearning/Initializers.swift 
b/Sources/DeepLearning/Initializers.swift index 204ca8a26..17884c3ca 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -36,7 +36,7 @@ public extension Tensor { /// - shape: The dimensions of the tensor. @inlinable @differentiable( - vjp: _vjpInit(repeating:shape:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpInit(repeating:shape:) where Scalar: TensorFlowFloatingPoint) init(repeating repeatedValue: Scalar, shape: TensorShape) { self = Raw.fill( dims: Tensor(shape.dimensions.map(Int32.init)), @@ -46,7 +46,7 @@ public extension Tensor { /// Creates a tensor by broadcasting the given scalar to a given rank with /// all dimensions being 1. @inlinable - // @differentiable(where Scalar : TensorFlowFloatingPoint) + // @differentiable(where Scalar: TensorFlowFloatingPoint) init(broadcasting scalar: Scalar, rank: Int) { self = Tensor(scalar).reshaped(to: TensorShape(repeating: 1, count: rank)) } @@ -60,7 +60,7 @@ public extension Tensor { } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpInit( repeating repeatedValue: Scalar, @@ -76,7 +76,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // Casting //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { /// Perform an element-wise type conversion from a `Bool` tensor. @inlinable init(_ other: Tensor) { @@ -86,16 +86,16 @@ public extension Tensor where Scalar : Numeric { /// Perform an element-wise conversion from another `Tensor`. @inlinable @differentiable( - vjp: _vjpCast where Scalar : TensorFlowFloatingPoint, + vjp: _vjpCast where Scalar: TensorFlowFloatingPoint, OtherScalar: TensorFlowFloatingPoint) - init(_ other: Tensor) { + init(_ other: Tensor) { self = Raw.cast(other) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable - static func _vjpCast( + static func _vjpCast( _ other: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (Tensor(other), { v in Tensor(v) }) @@ -109,7 +109,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - // @differentiable(where Scalar : TensorFlowFloatingPoint) + // @differentiable(where Scalar: TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } @@ -131,7 +131,7 @@ public extension Tensor { /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] /// ``` /// - /// This is the opposite of `Tensor.unstacked`. + /// This is the opposite of `Tensor.unstack(alongAxis:)`. /// /// - Parameters: /// - tensors: Tensors to stack. @@ -143,7 +143,7 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - // @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) + // @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -181,14 +181,14 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. 
@inlinable - // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar: TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } } -// internal extension Tensor where Scalar : TensorFlowFloatingPoint { +// internal extension Tensor where Scalar: TensorFlowFloatingPoint { // @inlinable // static func _vjpStacking( // stacking tensors: [Tensor], @@ -206,7 +206,7 @@ public extension Tensor { // alongAxis axis: Int = 0 // ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { // let result = Tensor(concatenating: tensors, alongAxis: axis) -// let posAxis = axis < 0 ? axis + tensors[0].rank : axis +// let posAxis = axis < 0 ? axis + tensors[0].rank: axis // let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) // return (result, { [count = tensors.count] v in // if count == 1 { return Array.DifferentiableView([v]) } @@ -220,7 +220,7 @@ public extension Tensor { // Numeric //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { /// Creates a tensor with all scalars set to zero. /// /// - Parameter shape: Shape of the tensor. @@ -418,7 +418,7 @@ public extension Tensor where Scalar: BinaryFloatingPoint, } } -fileprivate extension Tensor where Scalar : BinaryFloatingPoint { +fileprivate extension Tensor where Scalar: BinaryFloatingPoint { private static func glorot( fromStandardUniform randomUniform: __shared Tensor, shape: __shared TensorShape diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 1ab238fe2..29052a3ef 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -558,7 +558,7 @@ public struct TransposedConv2D: Layer { self.activation = activation self.strides = strides self.padding = padding - self.paddingIndex = padding == .same ? 0 : 1 + self.paddingIndex = padding == .same ? 0: 1 } /// Returns the output obtained from applying the layer to the given input. diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 71a8970a0..56cdde2fc 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -29,10 +29,37 @@ public extension TensorFlowScalar { } public extension Tensor { + /// Unpacks the given dimension of a rank-`R` tensor into multiple rank-`(R-1)` tensors. Unpacks + /// `N` tensors from this tensor by chipping it along the `axis` dimension, where `N` is + /// inferred from this tensor's shape. For example, given a tensor with shape `[A, B, C, D]`: + /// + /// - If `axis == 0` then the `i`th tensor in the returned array is the slice + /// `self[i, :, :, :]` and each tensor in that array will have shape `[B, C, D]`. + /// (Note that the dimension unpacked along is gone, unlike + /// `Tensor.split(numSplits:alongAxis)`, or `Tensor.split(sizes:alongAxis)`). + /// - If `axis == 1` then the `i`th tensor in the returned array is the slice + /// `value[:, i, :, :]` and each tensor in that array will have shape `[A, C, D]`. + /// - Etc. + /// + /// This is the opposite of `Tensor.init(stacking:alongAxis:)`. + /// + /// - Parameters: + /// - axis: Dimension along which to unstack. Negative values wrap around. 
+ /// + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: Array containing the unstacked tensors. + @inlinable + // @differentiable(vjp: _vjpUnstack(alongAxis:) wrt: self where Scalar : TensorFlowFloatingPoint) + func unstack(alongAxis axis: Int = 0) -> [Tensor] { + return Raw.unpack(value: self, num: shape[axis], axis: Int64(axis)) + } + /// Reshape to the shape of the specified `Tensor`. /// - Precondition: The number of scalars matches the new shape. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func reshaped(like other: Tensor) -> Tensor { return reshaped(toShape: other.shapeTensor) } @@ -40,7 +67,7 @@ public extension Tensor { /// Reshape to the specified shape. /// - Precondition: The number of scalars matches the new shape. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func reshaped(to newShape: TensorShape) -> Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. return reshaped(toShape: Tensor({newShape.dimensions.map(Int32.init)}())) @@ -51,14 +78,14 @@ public extension Tensor { @inlinable @differentiable( wrt: self, - vjp: _vjpReshaped(toShape:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpReshaped(toShape:) where Scalar: TensorFlowFloatingPoint) func reshaped(toShape newShape: Tensor) -> Tensor { return Raw.reshape(self, shape: newShape) } /// Return a copy of the tensor collapsed into a 1-D `Tensor`, in row-major order. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func flattened() -> Tensor { return reshaped(to: [-1]) } @@ -66,14 +93,14 @@ public extension Tensor { /// Returns a shape-expanded `Tensor`, with a dimension of 1 inserted at the /// specified shape index. @inlinable - @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar: TensorFlowFloatingPoint) func expandingShape(at shapeIndex: Int) -> Tensor { return Raw.expandDims(self, dim: Tensor(Int32(shapeIndex))) } /// Returns a rank-lifted `Tensor` with a leading dimension of 1. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func rankLifted() -> Tensor { return expandingShape(at: 0) } @@ -81,7 +108,7 @@ public extension Tensor { /// Remove the specified dimensions of size 1 from the shape of a tensor. If no dimensions are /// specified, then all dimensions of size 1 will be removed. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func squeezingShape(at axes: Int...) -> Tensor { return squeezingShape(at: axes) } @@ -89,13 +116,13 @@ public extension Tensor { /// Remove the specified dimensions of size 1 from the shape of a tensor. If no dimensions are /// specified, then all dimensions of size 1 will be removed. 
@inlinable - @differentiable(wrt: self, vjp: _vjpSqueezingShape(at:) where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpSqueezingShape(at:) where Scalar: TensorFlowFloatingPoint) func squeezingShape(at axes: [Int]) -> Tensor { return Raw.squeeze(self, squeezeDims: axes.map(Int32.init)) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpReshaped(toShape newShape: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = reshaped(toShape: newShape) @@ -119,14 +146,14 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // Other Tensor Transformations //===------------------------------------------------------------------------------------------===// -infix operator ++ : AdditionPrecedence +infix operator ++: AdditionPrecedence public extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @inlinable @differentiable( wrt: self, - vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpTransposed(withPermutations:) where Scalar: TensorFlowFloatingPoint) func transposed(withPermutations permutations: Tensor) -> Tensor { return Raw.transpose(self, perm: permutations) } @@ -135,7 +162,7 @@ public extension Tensor { @inlinable @differentiable( wrt: self, - vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpTransposed(withPermutations:) where Scalar: TensorFlowFloatingPoint) func transposed(withPermutations permutations: [Int]) -> Tensor { let permutations = permutations.map(Int32.init) return transposed(withPermutations: Tensor(permutations)) @@ -144,14 +171,14 @@ public extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @inlinable @differentiable( - wrt: self, vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + wrt: self, vjp: _vjpTransposed(withPermutations:) where Scalar: TensorFlowFloatingPoint) func transposed(withPermutations permutations: Int...) -> Tensor { return transposed(withPermutations: permutations) } /// Returns a transposed tensor, with dimensions permuted in reverse order. @inlinable - @differentiable(wrt: self, vjp: _vjpTransposed() where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpTransposed() where Scalar: TensorFlowFloatingPoint) func transposed() -> Tensor { let defaultPermutations = rankTensor - 1 - Tensor( rangeFrom: 0, to: Int32(rank), stride: 1) @@ -163,7 +190,7 @@ public extension Tensor { /// specified axis. /// - Precondition: The axis must be in the range `-rank.. Tensor { return Tensor(concatenating: [self, other], alongAxis: axis) } @@ -174,13 +201,13 @@ public extension Tensor { /// and may be controversial. The existence/naming of `++` will be discussed /// during a later API design phase. @inlinable - @differentiable(where Scalar : TensorFlowFloatingPoint) + @differentiable(where Scalar: TensorFlowFloatingPoint) static func ++ (lhs: Tensor, rhs: Tensor) -> Tensor { return lhs.concatenated(with: rhs) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpTransposed( withPermutations permutations: Tensor @@ -211,7 +238,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { with other: Tensor, alongAxis axis: Int ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let idx = axis < 0 ? 
axis + rank : axis + let idx = axis < 0 ? axis + rank: axis let splits = Tensor([shapeTensor[idx], other.shapeTensor[idx]]) return (concatenated(with: other, alongAxis: axis), { result in let gradients = Raw.splitV( @@ -256,7 +283,7 @@ public extension Tensor { } // TODO: Why is this limited only to numeric data types whereas `broadcast` is not? -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { @inlinable func unbroadcast(toShape otherShape: Tensor) -> Tensor { let rankDiff = (rankTensor - otherShape.scalarCountTensor).rankLifted() @@ -284,7 +311,7 @@ public extension Tensor where Scalar : Numeric { // Padding //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { /// Returns a padded tensor according to the specified padding sizes. @inlinable func padded(forSizes sizes: [(before: Int, after: Int)], with value: Scalar = 0) -> Tensor { @@ -341,7 +368,7 @@ public extension Tensor { } } -public enum TensorRange : TensorRangeExpression { +public enum TensorRange: TensorRangeExpression { case ellipsis case newAxis case squeezeAxis @@ -355,7 +382,7 @@ public enum TensorRange : TensorRangeExpression { public var tensorRange: TensorRange { return self } } -extension TensorRange : Equatable { +extension TensorRange: Equatable { public static func == (lhs: TensorRange, rhs: TensorRange) -> Bool { switch (lhs, rhs) { case (.ellipsis, .ellipsis), @@ -382,45 +409,45 @@ public protocol TensorRangeExpression { } // TODO: Cannot extend non-nominal type 'UnboundedRange'. -// extension UnboundedRange : TensorRangeExpression { +// extension UnboundedRange: TensorRangeExpression { // public var tensorRange: TensorRange { return .ellipsis } // } -extension Int : TensorRangeExpression { +extension Int: TensorRangeExpression { public var tensorRange: TensorRange { return .index(self) } } -extension Range : TensorRangeExpression where Bound == Int { +extension Range: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .range(self, stride: 1) } } -extension ClosedRange : TensorRangeExpression where Bound == Int { +extension ClosedRange: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .closedRange(self, stride: 1) } } -extension PartialRangeFrom : TensorRangeExpression where Bound == Int { +extension PartialRangeFrom: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .partialRangeFrom(self, stride: 1) } } -extension PartialRangeUpTo : TensorRangeExpression where Bound == Int { +extension PartialRangeUpTo: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .partialRangeUpTo(self, stride: 1) } } -extension PartialRangeThrough : TensorRangeExpression where Bound == Int { +extension PartialRangeThrough: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .partialRangeThrough(self, stride: 1) } } -infix operator .. 
: StridedRangeFormationPrecedence +infix operator ..: StridedRangeFormationPrecedence precedencegroup StridedRangeFormationPrecedence { associativity: left higherThan: CastingPrecedence diff --git a/Sources/DeepLearning/Operators/Comparison.swift b/Sources/DeepLearning/Operators/Comparison.swift index 2bc7329be..02bf5fadf 100644 --- a/Sources/DeepLearning/Operators/Comparison.swift +++ b/Sources/DeepLearning/Operators/Comparison.swift @@ -16,14 +16,14 @@ import TensorFlow #endif -infix operator .< : ComparisonPrecedence -infix operator .<= : ComparisonPrecedence -infix operator .>= : ComparisonPrecedence -infix operator .> : ComparisonPrecedence -infix operator .== : ComparisonPrecedence -infix operator .!= : ComparisonPrecedence - -public extension Tensor where Scalar : Numeric & Comparable { +infix operator .<: ComparisonPrecedence +infix operator .<=: ComparisonPrecedence +infix operator .>=: ComparisonPrecedence +infix operator .>: ComparisonPrecedence +infix operator .==: ComparisonPrecedence +infix operator .!=: ComparisonPrecedence + +public extension Tensor where Scalar: Numeric & Comparable { /// Computes `lhs < rhs` element-wise and returns a `Tensor` of Boolean /// scalars. @inlinable static func .< (lhs: Tensor, rhs: Tensor) -> Tensor { @@ -105,7 +105,7 @@ public extension Tensor where Scalar : Numeric & Comparable { } } -extension Tensor : Equatable where Scalar : Equatable { +extension Tensor: Equatable where Scalar: Equatable { @inlinable public static func == (lhs: Tensor, rhs: Tensor) -> Bool { return (lhs .== rhs).all() @@ -117,7 +117,7 @@ extension Tensor : Equatable where Scalar : Equatable { } } -extension Tensor : Comparable where Scalar : Numeric & Comparable { +extension Tensor: Comparable where Scalar: Numeric & Comparable { /// Returns a Boolean value indicating whether the value of the first argument is /// lexicographically less than that of the second argument. @inlinable @@ -147,7 +147,7 @@ extension Tensor : Comparable where Scalar : Numeric & Comparable { } } -public extension Tensor where Scalar : Numeric & Comparable { +public extension Tensor where Scalar: Numeric & Comparable { /// Returns a Boolean value indicating whether the value of the first argument is /// lexicographically less than that of the second argument. @inlinable @@ -177,7 +177,7 @@ public extension Tensor where Scalar : Numeric & Comparable { } } -public extension Tensor where Scalar : Equatable { +public extension Tensor where Scalar: Equatable { /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. /// - Note: `.==` supports broadcasting. @inlinable @@ -222,9 +222,9 @@ public extension Tensor where Scalar : Equatable { } } -// TODO: infix operator ≈ : ComparisonPrecedence +// TODO: infix operator ≈: ComparisonPrecedence -public extension Tensor where Scalar : FloatingPoint & Equatable { +public extension Tensor where Scalar: FloatingPoint & Equatable { /// Returns a `Tensor` of Boolean values indicating whether the elements of `self` are /// approximately equal to those of `other`. 
@inlinable diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 7ab4fb6ba..66c25b50c 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -17,8 +17,8 @@ import TensorFlow #endif #if COMPILING_TENSORFLOW_MODULE -infix operator .> : ComparisonPrecedence -infix operator .== : ComparisonPrecedence +infix operator .>: ComparisonPrecedence +infix operator .==: ComparisonPrecedence #endif // TODO: @@ -29,19 +29,17 @@ infix operator .== : ComparisonPrecedence // Additive Group //===------------------------------------------------------------------------------------------===// -extension Tensor : AdditiveArithmetic where Scalar : Numeric { +extension Tensor: AdditiveArithmetic where Scalar: Numeric { /// A scalar zero tensor. @inlinable public static var zero: Tensor { - get { return Tensor(zeros: []) - } } /// Adds two tensors and produces their sum. /// - Note: `+` supports broadcasting. @inlinable - @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) public static func + (lhs: Tensor, rhs: Tensor) -> Tensor { return Raw.add(lhs, rhs) } @@ -49,13 +47,13 @@ extension Tensor : AdditiveArithmetic where Scalar : Numeric { /// Subtracts one tensor from another and produces their difference. /// - Note: `-` supports broadcasting. @inlinable - @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) public static func - (lhs: Tensor, rhs: Tensor) -> Tensor { return Raw.sub(lhs, rhs) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpAdd(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { return (lhs + rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in @@ -75,16 +73,16 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // Vector Space //===------------------------------------------------------------------------------------------===// -extension Tensor : VectorNumeric where Scalar : Numeric { +extension Tensor: VectorNumeric where Scalar: Numeric { /// Multiplies the scalar with every scalar of the tensor and produces the product. 
@inlinable - @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) public static func * (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs) * rhs } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpMultiply(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { return (lhs * rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in @@ -93,9 +91,9 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { } } -extension Tensor : ShapedVectorNumeric where Scalar : Numeric {} +extension Tensor: ShapedVectorNumeric where Scalar: Numeric {} -extension Tensor : Differentiable where Scalar : TensorFlowFloatingPoint { +extension Tensor: Differentiable where Scalar: TensorFlowFloatingPoint { public typealias TangentVector = Tensor public typealias CotangentVector = Tensor public typealias AllDifferentiableVariables = Tensor @@ -110,31 +108,31 @@ extension Tensor : Differentiable where Scalar : TensorFlowFloatingPoint { // Additional Element-wise Operators //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { /// Adds the scalar to every scalar of the tensor and produces the sum. @inlinable - @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func + (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs) + rhs } /// Adds the scalar to every scalar of the tensor and produces the sum. @inlinable - @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func + (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs + Tensor(rhs) } /// Subtracts the scalar from every scalar of the tensor and produces the difference. @inlinable - @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func - (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs) - rhs } /// Subtracts the scalar from every scalar of the tensor and produces the difference @inlinable - @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func - (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs - Tensor(rhs) } @@ -171,14 +169,14 @@ public extension Tensor where Scalar : Numeric { /// Multiplies two tensors and produces their product. /// - Note: `*` supports broadcasting. @inlinable - @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func * (lhs: Tensor, rhs: Tensor) -> Tensor { return Raw.mul(lhs, rhs) } /// Multiplies the scalar with every scalar of the tensor and produces the product. 
@inlinable - @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func * (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs * Tensor(rhs) } @@ -190,6 +188,8 @@ public extension Tensor where Scalar : Numeric { lhs = lhs * rhs } + /// Multiplies the tensor with the scalar, broadcasting the scalar, and stores the result in the + /// left-hand-side variable. @inlinable static func *= (lhs: inout Tensor, rhs: Scalar) { lhs = lhs * rhs @@ -198,21 +198,21 @@ public extension Tensor where Scalar : Numeric { /// Returns the quotient of dividing the first tensor by the second. /// - Note: `/` supports broadcasting. @inlinable - @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func / (lhs: Tensor, rhs: Tensor) -> Tensor { return Raw.div(lhs, rhs) } /// Returns the quotient of dividing the scalar by the tensor, broadcasting the scalar. @inlinable - @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func / (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs) / rhs } /// Returns the quotient of dividing the tensor by the scalar, broadcasting the scalar. @inlinable - @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func / (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs / Tensor(rhs) } @@ -264,7 +264,7 @@ public extension Tensor where Scalar : Numeric { } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpAdd(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { return (lhs + rhs, { v in (v, v.sum().scalarized()) }) @@ -409,16 +409,16 @@ public extension Tensor where Scalar == Bool { // @_exported import func Glibc.powf // #endif -public extension Tensor where Scalar : SignedNumeric { +public extension Tensor where Scalar: SignedNumeric { /// Computes the negation of the specified tensor element-wise. @inlinable - @differentiable(vjp: _vjpNegate(_:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpNegate(_:) where Scalar: TensorFlowFloatingPoint) static prefix func - (rhs: Tensor) -> Tensor { return Raw.neg(rhs) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpNegate(_ x: Tensor) -> (Tensor, (Tensor) -> Tensor) { return (-x, { v in -v }) @@ -427,13 +427,13 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { /// Computes the absolute value of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpAbs(_:) where T : TensorFlowFloatingPoint) -public func abs(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpAbs(_:) where T: TensorFlowFloatingPoint) +public func abs(_ x: Tensor) -> Tensor { return Raw.abs(x) } @inlinable -internal func _vjpAbs( +internal func _vjpAbs( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let sign = Raw.sign(x) @@ -442,13 +442,13 @@ internal func _vjpAbs( /// Computes the natural logarithm of the specified tensor element-wise. 
@inlinable -@differentiable(vjp: _vjpLog(_:) where T : TensorFlowFloatingPoint) -public func log(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpLog(_:) where T: TensorFlowFloatingPoint) +public func log(_ x: Tensor) -> Tensor { return Raw.log(x) } @inlinable -internal func _vjpLog( +internal func _vjpLog( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (log(x), { v in v / x }) @@ -456,13 +456,13 @@ internal func _vjpLog( /// Computes `sin` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpSin(_:) where T : TensorFlowFloatingPoint) -public func sin(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpSin(_:) where T: TensorFlowFloatingPoint) +public func sin(_ x: Tensor) -> Tensor { return Raw.sin(x) } @inlinable -internal func _vjpSin( +internal func _vjpSin( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (sin(x), { v in v * cos(x) }) @@ -470,13 +470,13 @@ internal func _vjpSin( /// Computes `cos` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpCos(_:) where T : TensorFlowFloatingPoint) -public func cos(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpCos(_:) where T: TensorFlowFloatingPoint) +public func cos(_ x: Tensor) -> Tensor { return Raw.cos(x) } @inlinable -internal func _vjpCos( +internal func _vjpCos( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (cos(x), { v in -v * sin(x) }) @@ -484,13 +484,13 @@ internal func _vjpCos( /// Computes `tan` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpTan(_:) where T : TensorFlowFloatingPoint) -public func tan(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpTan(_:) where T: TensorFlowFloatingPoint) +public func tan(_ x: Tensor) -> Tensor { return Raw.tan(x) } @inlinable -internal func _vjpTan( +internal func _vjpTan( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = tan(x) @@ -499,13 +499,13 @@ internal func _vjpTan( /// Computes `sinh` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpSinh(_:) where T : TensorFlowFloatingPoint) -public func sinh(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpSinh(_:) where T: TensorFlowFloatingPoint) +public func sinh(_ x: Tensor) -> Tensor { return Raw.sinh(x) } @inlinable -internal func _vjpSinh( +internal func _vjpSinh( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (sinh(x), { v in v * cosh(x) }) @@ -513,13 +513,13 @@ internal func _vjpSinh( /// Computes `cosh` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpCosh(_:) where T : TensorFlowFloatingPoint) -public func cosh(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpCosh(_:) where T: TensorFlowFloatingPoint) +public func cosh(_ x: Tensor) -> Tensor { return Raw.cosh(x) } @inlinable -internal func _vjpCosh( +internal func _vjpCosh( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (cosh(x), { v in v * sinh(x) }) @@ -527,13 +527,13 @@ internal func _vjpCosh( /// Computes `tanh` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpTanh(_:) where T : TensorFlowFloatingPoint) -public func tanh(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpTanh(_:) where T: TensorFlowFloatingPoint) +public func tanh(_ x: Tensor) -> Tensor { return Raw.tanh(x) } @inlinable -internal func _vjpTanh( +internal func _vjpTanh( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = tanh(x) @@ -541,15 +541,15 @@ internal func _vjpTanh( } /// Computes the square of the tensor. 
-public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { @inlinable - @differentiable(wrt: self, vjp: _vjpSquared() where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpSquared() where Scalar: TensorFlowFloatingPoint) func squared() -> Tensor { return Raw.square(self) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpSquared() -> (Tensor, (Tensor) -> Tensor) { return (squared(), { 2 * self * $0 }) @@ -558,13 +558,13 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { /// Computes the square root of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpSqrt(_:) where T : TensorFlowFloatingPoint) -public func sqrt(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpSqrt(_:) where T: TensorFlowFloatingPoint) +public func sqrt(_ x: Tensor) -> Tensor { return Raw.sqrt(x) } @inlinable -internal func _vjpSqrt( +internal func _vjpSqrt( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = sqrt(x) @@ -573,13 +573,13 @@ internal func _vjpSqrt( /// Computes the inverse square root of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpRsqrt(_:) where T : TensorFlowFloatingPoint) -public func rsqrt(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpRsqrt(_:) where T: TensorFlowFloatingPoint) +public func rsqrt(_ x: Tensor) -> Tensor { return Raw.rsqrt(x) } @inlinable -internal func _vjpRsqrt( +internal func _vjpRsqrt( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = rsqrt(x) @@ -588,13 +588,13 @@ internal func _vjpRsqrt( /// Computes `exp` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpExp(_:) where T : TensorFlowFloatingPoint) -public func exp(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpExp(_:) where T: TensorFlowFloatingPoint) +public func exp(_ x: Tensor) -> Tensor { return Raw.exp(x) } @inlinable -internal func _vjpExp( +internal func _vjpExp( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = exp(x) @@ -603,13 +603,13 @@ internal func _vjpExp( /// Computes the ceiling of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpCeil(_:) where T : TensorFlowFloatingPoint) -public func ceil(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpCeil(_:) where T: TensorFlowFloatingPoint) +public func ceil(_ x: Tensor) -> Tensor { return Raw.ceil(x) } @inlinable -internal func _vjpCeil( +internal func _vjpCeil( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (ceil(x), { _ in Tensor(0).broadcast(like: x) }) @@ -617,13 +617,13 @@ internal func _vjpCeil( /// Computes the floor of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpFloor(_:) where T : TensorFlowFloatingPoint) -public func floor(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpFloor(_:) where T: TensorFlowFloatingPoint) +public func floor(_ x: Tensor) -> Tensor { return Raw.floor(x) } @inlinable -internal func _vjpFloor( +internal func _vjpFloor( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (floor(x), { _ in Tensor(0).broadcast(like: x) }) @@ -633,12 +633,12 @@ internal func _vjpFloor( /// Specifically, computes `1 / (1 + exp(-x))`. 
@inlinable @differentiable(vjp: _vjpSigmoid) -public func sigmoid(_ x: Tensor) -> Tensor { +public func sigmoid(_ x: Tensor) -> Tensor { return Raw.sigmoid(x) } @inlinable -internal func _vjpSigmoid( +internal func _vjpSigmoid( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) @@ -647,8 +647,8 @@ internal func _vjpSigmoid( /// Computes the softmax of the specified tensor along the last axis. /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. @inlinable -@differentiable(vjp: _vjpSoftmax(_:) where T : TensorFlowFloatingPoint) -public func softmax(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpSoftmax(_:) where T: TensorFlowFloatingPoint) +public func softmax(_ x: Tensor) -> Tensor { return Raw.softmax(logits: x) } @@ -656,13 +656,13 @@ public func softmax(_ x: Tensor) -> Tensor { /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. @inlinable // TODO: [AD]. -public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { +public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { let xExp = exp(x) return xExp / xExp.sum(alongAxes: Tensor(Int32(axis))) } @inlinable -func _vjpSoftmax( +func _vjpSoftmax( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = softmax(x) @@ -674,13 +674,13 @@ func _vjpSoftmax( /// Computes the log-softmax of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpLogSoftmax(_:) where T : TensorFlowFloatingPoint) -public func logSoftmax(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpLogSoftmax(_:) where T: TensorFlowFloatingPoint) +public func logSoftmax(_ x: Tensor) -> Tensor { return Raw.logSoftmax(logits: x) } @inlinable -func _vjpLogSoftmax( +func _vjpLogSoftmax( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = logSoftmax(x) @@ -690,13 +690,13 @@ func _vjpLogSoftmax( /// Computes `relu` of the specified tensor element-wise. /// Specifically, computes `max(0, x)`. @inlinable -@differentiable(vjp: _vjpRelu(_:) where T : TensorFlowFloatingPoint) -public func relu(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpRelu(_:) where T: TensorFlowFloatingPoint) +public func relu(_ x: Tensor) -> Tensor { return max(0, x) } @inlinable -func _vjpRelu( +func _vjpRelu( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (relu(x), { v in Tensor(x .> 0) * v }) @@ -708,13 +708,13 @@ func _vjpRelu( /// Computes the power of the first tensor to the second tensor. @inlinable -@differentiable(vjp: _vjpPow(_:_:) where T : TensorFlowFloatingPoint) -public func pow(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : FloatingPoint { +@differentiable(vjp: _vjpPow(_:_:) where T: TensorFlowFloatingPoint) +public func pow(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T: FloatingPoint { return Raw.pow(lhs, rhs) } @inlinable -internal func _vjpPow( +internal func _vjpPow( _ x: Tensor, _ y: Tensor ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { let value = pow(x, y) @@ -726,28 +726,28 @@ internal func _vjpPow( /// Computes the power of the scalar to the tensor, broadcasting the scalar. @inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func pow(_ lhs: T, _ rhs: Tensor) -> Tensor where T : FloatingPoint { +// @differentiable(where T: TensorFlowFloatingPoint) +public func pow(_ lhs: T, _ rhs: Tensor) -> Tensor where T: FloatingPoint { return pow(Tensor(lhs), rhs) } /// Computes the power of the tensor to the scalar, broadcasting the scalar. 
@inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func pow(_ lhs: Tensor, _ rhs: T) -> Tensor where T : FloatingPoint { +// @differentiable(where T: TensorFlowFloatingPoint) +public func pow(_ lhs: Tensor, _ rhs: T) -> Tensor where T: FloatingPoint { return pow(lhs, Tensor(rhs)) } /// Computes the element-wise maximum of two tensors. /// - Note: `max` supports broadcasting. @inlinable -@differentiable(vjp: _vjpMax(_:_:) where T : TensorFlowFloatingPoint) -public func max(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { +@differentiable(vjp: _vjpMax(_:_:) where T: TensorFlowFloatingPoint) +public func max(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { return Raw.maximum(lhs, rhs) } @inlinable -internal func _vjpMax( +internal func _vjpMax( _ x: Tensor, _ y: Tensor ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { let value = max(x, y) @@ -756,28 +756,28 @@ internal func _vjpMax( /// Computes the element-wise maximum of the scalar and the tensor, broadcasting the scalar. @inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func max(_ lhs: T, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { +// @differentiable(where T: TensorFlowFloatingPoint) +public func max(_ lhs: T, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { return max(Tensor(lhs), rhs) } /// Computes the element-wise maximum of the scalar and the tensor, broadcasting the scalar. @inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func max(_ lhs: Tensor, _ rhs: T) -> Tensor where T : Numeric & Comparable { +// @differentiable(where T: TensorFlowFloatingPoint) +public func max(_ lhs: Tensor, _ rhs: T) -> Tensor where T: Numeric & Comparable { return max(lhs, Tensor(rhs)) } /// Computes the element-wise minimum of two tensors. /// - Note: `min` supports broadcasting. @inlinable -@differentiable(vjp: _vjpMin(_:_:) where T : TensorFlowFloatingPoint) -public func min(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { +@differentiable(vjp: _vjpMin(_:_:) where T: TensorFlowFloatingPoint) +public func min(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { return Raw.minimum(lhs, rhs) } @inlinable -internal func _vjpMin( +internal func _vjpMin( _ x: Tensor, _ y: Tensor ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { let value = min(x, y) @@ -786,20 +786,20 @@ internal func _vjpMin( /// Computes the element-wise minimum of the scalar and the tensor, broadcasting the scalar. @inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func min(_ lhs: T, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { +// @differentiable(where T: TensorFlowFloatingPoint) +public func min(_ lhs: T, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { return min(Tensor(lhs), rhs) } /// Computes the element-wise minimum of the scalar and the tensor, broadcasting the scalar. 
@inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func min(_ lhs: Tensor, _ rhs: T) -> Tensor where T : Numeric & Comparable { +// @differentiable(where T: TensorFlowFloatingPoint) +public func min(_ lhs: Tensor, _ rhs: T) -> Tensor where T: Numeric & Comparable { return min(lhs, Tensor(rhs)) } @inlinable -internal func _vjpMinMaxHelper( +internal func _vjpMinMaxHelper( _ x: Tensor, _ y: Tensor, originalValue: Tensor, @@ -845,13 +845,13 @@ public extension Tensor { /// must be either have the same shape as `self` or be a 1-D `Tensor` such /// that `mask.scalarCount == self.shape[0]`. @inlinable - @differentiable(wrt: (self, other), vjp: _vjpReplacing where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: (self, other), vjp: _vjpReplacing where Scalar: TensorFlowFloatingPoint) func replacing(with other: Tensor, where mask: Tensor) -> Tensor { return Raw.select(condition: mask, t: self, e: other) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpReplacing( with other: Tensor, @@ -928,7 +928,7 @@ public extension Tensor where Scalar == Bool { } } -public extension Tensor where Scalar : Numeric & Comparable { +public extension Tensor where Scalar: Numeric & Comparable { // NOTE: This overload is necessary, otherwise `min()` would refer to the variadic method // `min(squeezingAxes:)` with zero indices. @inlinable @@ -1050,14 +1050,14 @@ public extension Tensor where Scalar : Numeric & Comparable { // MARK: - Numeric Reductions -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { // MARK: - Sum /// Returns the sum along the specified axes. The reduced dimensions are removed. /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self, vjp: _vjpSum(squeezingAxes:) where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpSum(squeezingAxes:) where Scalar: TensorFlowFloatingPoint) func sum(squeezingAxes axes: Tensor) -> Tensor { return Raw.sum(self, reductionIndices: Tensor(axes), keepDims: false) } @@ -1066,7 +1066,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func sum(squeezingAxes axes: [Int]) -> Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1077,13 +1077,13 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func sum(squeezingAxes axes: Int...) -> Tensor { return sum(squeezingAxes: axes) } @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func sum() -> Tensor { return flattened().sum(squeezingAxes: 0) } @@ -1092,7 +1092,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. 
/// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { return Raw.sum(self, reductionIndices: axes, keepDims: true) } @@ -1101,7 +1101,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1112,7 +1112,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { return sum(alongAxes: axes) } @@ -1189,7 +1189,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self, vjp: _vjpMean(squeezingAxes:) where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpMean(squeezingAxes:) where Scalar: TensorFlowFloatingPoint) func mean(squeezingAxes axes: Tensor) -> Tensor { return Raw.mean(self, reductionIndices: axes, keepDims: false) } @@ -1198,7 +1198,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func mean(squeezingAxes axes: [Int]) -> Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1209,13 +1209,13 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func mean(squeezingAxes axes: Int...) -> Tensor { return mean(squeezingAxes: axes) } @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func mean() -> Tensor { return flattened().mean(squeezingAxes: [0]) } @@ -1225,7 +1225,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { return Raw.mean(self, reductionIndices: axes, keepDims: true) } @@ -1235,7 +1235,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1247,7 +1247,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { return mean(alongAxes: axes) } @@ -1259,7 +1259,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { let squaredDiff = (self - mean(alongAxes: axes)).squared() return squaredDiff.mean(squeezingAxes: axes) @@ -1270,7 +1270,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. 
/// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1282,12 +1282,12 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { return variance(squeezingAxes: axes) } - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) @inlinable func variance() -> Tensor { let mean = self.mean() @@ -1300,7 +1300,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { let squaredDiff = (self - mean(alongAxes: axes)).squared() return squaredDiff.mean(alongAxes: axes) @@ -1311,7 +1311,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1323,13 +1323,13 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { return variance(alongAxes: axes) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpSum(alongAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = sum(alongAxes: axes) @@ -1371,7 +1371,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // TODO: Consider making the return type be generic over `FloatingPoint` types // so that `self`'s scalar type can be any `Numeric` type. -public extension Tensor where Scalar : TensorFlowFloatingPoint { +public extension Tensor where Scalar: TensorFlowFloatingPoint { /// Returns the standard deviation of the elements along the specified axes. The reduced /// dimensions are retained with value `1`. Does not apply Bessel's correction. /// @@ -1459,8 +1459,8 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { /// Performs matrix multiplication with another tensor and produces the result. @inlinable -@differentiable(vjp: _vjpMatmul(_:_:) where Scalar : TensorFlowFloatingPoint) -public func matmul( +@differentiable(vjp: _vjpMatmul(_:_:) where Scalar: TensorFlowFloatingPoint) +public func matmul( _ lhs: Tensor, _ rhs: Tensor ) -> Tensor { @@ -1471,7 +1471,7 @@ public func matmul( } @inlinable -internal func _vjpMatmul( +internal func _vjpMatmul( _ lhs: Tensor, _ rhs: Tensor ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { @@ -1481,15 +1481,15 @@ internal func _vjpMatmul( }) } -infix operator • : MultiplicationPrecedence +infix operator •: MultiplicationPrecedence -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { // TODO: We have to define a custom VJP on • because AD can't yet differentiate generic methods. // After AD can differentiate generic methods, remove the custom VJP. /// Performs matrix multiplication between two tensors and produces the result. 
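As a usage sketch only (not part of the diff): the `•` operator declared just below is a thin wrapper over `matmul`, so the two spellings are interchangeable.

```
let a = Tensor<Float>([[1, 2], [3, 4]])  // shape [2, 2]
let b = Tensor<Float>([[0, 1], [1, 0]])  // shape [2, 2]
let c = a • b                            // identical to matmul(a, b)
print(c)                                 // [[2.0, 1.0], [4.0, 3.0]]
```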
@inlinable - @differentiable(vjp: _vjpMatmulOperator(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpMatmulOperator(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func • (lhs: Tensor, rhs: Tensor) -> Tensor { return matmul(lhs, rhs) } @@ -1498,7 +1498,7 @@ public extension Tensor where Scalar : Numeric { // TODO: We have to define a custom VJP on • because AD can't yet // differentiate generic methods. After AD can differentiate generic methods, // remove the custom VJP. -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpMatmulOperator( lhs: Tensor, diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 1664b1954..bd160e111 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -20,7 +20,7 @@ import TensorFlow // Normalization //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : TensorFlowFloatingPoint { +public extension Tensor where Scalar: TensorFlowFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are @@ -76,7 +76,7 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { } } -public extension Tensor where Scalar : BinaryFloatingPoint { +public extension Tensor where Scalar: BinaryFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// /// Specifically, returns `(self - mu)/(var + epsilon) * gamma + beta` where @@ -92,7 +92,7 @@ public extension Tensor where Scalar : BinaryFloatingPoint { @inlinable @differentiable( wrt: (self, offset, scale), - vjp: _vjpBatchNormalized where Scalar : TensorFlowFloatingPoint) + vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) func batchNormalized( alongAxis axis: Int, offset: Tensor = Tensor(0), @@ -138,7 +138,7 @@ public extension Padding { } } -public extension Tensor where Scalar : TensorFlowFloatingPoint { +public extension Tensor where Scalar: TensorFlowFloatingPoint { /// TensorFlow builtin conv2d gradient helper for the input. 
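For orientation (illustrative only, assuming the `(Int, Int, Int, Int)` strides spelling used elsewhere in this patch): the backprop helpers declared below are the pullbacks of `convolved2D(withFilter:strides:padding:)`, whose forward use looks like this.

```
let image = Tensor<Float>(ones: [1, 8, 8, 3])    // NHWC: batch, height, width, channels
let filter = Tensor<Float>(ones: [3, 3, 3, 16])  // HWIO: height, width, in channels, out channels
let output = image.convolved2D(withFilter: filter, strides: (1, 1, 1, 1), padding: .same)
print(output.shape)                              // [1, 8, 8, 16]
```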
@inlinable @differentiable(wrt: (self, filter), vjp: _vjpConv2DBackpropInput) @@ -185,10 +185,9 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { let value = conv2DBackpropInput( shape: shape, filter: filter, strides: strides, padding: padding) return (value, { v in - ( - self.conv2DBackpropFilter( - input: v, filterSizes: shape, strides: strides, padding: padding), - v.convolved2D(withFilter: filter, strides: strides, padding: padding)) + (self.conv2DBackpropFilter( + input: v, filterSizes: shape, strides: strides, padding: padding), + v.convolved2D(withFilter: filter, strides: strides, padding: padding)) }) } @@ -202,10 +201,9 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { let value = conv2DBackpropFilter( input: input, filterSizes: filterSizes, strides: strides, padding: padding) return (value, { v in - ( - self.conv2DBackpropInput( - shape: filterSizes, filter: v, strides: strides, padding: padding), - input.convolved2D(withFilter: v, strides: strides, padding: padding)) + (self.conv2DBackpropInput( + shape: filterSizes, filter: v, strides: strides, padding: padding), + input.convolved2D(withFilter: v, strides: strides, padding: padding)) }) } @@ -217,13 +215,10 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { let value = convolved2D(withFilter: filter, strides: strides, padding: padding) return (value, { v in - ( - v.conv2DBackpropInput( - shape: self.shapeTensor, filter: filter, - strides: strides, padding: padding), - v.conv2DBackpropFilter( - input: self, filterSizes: filter.shapeTensor, - strides: strides, padding: padding)) + (v.conv2DBackpropInput( + shape: self.shapeTensor, filter: filter, strides: strides, padding: padding), + v.conv2DBackpropFilter( + input: self, filterSizes: filter.shapeTensor, strides: strides, padding: padding)) }) } @@ -270,7 +265,7 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { } } -public extension Tensor where Scalar : FloatingPoint { +public extension Tensor where Scalar: FloatingPoint { /// Computes a 2-D convolution using `self` as input, with the specified /// filter, strides, and padding. /// @@ -309,7 +304,7 @@ public extension Tensor where Scalar : FloatingPoint { @inlinable @differentiable( wrt: self, - vjp: _vjpMaxPooled(kernelSize:strides:padding:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpMaxPooled(kernelSize:strides:padding:) where Scalar: TensorFlowFloatingPoint) func maxPooled( kernelSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), @@ -335,7 +330,7 @@ public extension Tensor where Scalar : FloatingPoint { @inlinable @differentiable( wrt: self, - vjp: _vjpAveragePooled(kernelSize:strides:padding:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpAveragePooled(kernelSize:strides:padding:) where Scalar: TensorFlowFloatingPoint) func averagePooled( kernelSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), diff --git a/Sources/DeepLearning/Operators/README.md b/Sources/DeepLearning/Operators/README.md deleted file mode 100644 index 76e7a7e69..000000000 --- a/Sources/DeepLearning/Operators/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# Ops and Convenience Methods - -The majority of the Tensor API is implemented in terms of 'ops' that are -partitioned out to the TensorFlow graph when the compiler runs. These -ops are intentionally designed to reflect TensorFlow ops, but provide nicer -Swift syntax for accessing them. 
In addition to the core ops themselves, -we also define some helper function wrappers, e.g. to make things symmetric -and generally feel nice to use. - -The ops themselves are defined by the primitive `#tfop(...)` syntax, here -are some examples: -``` -result = #tfop("Add", lhs, rhs) -result = #tfop("Const", dtype: Float.self, value$tensor: 4.0) -``` - -The first parameter to this syntax is the TensorFlow op name as a string. -After that, the inputs are specified, and then attributes are specified -with their name as the keyword argument. - -Inputs and outputs must be of TensorHandle, ResourceHandle, or VariantHandle -type. These are magic types known to the compiler. - -## Auto-Differentiation Support - -We also provide vector-Jacobian product (VJP) definitions for some of the -convenience methods. - -Terminology: -- originalValue (f): The function being differentiated, or the result of that - function. -- VJP (f'): The function as the result of differentiation, computing - the vector-Jacobian products with respect to all arguments, or the result - of that function. - -For more information, visit: -https://en.wikipedia.org/wiki/Automatic_differentiation - -The attribute '@differentiable(vjp: ...)' is used to register a function's VJP. -The automatic differentiation pass identifies these VJPs and chains them -together to produce arbitrary differentiable programs. diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 81e03bcbd..35a58ad56 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -16,16 +16,14 @@ import TensorFlow #endif -public protocol Optimizable: Differentiable & KeyPathIterable - where AllDifferentiableVariables: KeyPathIterable { } - /// A machine learning optimizer. /// /// Optimizers apply an optimization algorithm to update the differentiable variables of a machine /// learning model. public protocol Optimizer { /// The type of the model whose parameters are optimized. - associatedtype Model: Optimizable + associatedtype Model: Differentiable & KeyPathIterable + where AllDifferentiableVariables: KeyPathIterable /// The scalar parameter type. associatedtype Scalar: FloatingPoint /// The learning rate. @@ -48,8 +46,9 @@ fileprivate extension Tensor where Scalar: Numeric { /// /// Reference: ["Adam - A Method for Stochastic Optimization"]( /// https://arxiv.org/abs/1412.6980v8) -public class Adam: Optimizer - where Model.AllDifferentiableVariables == Model.CotangentVector { +public class Adam: Optimizer + where AllDifferentiableVariables: KeyPathIterable, + Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float /// A coefficient used to calculate the first and second moments of @@ -142,8 +141,9 @@ public class Adam: Optimizer /// /// Reference: ["rmsprop: Divide the gradient by a running average of its recent magnitude"]( /// http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) -public class RMSProp: Optimizer - where Model.AllDifferentiableVariables == Model.CotangentVector { +public class RMSProp: Optimizer + where AllDifferentiableVariables: KeyPathIterable, + Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float // TODO: Document `rho`. Keras doesn't document `rho`. 
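On the `rho` TODO above: `rho` is the decay rate of the running average of squared gradients. A free-standing sketch of that update rule on plain tensors (illustrative only, not the class's actual implementation):

```
// One RMSProp step for a single tensor-valued parameter.
// `alpha` is the running average of squared gradients; `rho` is its decay rate.
func rmspropStep(
    parameter: inout Tensor<Float>,
    gradient: Tensor<Float>,
    alpha: inout Tensor<Float>,
    learningRate: Float = 0.001,
    rho: Float = 0.9,
    epsilon: Float = 1e-8
) {
    alpha = rho * alpha + (1 - rho) * gradient.squared()
    parameter -= learningRate * gradient / (sqrt(alpha) + epsilon)
}
```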
@@ -206,8 +206,9 @@ public class RMSProp: Optimizer /// /// An optimizer that implements stochastic gradient descent, with support for momentum, learning /// rate decay, and Nesterov momentum. -public class SGD: Optimizer - where Model.AllDifferentiableVariables == Model.CotangentVector { +public class SGD: Optimizer + where AllDifferentiableVariables: KeyPathIterable, + Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float /// The momentum factor. It accelerates stochastic gradient descent in the relevant direction @@ -278,8 +279,10 @@ public class SGD: Optimizer // MARK: - Manifold optimizers /// A Riemann manifold stochastic gradient descent (SGD) optimizer. -public class RiemannSGD: Optimizer - where Model.TangentVector: VectorNumeric, Model.TangentVector.Scalar == Scalar { +public class RiemannSGD: Optimizer + where AllDifferentiableVariables: KeyPathIterable, + Model.TangentVector: VectorNumeric, + Model.TangentVector.Scalar == Scalar { /// The learning rate. public var learningRate: Scalar diff --git a/Sources/DeepLearning/PythonConversion.swift b/Sources/DeepLearning/PythonConversion.swift index a1b10d30a..4ff73cbaa 100644 --- a/Sources/DeepLearning/PythonConversion.swift +++ b/Sources/DeepLearning/PythonConversion.swift @@ -28,8 +28,8 @@ private func debugLogNumpyError(_ message: String) { debugLog("NumPy conversion error: " + message) } -extension ShapedArray : ConvertibleFromNumpyArray - where Scalar : NumpyScalarCompatible { +extension ShapedArray: ConvertibleFromNumpyArray + where Scalar: NumpyScalarCompatible { /// Creates a `ShapedArray` with the same shape and scalars as the specified /// `numpy.ndarray` instance. /// @@ -75,7 +75,7 @@ extension ShapedArray : ConvertibleFromNumpyArray guard let ptr = UnsafePointer(bitPattern: ptrVal) else { fatalError("'numpy.ndarray' data pointer was nil") } - // This code avoids calling `init(shape: [Int], scalars: S)`, + // This code avoids calling `init(shape: [Int], scalars: S)`, // which inefficiently copies scalars one by one. Instead, // `init(shape: [Int], scalars: [Scalar])` is called, which efficiently // does a `memcpy` of the entire `scalars` array. @@ -92,8 +92,8 @@ extension ShapedArray : ConvertibleFromNumpyArray } } -extension Tensor : ConvertibleFromNumpyArray - where Scalar : NumpyScalarCompatible { +extension Tensor: ConvertibleFromNumpyArray + where Scalar: NumpyScalarCompatible { /// Creates a tensor with the same shape and scalars as the specified /// `numpy.ndarray` instance. /// @@ -147,7 +147,7 @@ extension Tensor : ConvertibleFromNumpyArray } } -extension ShapedArray where Scalar : NumpyScalarCompatible { +extension ShapedArray where Scalar: NumpyScalarCompatible { /// Creates a `numpy.ndarray` instance with the same shape and scalars as /// this `ShapedArray`. /// @@ -157,7 +157,7 @@ extension ShapedArray where Scalar : NumpyScalarCompatible { } } -extension Tensor where Scalar : NumpyScalarCompatible { +extension Tensor where Scalar: NumpyScalarCompatible { /// Creates a `numpy.ndarray` instance with the same shape and scalars as /// this tensor. 
/// @@ -165,7 +165,7 @@ extension Tensor where Scalar : NumpyScalarCompatible { public func makeNumpyArray() -> PythonObject { return array.makeNumpyArray() } } -extension TensorShape : PythonConvertible { +extension TensorShape: PythonConvertible { public var pythonObject: PythonObject { return dimensions.pythonObject } diff --git a/Sources/DeepLearning/Random.swift b/Sources/DeepLearning/Random.swift index 7a6752193..8c90ccdf4 100644 --- a/Sources/DeepLearning/Random.swift +++ b/Sources/DeepLearning/Random.swift @@ -429,8 +429,8 @@ public struct UniformIntegerDistribution: RandomDistributi } @_fixed_layout -public struct UniformFloatingPointDistribution: RandomDistribution - where T.RawSignificand : FixedWidthInteger { +public struct UniformFloatingPointDistribution: RandomDistribution + where T.RawSignificand: FixedWidthInteger { public let lowerBound: T public let upperBound: T @@ -445,8 +445,8 @@ public struct UniformFloatingPointDistribution: RandomD } @_fixed_layout -public struct NormalDistribution: RandomDistribution - where T.RawSignificand : FixedWidthInteger { +public struct NormalDistribution: RandomDistribution + where T.RawSignificand: FixedWidthInteger { public let mean: T public let standardDeviation: T private let uniformDist = UniformFloatingPointDistribution() @@ -536,7 +536,7 @@ public struct BetaDistribution: RandomDistribution { } while r + alpha * (log(alpha) - log(b + w)) < t w = min(w, Float.greatestFiniteMagnitude) - return a == alpha0 ? w / (b + w) : b / (b + w) + return a == alpha0 ? w / (b + w): b / (b + w) } /// Returns one sample from a Beta(alpha, beta) distribution using Cheng's BC @@ -592,6 +592,6 @@ public struct BetaDistribution: RandomDistribution { } w = min(w, Float.greatestFiniteMagnitude) - return a == alpha0 ? w / (b + w) : b / (b + w) + return a == alpha0 ? w / (b + w): b / (b + w) } } diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift index 2d4e3b32d..1c1700649 100644 --- a/Sources/DeepLearning/Tensors.swift +++ b/Sources/DeepLearning/Tensors.swift @@ -17,7 +17,7 @@ import TensorFlow #endif #if COMPILING_TENSORFLOW_MODULE -infix operator .== : ComparisonPrecedence +infix operator .==: ComparisonPrecedence #endif //===------------------------------------------------------------------------------------------===// @@ -28,25 +28,19 @@ public extension Tensor { /// The rank of the tensor, represented as a `Tensor`. @inlinable var rankTensor: Tensor { - get { - return Raw.rank(self) - } + return Raw.rank(self) } /// The dimensions of the tensor, represented as a `Tensor`. @inlinable var shapeTensor: Tensor { - get { - return Raw.shape(self) - } + return Raw.shape(self) } /// The number of scalars in the tensor, represented as a `Tensor`. @inlinable var scalarCountTensor: Tensor { - get { - return Raw.size(self) - } + return Raw.size(self) } } @@ -55,7 +49,7 @@ public extension Tensor { //===------------------------------------------------------------------------------------------===// // String conversion. -extension Tensor : CustomStringConvertible { +extension Tensor: CustomStringConvertible { /// A textual representation of the tensor. /// /// - Note: use `fullDescription` for a non-pretty-printed description showing all scalars. @@ -94,14 +88,14 @@ public extension Tensor { } // Xcode Playground display conversion. 
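A small round-trip sketch for the NumPy conversion APIs above, `makeNumpyArray()` and `init?(numpy:)` (illustrative only; it assumes Python and NumPy are importable at runtime):

```
import Python
import TensorFlow

let x = Tensor<Float>(shape: [2, 3], scalars: [0, 1, 2, 3, 4, 5])
let ndarray = x.makeNumpyArray()                  // numpy.ndarray, shape (2, 3), dtype float32
let roundTripped = Tensor<Float>(numpy: ndarray)  // nil only for an incompatible dtype
print(ndarray.shape)                              // (2, 3)
print(roundTripped!)                              // same shape and scalars as `x`
```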
-extension Tensor : CustomPlaygroundDisplayConvertible { +extension Tensor: CustomPlaygroundDisplayConvertible { public var playgroundDescription: Any { return description } } // Mirror representation, used by debugger/REPL. -extension Tensor : CustomReflectable { +extension Tensor: CustomReflectable { public var customMirror: Mirror { return Mirror(self, children: [], displayStyle: .struct) } @@ -111,7 +105,7 @@ extension Tensor : CustomReflectable { // Codable Conformance //===------------------------------------------------------------------------------------------===// -extension Tensor : Codable where Scalar : Codable { +extension Tensor: Codable where Scalar: Codable { @inlinable public func encode(to encoder: Encoder) throws { var container = encoder.singleValueContainer() From f4b7e01715ed4ad678b0a1ce3db3b8a09814148c Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 19:05:02 -0400 Subject: [PATCH 47/55] Minor edit. --- Sources/DeepLearning/Optimizer.swift | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 35a58ad56..ac4cf0b90 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -22,8 +22,7 @@ import TensorFlow /// learning model. public protocol Optimizer { /// The type of the model whose parameters are optimized. - associatedtype Model: Differentiable & KeyPathIterable - where AllDifferentiableVariables: KeyPathIterable + associatedtype Model: Differentiable /// The scalar parameter type. associatedtype Scalar: FloatingPoint /// The learning rate. @@ -279,10 +278,8 @@ public class SGD: Optimizer // MARK: - Manifold optimizers /// A Riemann manifold stochastic gradient descent (SGD) optimizer. -public class RiemannSGD: Optimizer - where AllDifferentiableVariables: KeyPathIterable, - Model.TangentVector: VectorNumeric, - Model.TangentVector.Scalar == Scalar { +public class RiemannSGD: Optimizer + where Model.TangentVector: VectorNumeric, Model.TangentVector.Scalar == Scalar { /// The learning rate. public var learningRate: Scalar From b207e42685b64d3901a66b6381b69b8afbb55209 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 19:14:59 -0400 Subject: [PATCH 48/55] Reverted the change in the existing optimizer implementations. --- Sources/DeepLearning/Optimizer.swift | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index ac4cf0b90..440a691e0 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -45,9 +45,8 @@ fileprivate extension Tensor where Scalar: Numeric { /// /// Reference: ["Adam - A Method for Stochastic Optimization"]( /// https://arxiv.org/abs/1412.6980v8) -public class Adam: Optimizer - where AllDifferentiableVariables: KeyPathIterable, - Model.AllDifferentiableVariables == Model.CotangentVector { +public class Adam: Optimizer + where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. 
public var learningRate: Float /// A coefficient used to calculate the first and second moments of @@ -140,9 +139,8 @@ public class Adam: Optimizer /// /// Reference: ["rmsprop: Divide the gradient by a running average of its recent magnitude"]( /// http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) -public class RMSProp: Optimizer - where AllDifferentiableVariables: KeyPathIterable, - Model.AllDifferentiableVariables == Model.CotangentVector { +public class RMSProp: Optimizer + where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float // TODO: Document `rho`. Keras doesn't document `rho`. @@ -205,9 +203,8 @@ public class RMSProp: Optimizer /// /// An optimizer that implements stochastic gradient descent, with support for momentum, learning /// rate decay, and Nesterov momentum. -public class SGD: Optimizer - where AllDifferentiableVariables: KeyPathIterable, - Model.AllDifferentiableVariables == Model.CotangentVector { +public class SGD: Optimizer + where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float /// The momentum factor. It accelerates stochastic gradient descent in the relevant direction @@ -278,7 +275,7 @@ public class SGD: Optimizer // MARK: - Manifold optimizers /// A Riemann manifold stochastic gradient descent (SGD) optimizer. -public class RiemannSGD: Optimizer +public class RiemannSGD: Optimizer where Model.TangentVector: VectorNumeric, Model.TangentVector.Scalar == Scalar { /// The learning rate. public var learningRate: Scalar From 3dcd46d7dbf72a38053cae7bc1072ad09e2d667e Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 19:54:27 -0400 Subject: [PATCH 49/55] Added VJPs for some operations. --- Sources/DeepLearning/Initializers.swift | 61 ++++++------ Sources/DeepLearning/Layer.swift | 3 +- Sources/DeepLearning/Operators/Basic.swift | 104 +++++++++++++++++++-- 3 files changed, 127 insertions(+), 41 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 17884c3ca..3ab3f5654 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -86,8 +86,7 @@ public extension Tensor where Scalar: Numeric { /// Perform an element-wise conversion from another `Tensor`. @inlinable @differentiable( - vjp: _vjpCast where Scalar: TensorFlowFloatingPoint, - OtherScalar: TensorFlowFloatingPoint) + vjp: _vjpCast where Scalar: TensorFlowFloatingPoint, OtherScalar: TensorFlowFloatingPoint) init(_ other: Tensor) { self = Raw.cast(other) } @@ -109,7 +108,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - // @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(where Scalar: TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } @@ -143,7 +142,7 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - // @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) + @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -181,40 +180,40 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. 
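Illustrative only: `Tensor(stacking:)` and `unstack(alongAxis:)` invert each other, which is why `_vjpStacking` further down in this patch can hand the incoming cotangent straight to `unstack`.

```
let rows = [Tensor<Float>([1, 2]), Tensor<Float>([3, 4])]
let stacked = Tensor(stacking: rows)   // shape [2, 2]
let unstacked = stacked.unstack()      // two tensors of shape [2] again
print(stacked.shape, unstacked.count)  // [2, 2] 2
```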
@inlinable - // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar: TensorFlowFloatingPoint) + @differentiable(vjp: _vjpConcatenating where Scalar: TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } } -// internal extension Tensor where Scalar: TensorFlowFloatingPoint { -// @inlinable -// static func _vjpStacking( -// stacking tensors: [Tensor], -// alongAxis axis: Int = 0 -// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { -// let result = Tensor(stacking: tensors, alongAxis: axis) -// return (result, { v in -// Array.DifferentiableView(v.unstack(alongAxis: axis)) -// }) -// } +internal extension Tensor where Scalar: TensorFlowFloatingPoint { + @inlinable + static func _vjpStacking( + stacking tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(stacking: tensors, alongAxis: axis) + return (result, { v in + Array.DifferentiableView(v.unstack(alongAxis: axis)) + }) + } -// @inlinable -// static func _vjpConcatenating( -// concatenating tensors: [Tensor], -// alongAxis axis: Int = 0 -// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { -// let result = Tensor(concatenating: tensors, alongAxis: axis) -// let posAxis = axis < 0 ? axis + tensors[0].rank: axis -// let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) -// return (result, { [count = tensors.count] v in -// if count == 1 { return Array.DifferentiableView([v]) } -// let splits = v.split(sizes: sizes, alongAxis: posAxis) -// return Array.DifferentiableView(splits) -// }) -// } -// } + @inlinable + static func _vjpConcatenating( + concatenating tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(concatenating: tensors, alongAxis: axis) + let posAxis = axis < 0 ? axis + tensors[0].rank: axis + let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) + return (result, { [count = tensors.count] v in + if count == 1 { return Array.DifferentiableView([v]) } + let splits = v.split(sizes: sizes, alongAxis: posAxis) + return Array.DifferentiableView(splits) + }) + } +} //===------------------------------------------------------------------------------------------===// // Numeric diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 29052a3ef..7f045d8d2 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -23,7 +23,8 @@ import TensorFlow /// /// `Layer` instances define a differentiable `applied(to:)` method for mapping inputs to /// outputs. -public protocol Layer: Optimizable { +public protocol Layer: Differentiable & KeyPathIterable + where AllDifferentiableVariables: KeyPathIterable { /// The input type of the layer. associatedtype Input: Differentiable /// The output type of the layer. diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 56cdde2fc..51b8390b7 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -33,11 +33,11 @@ public extension Tensor { /// `N` tensors from this tensor by chipping it along the `axis` dimension, where `N` is /// inferred from this tensor's shape. 
For example, given a tensor with shape `[A, B, C, D]`: /// - /// - If `axis == 0` then the `i`th tensor in the returned array is the slice + /// - If `axis == 0` then the `i`-th tensor in the returned array is the slice /// `self[i, :, :, :]` and each tensor in that array will have shape `[B, C, D]`. /// (Note that the dimension unpacked along is gone, unlike /// `Tensor.split(numSplits:alongAxis)`, or `Tensor.split(sizes:alongAxis)`). - /// - If `axis == 1` then the `i`th tensor in the returned array is the slice + /// - If `axis == 1` then the `i`-th tensor in the returned array is the slice /// `value[:, i, :, :]` and each tensor in that array will have shape `[A, C, D]`. /// - Etc. /// @@ -51,9 +51,73 @@ public extension Tensor { /// /// - Returns: Array containing the unstacked tensors. @inlinable - // @differentiable(vjp: _vjpUnstack(alongAxis:) wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpUnstack(alongAxis:) where Scalar: TensorFlowFloatingPoint) func unstack(alongAxis axis: Int = 0) -> [Tensor] { - return Raw.unpack(value: self, num: shape[axis], axis: Int64(axis)) + return Raw.unpack(value: self, num: Int64(shape[axis]), axis: Int64(axis)) + } + + /// Splits a tensor into multiple tensors. The tensor is split along dimension `axis` into + /// `numSplits` smaller tensors. This requires that `numSplits` evenly divides `shape[axis]`. + /// + /// For example: + /// ``` + /// // 'value' is a tensor with shape [5, 30] + /// // Split 'value' into 3 tensors along dimension 1: + /// let parts = value.split(numSplits: 3, alongAxis: 1) + /// parts[0] // has shape [5, 10] + /// parts[1] // has shape [5, 10] + /// parts[2] // has shape [5, 10] + /// ``` + /// + /// - Parameters: + /// - numSplits: Number of splits to create. + /// - axis: Dimension along which to split this tensor. Negative values wrap around. + /// + /// - Precondition: `numSplits` must divide the size of dimension `axis` evenly. + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: Array containing the tensors parts. + @inlinable + @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar: TensorFlowFloatingPoint) + func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { + return Raw.split( + splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) + } + + /// Splits a tensor into multiple tensors. The tensor is split into `sizes.shape[0]` pieces. + /// The shape of the `i`-th piece has the same shape as this tensor except along dimension + /// `axis` where the size is `sizes[i]`. + /// + /// For example: + /// ``` + /// // 'value' is a tensor with shape [5, 30] + /// // Split 'value' into 3 tensors with sizes [4, 15, 11] along dimension 1: + /// let parts = value.split(sizes: Tensor([4, 15, 11]), alongAxis: 1) + /// parts[0] // has shape [5, 4] + /// parts[1] // has shape [5, 15] + /// parts[2] // has shape [5, 11] + /// ``` + /// + /// - Parameters: + /// - sizes: 1-D tensor containing the size of each split. + /// - axis: Dimension along which to split this tensor. Negative values wrap around. + /// + /// - Precondition: The values in `sizes` must add up to the size of dimension `axis`. + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: Array containing the tensors parts. 
+ @inlinable + @differentiable( + wrt: self, + vjp: _vjpSplit(sizes:alongAxis:) where Scalar: TensorFlowFloatingPoint) + func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { + return Raw.splitV( + value: self, + sizeSplits: sizes, + splitDim: Tensor(Int32(axis)), + numSplit: Int64(sizes.shape[0])) } /// Reshape to the shape of the specified `Tensor`. @@ -123,6 +187,32 @@ public extension Tensor { } internal extension Tensor where Scalar: TensorFlowFloatingPoint { + @inlinable + func _vjpUnstack( + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + let result = unstack(alongAxis: axis) + return (result, { v in Tensor(stacking: v.base, alongAxis: axis) }) + } + + @inlinable + func _vjpSplit( + numSplits: Int, + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + let result = split(numSplits: numSplits, alongAxis: axis) + return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + } + + @inlinable + func _vjpSplit( + sizes: Tensor, + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + let result = split(sizes: sizes, alongAxis: axis) + return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + } + @inlinable func _vjpReshaped(toShape newShape: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = reshaped(toShape: newShape) @@ -241,11 +331,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let idx = axis < 0 ? axis + rank: axis let splits = Tensor([shapeTensor[idx], other.shapeTensor[idx]]) return (concatenated(with: other, alongAxis: axis), { result in - let gradients = Raw.splitV( - value: result, - sizeSplits: splits, - splitDim: Tensor(Int32(axis)), - numSplit: Int64(2)) + let gradients = result.split(sizes: splits, alongAxis: axis) return (gradients[0], gradients[1]) }) } From 5548c56b18e188c136b834b7314310a7c49722c8 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:18:09 -0400 Subject: [PATCH 50/55] Incorporated fix from stdlib. --- Sources/DeepLearning/Operators/Basic.swift | 22 ++++++++++++++++------ Sources/DeepLearning/Operators/Math.swift | 8 ++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 51b8390b7..55d2f8fd3 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -154,12 +154,22 @@ public extension Tensor { return reshaped(to: [-1]) } + /// Returns a shape-expanded `Tensor`, with a dimension of 1 inserted at the specified shape + /// indices. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func expandingShape(at axes: Int...) -> Tensor { + return expandingShape(at: axes) + } + /// Returns a shape-expanded `Tensor`, with a dimension of 1 inserted at the - /// specified shape index. + /// specified shape indices. @inlinable @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar: TensorFlowFloatingPoint) - func expandingShape(at shapeIndex: Int) -> Tensor { - return Raw.expandDims(self, dim: Tensor(Int32(shapeIndex))) + func expandingShape(at axes: [Int]) -> Tensor { + var result = self + for i in axes { result = Raw.expandDims(result, dim: Tensor(Int32(i))) } + return result } /// Returns a rank-lifted `Tensor` with a leading dimension of 1. 
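Illustrative only: `expandingShape(at:)` and `squeezingShape(at:)` invert each other over the inserted axes, which is what the reworked `_vjpExpandingShape` below relies on.

```
let x = Tensor<Float>([1, 2, 3])                    // shape [3]
let expanded = x.expandingShape(at: 0, 2)           // shape [1, 3, 1]
let restored = expanded.squeezingShape(at: [0, 2])  // shape [3] again
print(expanded.shape, restored.shape)               // [1, 3, 1] [3]
```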
@@ -220,9 +230,9 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { } @inlinable - func _vjpExpandingShape(at shapeIndex: Int) -> (Tensor, (Tensor) -> Tensor) { - let value = expandingShape(at: shapeIndex) - return (value, { v in v.squeezingShape(at: shapeIndex) }) + func _vjpExpandingShape(at axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { + let value = self.expandingShape(at: axes) + return (value, { v in v.squeezingShape(at: axes) }) } @inlinable diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 66c25b50c..90d3d7bcf 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -1341,9 +1341,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let value = sum(squeezingAxes: axes) return (value, { [shape = shapeTensor] in var result = $0 - for i in axes.array.scalars { - result = result.expandingShape(at: Int(i)) - } + for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } return result.broadcast(toShape: shape) }) } @@ -1361,9 +1359,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let count = Raw.gather(params: shapeTensor, indices: axes).product() return (value, { [shape = shapeTensor] in var result = $0 - for i in axes.array.scalars { - result = result.expandingShape(at: Int(i)) - } + for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } return result.broadcast(toShape: shape) / Tensor(count) }) } From 89fb4e4d3f7f4049b1d30d026398b7ab6b86e18f Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:36:48 -0400 Subject: [PATCH 51/55] Addressed Richard's feedback. --- .../DeepLearningTests/InitializerTests.swift | 11 ++++ .../OperatorTests/BasicTests.swift | 27 ++++++++++ .../OperatorTests/ComparisonTests.swift | 5 ++ .../OperatorTests/MathTests.swift | 14 +++++- Tests/DeepLearningTests/TensorTests.swift | 50 ++++++------------- 5 files changed, 70 insertions(+), 37 deletions(-) diff --git a/Tests/DeepLearningTests/InitializerTests.swift b/Tests/DeepLearningTests/InitializerTests.swift index f91109065..3407e5816 100644 --- a/Tests/DeepLearningTests/InitializerTests.swift +++ b/Tests/DeepLearningTests/InitializerTests.swift @@ -94,4 +94,15 @@ final class InitializerTests: XCTestCase { XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), floats.array) XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), i8s.array) } + + static var allTests = [ + ("testInitializers", testInitializers), + ("testFactoryInitializers", testFactoryInitializers), + ("testNumericInitializers", testNumericInitializers), + ("testScalarToTensorConversion", testScalarToTensorConversion), + ("testArrayConversion", testArrayConversion), + ("testNonTPUDataTypeCast", testNonTPUDataTypeCast), + ("testTPUDataTypeCast", testTPUDataTypeCast), + ("testNonTPUBoolToNumericCast", testNonTPUBoolToNumericCast) + ] } diff --git a/Tests/DeepLearningTests/OperatorTests/BasicTests.swift b/Tests/DeepLearningTests/OperatorTests/BasicTests.swift index 112430984..ae25efbc5 100644 --- a/Tests/DeepLearningTests/OperatorTests/BasicTests.swift +++ b/Tests/DeepLearningTests/OperatorTests/BasicTests.swift @@ -449,4 +449,31 @@ final class BasicOperatorTests: XCTestCase { target .= Tensor(repeating: 1, shape: [1, 3, 1]) XCTAssertEqual(Tensor(repeating: 1, shape: [2, 3, 4]), target) } + + static var allTests = [ + ("testElementIndexing", testElementIndexing), + ("testElementIndexingAssignment", 
testElementIndexingAssignment), + ("testNestedElementIndexing", testNestedElementIndexing), + ("testSliceIndexing", testSliceIndexing), + ("testSliceIndexingAssignment", testSliceIndexingAssignment), + ("testEllipsisIndexing", testEllipsisIndexing), + ("testNewAxisIndexing", testNewAxisIndexing), + ("testSqueezeAxisIndexing", testSqueezeAxisIndexing), + ("testStridedSliceIndexing", testStridedSliceIndexing), + ("testStridedSliceIndexingAssignment", testStridedSliceIndexingAssignment), + ("testWholeTensorSlicing", testWholeTensorSlicing), + ("testAdvancedIndexing", testAdvancedIndexing), + ("testConcatenation", testConcatenation), + ("testVJPConcatenation", testVJPConcatenation), + ("testTranspose", testTranspose), + ("testReshape", testReshape), + ("testFlatten", testFlatten), + ("testFlatten0D", testFlatten0D), + ("testReshapeToScalar", testReshapeToScalar), + ("testReshapeTensor", testReshapeTensor), + ("testUnbroadcast1", testUnbroadcast1), + ("testUnbroadcast2", testUnbroadcast2), + ("testSliceUpdate", testSliceUpdate), + ("testBroadcastTensor", testBroadcastTensor) + ] } diff --git a/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift b/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift index f667dbbcc..e20a9cdc9 100644 --- a/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift +++ b/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift @@ -27,4 +27,9 @@ final class ComparisonOperatorTests: XCTestCase { let y = Tensor([2, 3, 4, 5, 6]) XCTAssertTrue(x < y) } + + static var allTests = [ + ("testElementwiseComparison", testElementwiseComparison), + ("testLexicographicalComparison", testLexicographicalComparison) + ] } diff --git a/Tests/DeepLearningTests/OperatorTests/MathTests.swift b/Tests/DeepLearningTests/OperatorTests/MathTests.swift index 8c1898fbb..3f769be07 100644 --- a/Tests/DeepLearningTests/OperatorTests/MathTests.swift +++ b/Tests/DeepLearningTests/OperatorTests/MathTests.swift @@ -147,7 +147,6 @@ final class MathOperatorTests: XCTestCase { XCTAssertEqual([12.5, 6.5], result.scalars) } - @inline(never) func testXORInference() { func xor(_ x: Float, _ y: Float) -> Float { let x = Tensor([x, y]).reshaped(to: [1, 2]) @@ -196,4 +195,17 @@ final class MathOperatorTests: XCTestCase { let prediction = classifier.prediction(for: input) XCTAssertEqual([0.816997], prediction.scalars, accuracy: 0.001) } + + static var allTests = [ + ("testReduction", testReduction), + ("testArgmax", testArgmax), + ("testCeilAndFloor", testCeilAndFloor), + ("testSimpleMath", testSimpleMath), + ("testStandardDeviation", testStandardDeviation), + ("test3Adds", test3Adds), + ("testMultiOpMath", testMultiOpMath), + ("testXWPlusB", testXWPlusB), + ("testXORInference", testXORInference), + ("testMLPClassifierStruct", testMLPClassifierStruct) + ] } diff --git a/Tests/DeepLearningTests/TensorTests.swift b/Tests/DeepLearningTests/TensorTests.swift index 01e18fda8..ec7d1f6e3 100644 --- a/Tests/DeepLearningTests/TensorTests.swift +++ b/Tests/DeepLearningTests/TensorTests.swift @@ -29,53 +29,31 @@ final class TensorTests: XCTestCase { XCTAssertEqual(0, selectValue(true).scalar) } - @inline(never) func testRankGetter() { - let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) - XCTAssertEqual(3, tensor.rank) - } - - // TODO: Merge all rank/shape getter tests into one when we support code motion to avoid sends. 
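For context (file and module names assumed, not part of this patch): these `allTests` manifests are what Linux test discovery consumes, typically from a `LinuxMain.swift` along these lines.

```
import XCTest
import DeepLearningTests

XCTMain([
    testCase(InitializerTests.allTests),
    testCase(BasicOperatorTests.allTests),
    testCase(ComparisonOperatorTests.allTests),
    testCase(MathOperatorTests.allTests),
    testCase(TensorTests.allTests)
])
```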
- - @inline(never) - func testRankGetter2() { let vector = Tensor([1]) - XCTAssertEqual(1, vector.rank) - } - - @inline(never) - func testRankGetter3() { let matrix = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) - XCTAssertEqual(2, matrix.rank) - } - - @inline(never) - func testRankGetter4() { let ones = Tensor(ones: [1, 2, 2, 2, 2, 2, 1]) + let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) + XCTAssertEqual(1, vector.rank) + XCTAssertEqual(2, matrix.rank) XCTAssertEqual(7, ones.rank) + XCTAssertEqual(3, tensor.rank) } - @inline(never) func testShapeGetter() { - let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) - XCTAssertEqual([3, 4, 5], tensor.shape) - } - - @inline(never) - func testShapeGetter2() { let vector = Tensor([1]) - XCTAssertEqual([1], vector.shape) - } - - @inline(never) - func testShapeGetter3() { let matrix = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) - XCTAssertEqual([2, 3], matrix.shape) - } - - @inline(never) - func testShapeGetter4() { let ones = Tensor(ones: [1, 2, 2, 2, 2, 2, 1]) + let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) + XCTAssertEqual([1], vector.shape) + XCTAssertEqual([2, 3], matrix.shape) XCTAssertEqual([1, 2, 2, 2, 2, 2, 1], ones.shape) + XCTAssertEqual([3, 4, 5], tensor.shape) } + + static var allTests = [ + ("testSimpleCond", testSimpleCond), + ("testRankGetter", testRankGetter), + ("testShapeGetter", testShapeGetter) + ] } From 61eae263cc5d2499aa55eae8da9255479a236262 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:40:07 -0400 Subject: [PATCH 52/55] Changed the indentation in the 'PythonConversion.swift' file. --- Sources/DeepLearning/PythonConversion.swift | 251 ++++++++++---------- 1 file changed, 124 insertions(+), 127 deletions(-) diff --git a/Sources/DeepLearning/PythonConversion.swift b/Sources/DeepLearning/PythonConversion.swift index 4ff73cbaa..5e52548c4 100644 --- a/Sources/DeepLearning/PythonConversion.swift +++ b/Sources/DeepLearning/PythonConversion.swift @@ -25,150 +25,147 @@ import Python private let np = Python.import("numpy") private func debugLogNumpyError(_ message: String) { - debugLog("NumPy conversion error: " + message) + debugLog("NumPy conversion error: " + message) } extension ShapedArray: ConvertibleFromNumpyArray - where Scalar: NumpyScalarCompatible { - /// Creates a `ShapedArray` with the same shape and scalars as the specified - /// `numpy.ndarray` instance. - /// - /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. - /// - Precondition: The `numpy` Python package must be installed. - /// - Precondition: `numpyArray` must have a compatible scalar `dtype`. - public init?(numpy numpyArray: PythonObject) { - // Check if input is a `numpy.ndarray` instance. - guard Python.isinstance(numpyArray, np.ndarray) == true else { - debugLogNumpyError(""" - PythonObject input has type '\(Python.type(numpyArray))' and is not \ - an instance of 'numpy.ndarray'. - """) - return nil + where Scalar: NumpyScalarCompatible { + /// Creates a `ShapedArray` with the same shape and scalars as the specified + /// `numpy.ndarray` instance. + /// + /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. + /// - Precondition: The `numpy` Python package must be installed. + /// - Precondition: `numpyArray` must have a compatible scalar `dtype`. + public init?(numpy numpyArray: PythonObject) { + // Check if input is a `numpy.ndarray` instance. 
+ guard Python.isinstance(numpyArray, np.ndarray) == true else { + debugLogNumpyError(""" + PythonObject input has type '\(Python.type(numpyArray))' and is not \ + an instance of 'numpy.ndarray'. + """) + return nil + } + // Check if the dtype of the `ndarray` is compatible with the `Scalar` + // type. + guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { + debugLogNumpyError(""" + 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ + Swift type '\(Scalar.self)'. + """) + return nil + } + + let pyShape = numpyArray.__array_interface__["shape"] + guard let shape = [Int](pyShape) else { + debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") + return nil + } + + // Make sure that the array is contiguous in memory. This does a copy if + // the array is not already contiguous in memory. + let contiguousNumpyArray = np.ascontiguousarray(numpyArray) + + guard let ptrVal = + UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { + debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") + return nil + } + // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape + // of `(0,)`). + guard let ptr = UnsafePointer(bitPattern: ptrVal) else { + fatalError("'numpy.ndarray' data pointer was nil") + } + // This code avoids calling `init(shape: [Int], scalars: S)`, + // which inefficiently copies scalars one by one. Instead, + // `init(shape: [Int], scalars: [Scalar])` is called, which efficiently + // does a `memcpy` of the entire `scalars` array. + // Unecessary copying is minimized. + let dummyPointer = UnsafeMutablePointer.allocate(capacity: 1) + let scalarCount = shape.reduce(1, *) + var scalars: [Scalar] = Array(repeating: dummyPointer.move(), count: scalarCount) + dummyPointer.deallocate() + scalars.withUnsafeMutableBufferPointer { buffPtr in + buffPtr.baseAddress!.assign(from: ptr, count: scalarCount) + } + self.init(shape: shape, scalars: scalars) } - // Check if the dtype of the `ndarray` is compatible with the `Scalar` - // type. - guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { - debugLogNumpyError(""" - 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ - Swift type '\(Scalar.self)'. - """) - return nil - } - - let pyShape = numpyArray.__array_interface__["shape"] - guard let shape = [Int](pyShape) else { - debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") - return nil - } - - // Make sure that the array is contiguous in memory. This does a copy if - // the array is not already contiguous in memory. - let contiguousNumpyArray = np.ascontiguousarray(numpyArray) - - guard let ptrVal = - UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { - debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") - return nil - } - // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape - // of `(0,)`). - guard let ptr = UnsafePointer(bitPattern: ptrVal) else { - fatalError("'numpy.ndarray' data pointer was nil") - } - // This code avoids calling `init(shape: [Int], scalars: S)`, - // which inefficiently copies scalars one by one. Instead, - // `init(shape: [Int], scalars: [Scalar])` is called, which efficiently - // does a `memcpy` of the entire `scalars` array. - // Unecessary copying is minimized. 
- let dummyPointer = UnsafeMutablePointer.allocate(capacity: 1) - let scalarCount = shape.reduce(1, *) - var scalars: [Scalar] = Array(repeating: dummyPointer.move(), - count: scalarCount) - dummyPointer.deallocate() - scalars.withUnsafeMutableBufferPointer { buffPtr in - buffPtr.baseAddress!.assign(from: ptr, count: scalarCount) - } - self.init(shape: shape, scalars: scalars) - } } extension Tensor: ConvertibleFromNumpyArray - where Scalar: NumpyScalarCompatible { - /// Creates a tensor with the same shape and scalars as the specified - /// `numpy.ndarray` instance. - /// - /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. - /// - Precondition: The `numpy` Python package must be installed. - /// - Returns: `numpyArray` converted to an `Array`. Returns `nil` if - /// `numpyArray` does not have a compatible scalar `dtype`. - public init?(numpy numpyArray: PythonObject) { - // Check if input is a `numpy.ndarray` instance. - guard Python.isinstance(numpyArray, np.ndarray) == true else { - debugLogNumpyError(""" - PythonObject input has type '\(Python.type(numpyArray))' and is not \ - an instance of 'numpy.ndarray'. - """) - return nil - } - // Check if the dtype of the `ndarray` is compatible with the `Scalar` - // type. - guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { - debugLogNumpyError(""" - 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ - Swift type '\(Scalar.self)'. - """) - return nil + where Scalar: NumpyScalarCompatible { + /// Creates a tensor with the same shape and scalars as the specified + /// `numpy.ndarray` instance. + /// + /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. + /// - Precondition: The `numpy` Python package must be installed. + /// - Returns: `numpyArray` converted to an `Array`. Returns `nil` if + /// `numpyArray` does not have a compatible scalar `dtype`. + public init?(numpy numpyArray: PythonObject) { + // Check if input is a `numpy.ndarray` instance. + guard Python.isinstance(numpyArray, np.ndarray) == true else { + debugLogNumpyError(""" + PythonObject input has type '\(Python.type(numpyArray))' and is not \ + an instance of 'numpy.ndarray'. + """) + return nil + } + // Check if the dtype of the `ndarray` is compatible with the `Scalar` + // type. + guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { + debugLogNumpyError(""" + 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ + Swift type '\(Scalar.self)'. + """) + return nil + } + + let pyShape = numpyArray.__array_interface__["shape"] + guard let dimensions = [Int](pyShape) else { + debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") + return nil + } + let shape = TensorShape(dimensions) + + // Make sure that the array is contiguous in memory. This does a copy if + // the array is not already contiguous in memory. + let contiguousNumpyArray = np.ascontiguousarray(numpyArray) + + guard let ptrVal = UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { + debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") + return nil + } + // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape + // of `(0,)`). 
+ guard let ptr = UnsafePointer(bitPattern: ptrVal) else { + fatalError("'numpy.ndarray' data pointer was nil") + } + let buffPtr = UnsafeBufferPointer(start: ptr, count: Int(shape.contiguousSize)) + self.init(shape: shape, scalars: buffPtr) } - - let pyShape = numpyArray.__array_interface__["shape"] - guard let dimensions = [Int](pyShape) else { - debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") - return nil - } - let shape = TensorShape(dimensions) - - // Make sure that the array is contiguous in memory. This does a copy if - // the array is not already contiguous in memory. - let contiguousNumpyArray = np.ascontiguousarray(numpyArray) - - guard let ptrVal = - UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { - debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") - return nil - } - // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape - // of `(0,)`). - guard let ptr = UnsafePointer(bitPattern: ptrVal) else { - fatalError("'numpy.ndarray' data pointer was nil") - } - let buffPtr = UnsafeBufferPointer(start: ptr, - count: Int(shape.contiguousSize)) - self.init(shape: shape, scalars: buffPtr) - } } extension ShapedArray where Scalar: NumpyScalarCompatible { - /// Creates a `numpy.ndarray` instance with the same shape and scalars as - /// this `ShapedArray`. - /// - /// - Precondition: The `numpy` Python package must be installed. - public func makeNumpyArray() -> PythonObject { - return scalars.makeNumpyArray().reshape(shape) - } + /// Creates a `numpy.ndarray` instance with the same shape and scalars as + /// this `ShapedArray`. + /// + /// - Precondition: The `numpy` Python package must be installed. + public func makeNumpyArray() -> PythonObject { + return scalars.makeNumpyArray().reshape(shape) + } } extension Tensor where Scalar: NumpyScalarCompatible { - /// Creates a `numpy.ndarray` instance with the same shape and scalars as - /// this tensor. - /// - /// - Precondition: The `numpy` Python package must be installed. - public func makeNumpyArray() -> PythonObject { return array.makeNumpyArray() } + /// Creates a `numpy.ndarray` instance with the same shape and scalars as + /// this tensor. + /// + /// - Precondition: The `numpy` Python package must be installed. + public func makeNumpyArray() -> PythonObject { return array.makeNumpyArray() } } extension TensorShape: PythonConvertible { - public var pythonObject: PythonObject { - return dimensions.pythonObject - } + public var pythonObject: PythonObject { + return dimensions.pythonObject + } } #endif // canImport(Python) From 3cdd8083a87c59f393a1e58c08bf91d0eeffae31 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:41:13 -0400 Subject: [PATCH 53/55] Changed the indentation in the 'Random.swift' file. 
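Before the indentation-only changes below, a minimal round-trip sketch for the numpy conversion APIs reworked above, assuming the `numpy` Python package is installed and that the `TensorFlow` and `Python` modules are available; `Tensor<Float>` stands in for any `NumpyScalarCompatible` scalar type.

import TensorFlow
import Python

let np = Python.import("numpy")

// numpy -> Tensor: the failable initializer returns nil when the input is not
// an ndarray or when its dtype is incompatible with the requested scalar type.
let ndarray = np.ones([2, 3], dtype: np.float32)
guard let tensor = Tensor<Float>(numpy: ndarray) else {
    fatalError("'ndarray' has a dtype that is incompatible with Float")
}

// Tensor -> numpy: produces an ndarray with the same shape and scalars.
let roundTripped = tensor.makeNumpyArray()
print(Python.type(roundTripped))  // <class 'numpy.ndarray'>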
--- Sources/DeepLearning/Random.swift | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Sources/DeepLearning/Random.swift b/Sources/DeepLearning/Random.swift index 8c90ccdf4..ade75a826 100644 --- a/Sources/DeepLearning/Random.swift +++ b/Sources/DeepLearning/Random.swift @@ -409,8 +409,8 @@ private func makeUInt64Pair(_ vector: UInt32x4) -> (UInt64, UInt64) { //===------------------------------------------------------------------------------------------===// public protocol RandomDistribution { - associatedtype Sample - func next(using generator: inout G) -> Sample + associatedtype Sample + func next(using generator: inout G) -> Sample } @_fixed_layout @@ -446,7 +446,7 @@ public struct UniformFloatingPointDistribution: RandomDi @_fixed_layout public struct NormalDistribution: RandomDistribution - where T.RawSignificand: FixedWidthInteger { + where T.RawSignificand: FixedWidthInteger { public let mean: T public let standardDeviation: T private let uniformDist = UniformFloatingPointDistribution() @@ -503,10 +503,10 @@ public struct BetaDistribution: RandomDistribution { /// /// - Returns: Sample obtained using Cheng's BB algorithm. private static func chengsAlgorithmBB( - _ alpha0: Float, - _ a: Float, - _ b: Float, - using rng: inout G + _ alpha0: Float, + _ a: Float, + _ b: Float, + using rng: inout G ) -> Float { let alpha = a + b let beta = sqrt((alpha - 2) / (2 * a * b - alpha)) @@ -550,10 +550,10 @@ public struct BetaDistribution: RandomDistribution { /// /// - Returns: Sample obtained using Cheng's BB algorithm. private static func chengsAlgorithmBC( - _ alpha0: Float, - _ a: Float, - _ b: Float, - using rng: inout G + _ alpha0: Float, + _ a: Float, + _ b: Float, + using rng: inout G ) -> Float { let alpha = a + b let beta = 1 / b From 4b87827efe6957319707816ba6b4f3666c65a197 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:41:55 -0400 Subject: [PATCH 54/55] Minor edit. --- Sources/DeepLearning/Operators/NN.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index bd160e111..c461a910b 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -23,8 +23,8 @@ import TensorFlow public extension Tensor where Scalar: TensorFlowFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// - /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are - /// respectively the mean and variance of `self` along `axis`. + /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` + /// are respectively the mean and variance of `self` along `axis`. /// /// - Parameters: /// - axis: The batch dimension. From a5edd32155a86140f453088905b6a823e49d1748 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:45:53 -0400 Subject: [PATCH 55/55] Tabs to spaces. 
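Before the tabs-to-spaces cleanup below, a short usage sketch for the `batchNormalized(alongAxis:)` operator whose doc comment was re-wrapped in the preceding patch. The `offset`, `scale`, and `epsilon` labels and the `Int32` axis are assumptions drawn from the documented parameters, and they are spelled out explicitly here rather than relying on defaults.

import TensorFlow

// Two samples with three features each; normalizing along the batch
// dimension (axis 0) gives every feature column zero mean and roughly unit
// variance, since gamma = 1 and beta = 0.
let batch = Tensor<Float>(shape: [2, 3], scalars: [1, 2, 3, 4, 5, 6])
let normalized = batch.batchNormalized(
    alongAxis: 0,
    offset: Tensor(0),   // beta
    scale: Tensor(1),    // gamma
    epsilon: 0.001)
print(normalized)  // Each column is approximately [-1, 1].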
--- Sources/DeepLearning/Operators/Basic.swift | 8 ++++---- Sources/DeepLearning/Operators/Math.swift | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 55d2f8fd3..54f60ec3d 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -167,9 +167,9 @@ public extension Tensor { @inlinable @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar: TensorFlowFloatingPoint) func expandingShape(at axes: [Int]) -> Tensor { - var result = self - for i in axes { result = Raw.expandDims(result, dim: Tensor(Int32(i))) } - return result + var result = self + for i in axes { result = Raw.expandDims(result, dim: Tensor(Int32(i))) } + return result } /// Returns a rank-lifted `Tensor` with a leading dimension of 1. @@ -231,7 +231,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpExpandingShape(at axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { - let value = self.expandingShape(at: axes) + let value = self.expandingShape(at: axes) return (value, { v in v.squeezingShape(at: axes) }) } diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 90d3d7bcf..3255aea10 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -1341,8 +1341,8 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let value = sum(squeezingAxes: axes) return (value, { [shape = shapeTensor] in var result = $0 - for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } - return result.broadcast(toShape: shape) + for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } + return result.broadcast(toShape: shape) }) } @@ -1359,8 +1359,8 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let count = Raw.gather(params: shapeTensor, indices: axes).product() return (value, { [shape = shapeTensor] in var result = $0 - for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } - return result.broadcast(toShape: shape) / Tensor(count) + for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } + return result.broadcast(toShape: shape) / Tensor(count) }) } }
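The pullbacks reflowed in this last patch re-expand the reduced axes and then broadcast the incoming gradient back to the input's shape. A small end-to-end sketch of that broadcast-back behaviour, using the argument-less reductions for brevity and assuming the standard `gradient(at:in:)` differential operator is available:

import TensorFlow

let x = Tensor<Float>(shape: [2, 3], scalars: [1, 2, 3, 4, 5, 6])

// d/dx sum(x): the scalar seed is broadcast back to x's shape, so the
// gradient is a [2, 3] tensor of ones.
let sumGradient = gradient(at: x) { x in x.sum() }
print(sumGradient)

// d/dx mean(x): the broadcast seed is additionally divided by the element
// count, giving a [2, 3] tensor filled with 1/6.
let meanGradient = gradient(at: x) { x in x.mean() }
print(meanGradient)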