From 9885f38a7f814982481639f6ecfdb47d808c91ac Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 09:58:29 -0400 Subject: [PATCH 01/55] Re-organized the operators source files. --- Sources/DeepLearning/Operators/Basic.swift | 23 +++ Sources/DeepLearning/Operators/Math.swift | 22 +++ .../{Operators.swift => Operators/NN.swift} | 154 ++++++++---------- 3 files changed, 114 insertions(+), 85 deletions(-) create mode 100644 Sources/DeepLearning/Operators/Basic.swift create mode 100644 Sources/DeepLearning/Operators/Math.swift rename Sources/DeepLearning/{Operators.swift => Operators/NN.swift} (74%) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift new file mode 100644 index 000000000..2e43792e7 --- /dev/null +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -0,0 +1,23 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +@_exported import TensorFlow +#endif + +/// Returns a tensor with the same shape and scalars as the specified tensor. +@differentiable +public func identity(_ x: Tensor) -> Tensor { + return x +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift new file mode 100644 index 000000000..e838ff32d --- /dev/null +++ b/Sources/DeepLearning/Operators/Math.swift @@ -0,0 +1,22 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +@_exported import TensorFlow +#endif + +/// Returns the values of the specified tensor rounded to the nearest integer, element-wise. +public func round(_ x: Tensor) -> Tensor { + return Raw.round(x) +} diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators/NN.swift similarity index 74% rename from Sources/DeepLearning/Operators.swift rename to Sources/DeepLearning/Operators/NN.swift index 51b2406a8..6ca32e22d 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -13,111 +13,95 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -import TensorFlow +@_exported import TensorFlow #endif -/// Returns the values of the specified tensor rounded to the nearest integer, element-wise. -public func round(_ x: Tensor) -> Tensor { - return Raw.round(x) -} - -/// Returns a tensor with the same shape and scalars as the specified tensor. 
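As a quick illustration of the two helpers introduced above, here is a usage sketch; the scalar values and the `import DeepLearning` module name are assumptions for the example, not part of the patch.

```swift
import DeepLearning

let x = Tensor<Float>([-1.4, 0.6, 2.7])

// `round` lowers to `Raw.round` and rounds each element to the nearest integer.
let rounded = round(x)   // expected: [-1.0, 1.0, 3.0]

// `identity` is a differentiable no-op: same shape and scalars as its argument.
let y = identity(x)      // same values as `x`
```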
-@differentiable -public func identity(_ x: Tensor) -> Tensor { - return x -} - //===------------------------------------------------------------------------------------------===// // Normalization //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar: TensorFlowFloatingPoint { - // TODO: Verify that these calculations are correct. - @inlinable - internal func _vjpBatchNormalized( - alongAxis axis: Int32, - offset: Tensor, - scale: Tensor, - epsilon: Scalar - ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { - let value = batchNormalized(alongAxis: axis, offset: offset, scale: scale, - epsilon: epsilon) - return (value, { v in - let mean = self.mean(alongAxes: axis) - let squaredDiff: Tensor = Raw.squaredDifference(self, mean) - let variance = squaredDiff.mean(alongAxes: axis) - - let diff = self - mean - let inv = rsqrt(variance + epsilon) - let norm = diff * inv - - let dNorm = v * scale - let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3) - let dMean = (-dNorm * inv).sum(alongAxes: axis) + - dVariance * (-diff * 2).mean(alongAxes: axis) - let dOffset = v.sum(alongAxes: axis) - let dScale = (norm * v).sum(alongAxes: axis) - let dim = Tensor(Tensor(self.shapeTensor[axis])) - let tmp = (dNorm * inv) + (dVariance * 2 * dMean / dim) - let dSelf = tmp + (dMean / dim) - return (dSelf, dOffset, dScale) - }) - } +public extension Tensor where Scalar: BinaryFloatingPoint { + /// Computes the batch normalized tensor along the specified axis. + /// + /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are + /// respectively the mean and variance of `self` along `axis`. + /// + /// - Parameters: + /// - axis: The batch dimension. + /// - offset: The offset, also known as beta. + /// - scale: The scale, also known as gamma. + /// - epsilon: A small value added to the denominator for numerical stability. + @inlinable + @differentiable( + wrt: (self, offset, scale), + vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) + func batchNormalized( + alongAxis axis: Int32, + offset: Tensor = Tensor(0), + scale: Tensor = Tensor(1), + epsilon: Scalar = 0.001 + ) -> Tensor { + let mean = self.mean(alongAxes: axis) + let squaredDiff: Tensor = Raw.squaredDifference(self, mean) + let variance = squaredDiff.mean(alongAxes: axis) + let inv = rsqrt(variance + epsilon) * scale + return self * inv + offset - mean * inv + } } -public extension Tensor where Scalar: BinaryFloatingPoint { - /// Computes the batch normalized tensor along the specified axis. - /// - /// Specifically, returns `(self - mu)/(var + epsilon) * gamma + beta` where - /// `mu` and `var` are respectively the mean and variance of `self` along - /// `axis`. - /// - /// - Parameters: - /// - axis: The batch dimension. - /// - offset: The offset, also known as beta. - /// - scale: The scale, also known as gamma. - /// - epsilon: A small value added to the denominator for numerical - /// stability. 
- @inlinable - @differentiable( - wrt: (self, offset, scale), vjp: _vjpBatchNormalized - where Scalar : TensorFlowFloatingPoint - ) - func batchNormalized( - alongAxis axis: Int32, - offset: Tensor = Tensor(0), - scale: Tensor = Tensor(1), - epsilon: Scalar = 0.001 - ) -> Tensor { - let mean = self.mean(alongAxes: axis) - let squaredDiff: Tensor = Raw.squaredDifference(self, mean) - let variance = squaredDiff.mean(alongAxes: axis) - let inv = rsqrt(variance + epsilon) * scale - return self * inv + offset - mean * inv - } +internal extension Tensor where Scalar: TensorFlowFloatingPoint { + // TODO: Verify that these calculations are correct. + @inlinable + func _vjpBatchNormalized( + alongAxis axis: Int32, + offset: Tensor, + scale: Tensor, + epsilon: Scalar + ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { + let value = batchNormalized( + alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) + return (value, { v in + let mean = self.mean(alongAxes: axis) + let squaredDiff: Tensor = Raw.squaredDifference(self, mean) + let variance = squaredDiff.mean(alongAxes: axis) + let diff = self - mean + let inv = rsqrt(variance + epsilon) + let norm = diff * inv + let dNorm = v * scale + let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3) + let dMean = (-dNorm * inv).sum(alongAxes: axis) + + dVariance * (-diff * 2).mean(alongAxes: axis) + let dOffset = v.sum(alongAxes: axis) + let dScale = (norm * v).sum(alongAxes: axis) + let dim = Tensor(Tensor(self.shapeTensor[axis])) + let tmp = (dNorm * inv) + (dVariance * 2 * dMean / dim) + let dSelf = tmp + (dMean / dim) + return (dSelf, dOffset, dScale) + }) + } } //===------------------------------------------------------------------------------------------===// -// Convolution and pooling +// Convolution and Pooling //===------------------------------------------------------------------------------------------===// /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { - /// The "valid" padding scheme. - case valid - /// The "same" padding scheme. - case same + /// The "valid" padding scheme. + case valid + /// The "same" padding scheme. + case same } public extension Padding { - @inlinable - var raw: Raw.Padding { - switch self { - case .same: return .same - case .valid: return .valid - } + @inlinable + var raw: Raw.Padding { + switch self { + case .same: return .same + case .valid: return .valid } + } } public extension Tensor where Scalar: TensorFlowFloatingPoint { From 3ce90c09ba050cbe97cc19e5f4b9d40d2a2c2ee1 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 13:30:25 -0400 Subject: [PATCH 02/55] Added support for 'stacked', 'concatenated', 'gathered', 'batchGathered', and 'masked'. --- Sources/DeepLearning/Operators/Basic.swift | 253 ++++++++++++++++++++- 1 file changed, 249 insertions(+), 4 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 2e43792e7..b491832ac 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -16,8 +16,253 @@ @_exported import TensorFlow #endif -/// Returns a tensor with the same shape and scalars as the specified tensor. 
-@differentiable -public func identity(_ x: Tensor) -> Tensor { - return x +public extension Tensor where Scalar: TensorFlowScalar { + /// Stacks the current tensor with `tensors`, along the `axis` dimension, into a tensor with + /// rank one higher than the current tensor and each tensor in `tensors`. + /// + /// Given `self` and `tensors` all have shape `[A, B, C]`, and `tensors.count = N-1`, then: + /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. + /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. + /// - etc. + /// + /// For example: + /// ``` + /// // 'x' is [1, 4] + /// // 'y' is [2, 5] + /// // 'z' is [3, 6] + /// x.packed(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] + /// x.packed(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + /// ``` + /// + /// This is the opposite of `unstacked`. + /// + /// - Parameters: + /// - tensors: Tensors to stack with the current tensor. + /// - axis: Dimension along which to stack. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same shape as the current tensor. + /// - Precondition: `axis` must be in the range `[-rank, rank)`. + /// + /// - Returns: The packed tensor. + @inlinable + // @differentiable(vjp: _vjpPacked where Scalar: TensorFlowFloatingPoint) + func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { + return Raw.pack([self] + tensors, axis: axis) + } + + /// Concatenates the current tensor with `tensors` along the `axis` dimension. + /// + /// Given `self` and `tensors` are all put in a single array, `values`, and + /// `values[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result has shape + /// `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data from the input + /// tensors is joined along the `axis` dimension. + /// + /// For example: + /// ``` + /// // t1 is [[1, 2, 3], [4, 5, 6]] + /// // t2 is [[7, 8, 9], [10, 11, 12]] + /// t1.concatenated(with: [t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + /// t1.concatenated(with: [t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] + /// + /// // t3 has shape [2, 3] + /// // t4 has shape [2, 3] + /// t3.concatenated(with: [t4]) // has shape [4, 3] + /// t3.concatenated(with: [t4], alongAxis: 1) // has shape [2, 6] + /// ``` + /// + /// - Note: If you are concatenating along a new axis consider using `stacked`. + /// + /// - Parameters: + /// - tensors: Tensors to concatenate with the current tensor. + /// - axis: Dimension along which to concatenate. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same rank as the current tensor and all dimensions + /// except `axis` must be equal. + /// - Precondition: `axis` must be in the range `[-rank, rank)`. + /// + /// - Returns: The concatenated tensor. + @inlinable + // @differentiable(vjp: _vjpConcatenated where Scalar : TensorFlowFloatingPoint) + func concatenated(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { + return Raw.concatV2([self] + tensors, axis: Tensor(axis)) + } + + /// Gathers slices of this tensor at `indices` along the `axis` dimension. 
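To make the index arithmetic below concrete, here is a small usage sketch of `gathered(atIndices:alongAxis:)`; the tensor literals are illustrative and assume the usual array-literal initializers from the TensorFlow module.

```swift
// 'params' has shape [3, 2].
let params: Tensor<Float> = [[1, 2], [3, 4], [5, 6]]

// Gather rows 2 and 0 along axis 0 (the default).
params.gathered(atIndices: Tensor<Int32>([2, 0]))                // expected: [[5, 6], [1, 2]]

// Gather columns in the order 1, 0 along axis 1.
params.gathered(atIndices: Tensor<Int32>([1, 0]), alongAxis: 1)  // expected: [[2, 1], [4, 3], [6, 5]]
```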
+ /// + /// For 0-D (scalar) `indices`: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices, + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// + /// For 1-D (vector) `indices`: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// i, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices[i], + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// + /// In the general case, produces a resulting tensor where: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// i_{batch\_dims}, ..., i_{M-1}, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices[i_0, ..., i_{M-1}], + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// where `N = self.rank` and `M = indices.rank`. + /// + /// The shape of the resulting tensor is: + /// `self.shape[..( + atIndices indices: Tensor, + alongAxis axis: Int32 = 0 + ) -> Tensor { + return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) + } + + /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the + /// first `batchDims` dimensions that correspond to batch dimensions. + /// + /// Performs similar functionality to `gathered`, except that the resulting tensor shape is now: + /// `self.shape[..( + atIndices indices: Tensor, + alongAxis axis: Int32, + numBatchDims batchDims: Int32 + ) -> Tensor { + precondition(batchDims >= 0 && batchDims < indices.rank, + "'numBatchDims' must be non-negative and less than 'indices.rank'.") + precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") + + // Handle the axis argument by transposing the axis dimension so that it is the first non-batch + // dimension, recursively calling `batchGathering` with `axis = 0`, and then transposing the + // result to put the pre-axis dimensions before the indices dimensions. + if axis != batchDims { + // Adjust axis to be positive. + let posAxis = axis < 0 ? axis + rank : axis + + precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") + precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") + + // Move self[axis] up to self[batchDims]. + let permutation = Tensor(0 ..< batchDims).concatenated(with: [ + Tensor(axis).rankLifted(), + Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), + Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) + let tensor = transposed(withPermutations: permutation) + let result = tensor.batchGathered( + atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) + + // Move the result dimensions corresponding to self[batchDims ..< axis] to just before the + // dimensions corresponding to indices[batchDims ...]. + let start = indices.rank + posAxis - batchDims + let resultPermutation = Tensor(0 ..< batchDims).concatenated(with: [ + Tensor(rangeFrom: indices.rank, to: start, stride: 1), + Tensor(batchDims ..< indices.rank), + Tensor(rangeFrom: start, to: result.rank, stride: 1)]) + return result.transposed(withPermutations: resultPermutation) + } + + let castedShape = Tensor(shapeTensor) + var batchIndices = indices + var accumulated = Tensor(ones: []) + for d in (1 ... 
batchDims).reversed() { + accumulated *= castedShape[d] + let dValue = castedShape[d - 1] + let dIndices = Tensor( + rangeFrom: Tensor(zeros: []), + to: dValue, + stride: Tensor(ones: []) + ) * accumulated + let dShape = Tensor(d - 1).packed(with: [ + Tensor(dValue), + Tensor(indices.rank - 1)]) + batchIndices += dIndices.reshaped(toShape: dShape) + } + + let flatIndices = batchIndices.flattened() + let outerShape = shapeTensor[Int(batchDims + 1)...] + let flatInnerShape = shapeTensor[.., alongAxis axis: Int32 = 0) -> Tensor { + precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") + let posAxis = axis < 0 ? axis + rank : axis + let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() + let reshapedTensor = reshaped( + toShape: shapeTensor[.. Date: Mon, 1 Apr 2019 15:08:37 -0400 Subject: [PATCH 03/55] Reverted back to 4-space tabs. --- Sources/DeepLearning/Operators/Basic.swift | 478 ++++++++++----------- Sources/DeepLearning/Operators/Math.swift | 2 +- Sources/DeepLearning/Operators/NN.swift | 130 +++--- 3 files changed, 305 insertions(+), 305 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index b491832ac..9f89b56ee 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -17,252 +17,252 @@ #endif public extension Tensor where Scalar: TensorFlowScalar { - /// Stacks the current tensor with `tensors`, along the `axis` dimension, into a tensor with - /// rank one higher than the current tensor and each tensor in `tensors`. - /// - /// Given `self` and `tensors` all have shape `[A, B, C]`, and `tensors.count = N-1`, then: - /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. - /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. - /// - etc. - /// - /// For example: - /// ``` - /// // 'x' is [1, 4] - /// // 'y' is [2, 5] - /// // 'z' is [3, 6] - /// x.packed(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] - /// x.packed(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] - /// ``` - /// - /// This is the opposite of `unstacked`. - /// - /// - Parameters: - /// - tensors: Tensors to stack with the current tensor. - /// - axis: Dimension along which to stack. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same shape as the current tensor. - /// - Precondition: `axis` must be in the range `[-rank, rank)`. - /// - /// - Returns: The packed tensor. - @inlinable - // @differentiable(vjp: _vjpPacked where Scalar: TensorFlowFloatingPoint) - func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { - return Raw.pack([self] + tensors, axis: axis) - } + /// Stacks the current tensor with `tensors`, along the `axis` dimension, into a tensor with + /// rank one higher than the current tensor and each tensor in `tensors`. + /// + /// Given `self` and `tensors` all have shape `[A, B, C]`, and `tensors.count = N-1`, then: + /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. + /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. + /// - etc. + /// + /// For example: + /// ``` + /// // 'x' is [1, 4] + /// // 'y' is [2, 5] + /// // 'z' is [3, 6] + /// x.packed(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] + /// x.packed(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + /// ``` + /// + /// This is the opposite of `unstacked`. 
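For clarity, the same example written against the method name as it is declared here, `stacked(with:alongAxis:)`; the values mirror the ones in the comment above.

```swift
let x: Tensor<Float> = [1, 4]
let y: Tensor<Float> = [2, 5]
let z: Tensor<Float> = [3, 6]

x.stacked(with: [y, z])                // expected: [[1, 4], [2, 5], [3, 6]]
x.stacked(with: [y, z], alongAxis: 1)  // expected: [[1, 2, 3], [4, 5, 6]]
```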
+ /// + /// - Parameters: + /// - tensors: Tensors to stack with the current tensor. + /// - axis: Dimension along which to stack. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same shape as the current tensor. + /// - Precondition: `axis` must be in the range `[-rank, rank)`. + /// + /// - Returns: The packed tensor. + @inlinable + // @differentiable(vjp: _vjpPacked where Scalar: TensorFlowFloatingPoint) + func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { + return Raw.pack([self] + tensors, axis: axis) + } - /// Concatenates the current tensor with `tensors` along the `axis` dimension. - /// - /// Given `self` and `tensors` are all put in a single array, `values`, and - /// `values[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result has shape - /// `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data from the input - /// tensors is joined along the `axis` dimension. - /// - /// For example: - /// ``` - /// // t1 is [[1, 2, 3], [4, 5, 6]] - /// // t2 is [[7, 8, 9], [10, 11, 12]] - /// t1.concatenated(with: [t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] - /// t1.concatenated(with: [t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] - /// - /// // t3 has shape [2, 3] - /// // t4 has shape [2, 3] - /// t3.concatenated(with: [t4]) // has shape [4, 3] - /// t3.concatenated(with: [t4], alongAxis: 1) // has shape [2, 6] - /// ``` - /// - /// - Note: If you are concatenating along a new axis consider using `stacked`. - /// - /// - Parameters: - /// - tensors: Tensors to concatenate with the current tensor. - /// - axis: Dimension along which to concatenate. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same rank as the current tensor and all dimensions - /// except `axis` must be equal. - /// - Precondition: `axis` must be in the range `[-rank, rank)`. - /// - /// - Returns: The concatenated tensor. - @inlinable - // @differentiable(vjp: _vjpConcatenated where Scalar : TensorFlowFloatingPoint) - func concatenated(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { - return Raw.concatV2([self] + tensors, axis: Tensor(axis)) - } + /// Concatenates the current tensor with `tensors` along the `axis` dimension. + /// + /// Given `self` and `tensors` are all put in a single array, `values`, and + /// `values[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result has shape + /// `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data from the + /// input tensors is joined along the `axis` dimension. + /// + /// For example: + /// ``` + /// // t1 is [[1, 2, 3], [4, 5, 6]] + /// // t2 is [[7, 8, 9], [10, 11, 12]] + /// t1.concatenated(with: [t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + /// t1.concatenated(with: [t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] + /// + /// // t3 has shape [2, 3] + /// // t4 has shape [2, 3] + /// t3.concatenated(with: [t4]) // has shape [4, 3] + /// t3.concatenated(with: [t4], alongAxis: 1) // has shape [2, 6] + /// ``` + /// + /// - Note: If you are concatenating along a new axis consider using `stacked`. + /// + /// - Parameters: + /// - tensors: Tensors to concatenate with the current tensor. + /// - axis: Dimension along which to concatenate. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same rank as the current tensor and all dimensions + /// except `axis` must be equal. 
+ /// - Precondition: `axis` must be in the range `[-rank, rank)`. + /// + /// - Returns: The concatenated tensor. + @inlinable + // @differentiable(vjp: _vjpConcatenated where Scalar : TensorFlowFloatingPoint) + func concatenated(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { + return Raw.concatV2([self] + tensors, axis: Tensor(axis)) + } - /// Gathers slices of this tensor at `indices` along the `axis` dimension. - /// - /// For 0-D (scalar) `indices`: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices, - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// - /// For 1-D (vector) `indices`: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// i, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices[i], - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// - /// In the general case, produces a resulting tensor where: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// i_{batch\_dims}, ..., i_{M-1}, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices[i_0, ..., i_{M-1}], - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// where `N = self.rank` and `M = indices.rank`. - /// - /// The shape of the resulting tensor is: - /// `self.shape[..( - atIndices indices: Tensor, - alongAxis axis: Int32 = 0 - ) -> Tensor { - return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) - } + /// Gathers slices of this tensor at `indices` along the `axis` dimension. + /// + /// For 0-D (scalar) `indices`: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices, + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// + /// For 1-D (vector) `indices`: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// i, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices[i], + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// + /// In the general case, produces a resulting tensor where: + /// ``` + /// result[p_0, ..., p_{axis-1}, + /// i_{batch\_dims}, ..., i_{M-1}, + /// p_{axis + 1}, ..., p_{N-1}] = + /// self[p_0, ..., p_{axis-1}, + /// indices[i_0, ..., i_{M-1}], + /// p_{axis + 1}, ..., p_{N-1}] + /// ``` + /// where `N = self.rank` and `M = indices.rank`. + /// + /// The shape of the resulting tensor is: + /// `self.shape[..( + atIndices indices: Tensor, + alongAxis axis: Int32 = 0 + ) -> Tensor { + return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) + } - /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the - /// first `batchDims` dimensions that correspond to batch dimensions. - /// - /// Performs similar functionality to `gathered`, except that the resulting tensor shape is now: - /// `self.shape[..( - atIndices indices: Tensor, - alongAxis axis: Int32, - numBatchDims batchDims: Int32 - ) -> Tensor { - precondition(batchDims >= 0 && batchDims < indices.rank, - "'numBatchDims' must be non-negative and less than 'indices.rank'.") - precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") + /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the + /// first `batchDims` dimensions that correspond to batch dimensions. 
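An illustrative sketch of the batched gathering described here, using `batchGathered(atIndices:alongAxis:numBatchDims:)` as declared below; the shapes and values are assumptions for the example.

```swift
// 'params' has shape [2, 3]: two batch elements with three values each.
let params: Tensor<Float> = [[10, 11, 12],
                             [20, 21, 22]]
// One index per batch element (shape [2, 1]).
let indices: Tensor<Int32> = [[2], [0]]

// Gather within each batch element; dimension 0 is treated as the batch dimension.
params.batchGathered(atIndices: indices, alongAxis: 1, numBatchDims: 1)
// expected (following the usual batch-gather semantics): [[12], [20]]
```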
+ /// + /// Performs similar functionality to `gathered`, except that the resulting tensor shape is now: + /// `self.shape[..( + atIndices indices: Tensor, + alongAxis axis: Int32, + numBatchDims batchDims: Int32 + ) -> Tensor { + precondition(batchDims >= 0 && batchDims < indices.rank, + "'numBatchDims' must be non-negative and less than 'indices.rank'.") + precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") - // Handle the axis argument by transposing the axis dimension so that it is the first non-batch - // dimension, recursively calling `batchGathering` with `axis = 0`, and then transposing the - // result to put the pre-axis dimensions before the indices dimensions. - if axis != batchDims { - // Adjust axis to be positive. - let posAxis = axis < 0 ? axis + rank : axis + // Handle the axis argument by transposing the axis dimension so that it is the first + // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then + // transposing the result to put the pre-axis dimensions before the indices dimensions. + if axis != batchDims { + // Adjust axis to be positive. + let posAxis = axis < 0 ? axis + rank : axis - precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") - precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") + precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") + precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") - // Move self[axis] up to self[batchDims]. - let permutation = Tensor(0 ..< batchDims).concatenated(with: [ - Tensor(axis).rankLifted(), - Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), - Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) - let tensor = transposed(withPermutations: permutation) - let result = tensor.batchGathered( - atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) - - // Move the result dimensions corresponding to self[batchDims ..< axis] to just before the - // dimensions corresponding to indices[batchDims ...]. - let start = indices.rank + posAxis - batchDims - let resultPermutation = Tensor(0 ..< batchDims).concatenated(with: [ - Tensor(rangeFrom: indices.rank, to: start, stride: 1), - Tensor(batchDims ..< indices.rank), - Tensor(rangeFrom: start, to: result.rank, stride: 1)]) - return result.transposed(withPermutations: resultPermutation) - } + // Move self[axis] up to self[batchDims]. + let permutation = Tensor(0 ..< batchDims).concatenated(with: [ + Tensor(axis).rankLifted(), + Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), + Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) + let tensor = transposed(withPermutations: permutation) + let result = tensor.batchGathered( + atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) + + // Move the result dimensions corresponding to self[batchDims ..< axis] to just before + // the dimensions corresponding to indices[batchDims ...]. + let start = indices.rank + posAxis - batchDims + let resultPermutation = Tensor(0 ..< batchDims).concatenated(with: [ + Tensor(rangeFrom: indices.rank, to: start, stride: 1), + Tensor(batchDims ..< indices.rank), + Tensor(rangeFrom: start, to: result.rank, stride: 1)]) + return result.transposed(withPermutations: resultPermutation) + } - let castedShape = Tensor(shapeTensor) - var batchIndices = indices - var accumulated = Tensor(ones: []) - for d in (1 ... 
batchDims).reversed() { - accumulated *= castedShape[d] - let dValue = castedShape[d - 1] - let dIndices = Tensor( - rangeFrom: Tensor(zeros: []), - to: dValue, - stride: Tensor(ones: []) - ) * accumulated - let dShape = Tensor(d - 1).packed(with: [ - Tensor(dValue), - Tensor(indices.rank - 1)]) - batchIndices += dIndices.reshaped(toShape: dShape) - } + let castedShape = Tensor(shapeTensor) + var batchIndices = indices + var accumulated = Tensor(ones: []) + for d in (1 ... batchDims).reversed() { + accumulated *= castedShape[d] + let dValue = castedShape[d - 1] + let dIndices = Tensor( + rangeFrom: Tensor(zeros: []), + to: dValue, + stride: Tensor(ones: []) + ) * accumulated + let dShape = Tensor(d - 1).packed(with: [ + Tensor(dValue), + Tensor(indices.rank - 1)]) + batchIndices += dIndices.reshaped(toShape: dShape) + } - let flatIndices = batchIndices.flattened() - let outerShape = shapeTensor[Int(batchDims + 1)...] - let flatInnerShape = shapeTensor[.., alongAxis axis: Int32 = 0) -> Tensor { - precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - let posAxis = axis < 0 ? axis + rank : axis - let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() - let reshapedTensor = reshaped( - toShape: shapeTensor[.., alongAxis axis: Int32 = 0) -> Tensor { + precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") + let posAxis = axis < 0 ? axis + rank : axis + let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() + let reshapedTensor = reshaped( + toShape: shapeTensor[..(_ x: Tensor) -> Tensor { - return Raw.round(x) + return Raw.round(x) } diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 6ca32e22d..51117b833 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -21,64 +21,64 @@ //===------------------------------------------------------------------------------------------===// public extension Tensor where Scalar: BinaryFloatingPoint { - /// Computes the batch normalized tensor along the specified axis. - /// - /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are - /// respectively the mean and variance of `self` along `axis`. - /// - /// - Parameters: - /// - axis: The batch dimension. - /// - offset: The offset, also known as beta. - /// - scale: The scale, also known as gamma. - /// - epsilon: A small value added to the denominator for numerical stability. - @inlinable - @differentiable( - wrt: (self, offset, scale), - vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) - func batchNormalized( - alongAxis axis: Int32, - offset: Tensor = Tensor(0), - scale: Tensor = Tensor(1), - epsilon: Scalar = 0.001 - ) -> Tensor { - let mean = self.mean(alongAxes: axis) - let squaredDiff: Tensor = Raw.squaredDifference(self, mean) - let variance = squaredDiff.mean(alongAxes: axis) - let inv = rsqrt(variance + epsilon) * scale - return self * inv + offset - mean * inv - } + /// Computes the batch normalized tensor along the specified axis. + /// + /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are + /// respectively the mean and variance of `self` along `axis`. + /// + /// - Parameters: + /// - axis: The batch dimension. + /// - offset: The offset, also known as beta. + /// - scale: The scale, also known as gamma. + /// - epsilon: A small value added to the denominator for numerical stability. 
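As a usage sketch of `batchNormalized(alongAxis:offset:scale:epsilon:)` as declared below; the input values, the choice of axis 0, and the shapes of `offset`/`scale` are assumptions for the example.

```swift
// 'x' has shape [2, 3]; normalize over the batch dimension (axis 0).
let x: Tensor<Float> = [[1, 2, 3],
                        [5, 6, 7]]

// With the default offset (0), scale (1), and epsilon (0.001):
let normalized = x.batchNormalized(alongAxis: 0)
// expected: approximately [[-1, -1, -1], [1, 1, 1]]

// Learnable parameters are typically broadcast over the remaining axes.
let offset = Tensor<Float>(zeros: [3])
let scale = Tensor<Float>(ones: [3])
let y = x.batchNormalized(alongAxis: 0, offset: offset, scale: scale, epsilon: 1e-3)
```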
+ @inlinable + @differentiable( + wrt: (self, offset, scale), + vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) + func batchNormalized( + alongAxis axis: Int32, + offset: Tensor = Tensor(0), + scale: Tensor = Tensor(1), + epsilon: Scalar = 0.001 + ) -> Tensor { + let mean = self.mean(alongAxes: axis) + let squaredDiff: Tensor = Raw.squaredDifference(self, mean) + let variance = squaredDiff.mean(alongAxes: axis) + let inv = rsqrt(variance + epsilon) * scale + return self * inv + offset - mean * inv + } } internal extension Tensor where Scalar: TensorFlowFloatingPoint { - // TODO: Verify that these calculations are correct. - @inlinable - func _vjpBatchNormalized( - alongAxis axis: Int32, - offset: Tensor, - scale: Tensor, - epsilon: Scalar - ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { - let value = batchNormalized( - alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) - return (value, { v in - let mean = self.mean(alongAxes: axis) - let squaredDiff: Tensor = Raw.squaredDifference(self, mean) - let variance = squaredDiff.mean(alongAxes: axis) - let diff = self - mean - let inv = rsqrt(variance + epsilon) - let norm = diff * inv - let dNorm = v * scale - let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3) - let dMean = (-dNorm * inv).sum(alongAxes: axis) + - dVariance * (-diff * 2).mean(alongAxes: axis) - let dOffset = v.sum(alongAxes: axis) - let dScale = (norm * v).sum(alongAxes: axis) - let dim = Tensor(Tensor(self.shapeTensor[axis])) - let tmp = (dNorm * inv) + (dVariance * 2 * dMean / dim) - let dSelf = tmp + (dMean / dim) - return (dSelf, dOffset, dScale) - }) - } + // TODO: Verify that these calculations are correct. + @inlinable + func _vjpBatchNormalized( + alongAxis axis: Int32, + offset: Tensor, + scale: Tensor, + epsilon: Scalar + ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { + let value = batchNormalized( + alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) + return (value, { v in + let mean = self.mean(alongAxes: axis) + let squaredDiff: Tensor = Raw.squaredDifference(self, mean) + let variance = squaredDiff.mean(alongAxes: axis) + let diff = self - mean + let inv = rsqrt(variance + epsilon) + let norm = diff * inv + let dNorm = v * scale + let dVariance = -(dNorm * diff).sum(alongAxes: axis) / 2 * pow(inv, -3) + let dMean = (-dNorm * inv).sum(alongAxes: axis) + + dVariance * (-diff * 2).mean(alongAxes: axis) + let dOffset = v.sum(alongAxes: axis) + let dScale = (norm * v).sum(alongAxes: axis) + let dim = Tensor(Tensor(self.shapeTensor[axis])) + let tmp = (dNorm * inv) + (dVariance * 2 * dMean / dim) + let dSelf = tmp + (dMean / dim) + return (dSelf, dOffset, dScale) + }) + } } //===------------------------------------------------------------------------------------------===// @@ -88,20 +88,20 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { - /// The "valid" padding scheme. - case valid - /// The "same" padding scheme. - case same + /// The "valid" padding scheme. + case valid + /// The "same" padding scheme. 
+ case same } public extension Padding { - @inlinable - var raw: Raw.Padding { - switch self { - case .same: return .same - case .valid: return .valid + @inlinable + var raw: Raw.Padding { + switch self { + case .same: return .same + case .valid: return .valid + } } - } } public extension Tensor where Scalar: TensorFlowFloatingPoint { From b3f6281de7afc4d8310182ab34ea9c3ef19fbe0b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 15:10:25 -0400 Subject: [PATCH 04/55] Made some other minor changes. --- Sources/DeepLearning/Helpers.swift | 12 ++++++++++-- Sources/DeepLearning/Initializers.swift | 4 ++++ Sources/DeepLearning/Loss.swift | 2 +- Sources/DeepLearning/Optimizer.swift | 2 +- Sources/DeepLearning/Random.swift | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Sources/DeepLearning/Helpers.swift b/Sources/DeepLearning/Helpers.swift index 86aec74bf..4d9c0217b 100644 --- a/Sources/DeepLearning/Helpers.swift +++ b/Sources/DeepLearning/Helpers.swift @@ -13,12 +13,20 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -import TensorFlow +@_exported import TensorFlow #endif +/// Returns a tensor with the same shape and scalars as the specified tensor. +@inlinable +@differentiable +public func identity(_ x: Tensor) -> Tensor { + return x +} + // `pow` is defined in Darwin/Glibc on `Float` and `Double`, but there doesn't exist a generic // version for `FloatingPoint`. // This is a manual definition. -func pow(_ x: T, _ y: T) -> T { +@inlinable +func pow(_ x: T, _ y: T) -> T { return T(pow(Double(x), Double(y))) } diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index ef4de6228..bddc8f3f0 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -16,6 +16,10 @@ @_exported import TensorFlow #endif +//===------------------------------------------------------------------------------------------===// +// Random +//===------------------------------------------------------------------------------------------===// + public extension Tensor where Scalar == Int32 { /// Creates a tensor with the specified shape, randomly sampling scalar values /// from a discrete uniform distribution. diff --git a/Sources/DeepLearning/Loss.swift b/Sources/DeepLearning/Loss.swift index fe9400302..45a5d15d6 100644 --- a/Sources/DeepLearning/Loss.swift +++ b/Sources/DeepLearning/Loss.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -import TensorFlow +@_exported import TensorFlow #endif /// Computes the mean squared error between predictions and labels. diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 479488e79..0c381833b 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -import TensorFlow +@_exported import TensorFlow #endif /// A machine learning optimizer. 
diff --git a/Sources/DeepLearning/Random.swift b/Sources/DeepLearning/Random.swift index 44e55223c..7a6752193 100644 --- a/Sources/DeepLearning/Random.swift +++ b/Sources/DeepLearning/Random.swift @@ -19,7 +19,7 @@ import Glibc #endif //===------------------------------------------------------------------------------------------===// -// Random number generators +// Random Number Generators //===------------------------------------------------------------------------------------------===// /// A type that provides seedable deterministic pseudo-random data. From 111d96cf89a084d91ae18a296865748fe0a3f8ad Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 15:18:33 -0400 Subject: [PATCH 05/55] Added support or 'selecting'. --- Sources/DeepLearning/Operators/Basic.swift | 46 ++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 9f89b56ee..d520ece14 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -266,3 +266,49 @@ public extension Tensor where Scalar: TensorFlowScalar { return reshapedTensor.gathered(atIndices: indices, alongAxis: posAxis) } } + +public extension Tensor where Scalar == Bool { + /// Returns the elements of either `x` or `y`, depending on the values in stored in this tensor. + /// + /// `x` and `y` must be scalar if this tensor is scalar. Otherwise, either the first dimension + /// of `x` and `y` must match the shape of this tensor (i.e., this tensor must be a vector), or + /// the shapes of `x` and `y` must match the shape of this tensor. This tensor acts as a mask + /// that chooses, based on the value at each element, whether the corresponding element / row in + /// the output should be taken from `x` (if true) or `y` (if false). If this tensor is a vector + /// and `x` and `y` are higher rank matrices, then it chooses which row (outer dimension) to + /// copy from `x` and `y`. If it has the same shape as `x` and `y`, then it chooses which + /// element to copy from `x` and `y`. + /// + /// - Parameters: + /// - x: Contains the values to use when the condition is true. + /// - y: Contains the values to use when the condition is false. + /// + /// - Precondition: `x` and `y` must have the same shape. + /// + /// - Returns: A tensor with the same type and shape as `x` and `y`. + @differentiable( + wrt: (x, y), + vjp: _vjpSelecting(ifTrue:else:) where T: TensorFlowFloatingPoint) + func selecting( + ifTrue x: Tensor, + else y: Tensor + ) -> Tensor { + return Raw.select(condition: self, t: x, e: y) + } +} + +internal extension Tensor where Scalar == Bool { + @inlinable @inline(__always) + func _vjpSelecting( + ifTrue x: Tensor, + else y: Tensor + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = selecting(ifTrue: x, else: y) + return (value, { v in + let zeros = Tensor(zeros: self.shape) + let gIfTrue = self.selecting(ifTrue: v, else: zeros) + let gElse = self.selecting(ifTrue: zeros, else: v) + return (gIfTrue, gElse) + }) + } +} From 371021b6d2c22943afd24b28c881cd250406e79b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 15:28:02 -0400 Subject: [PATCH 06/55] Added support for 'nonZeroIndices'. 
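As a usage sketch complementing the boolean examples in the documentation below, `nonZeroIndices()` also works on numeric tensors; the literal values here are illustrative.

```swift
let x: Tensor<Float> = [[0, 1],
                        [2, 0]]

// Coordinates of the non-zero entries, in row-major order, as a Tensor<Int64>.
x.nonZeroIndices()   // expected: [[0, 1], [1, 0]]
```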
--- Sources/DeepLearning/Operators/Basic.swift | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index d520ece14..4ce9ac5a8 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -267,6 +267,40 @@ public extension Tensor where Scalar: TensorFlowScalar { } } +public extension Tensor where Scalar: TensorFlowScalar { + /// Returns the locations of non-zero / true values in this tensor. + /// + /// The coordinates are returned in a 2-D tensor where the first dimension (rows) represents the + /// number of non-zero elements, and the second dimension (columns) represents the coordinates + /// of the non-zero elements. Keep in mind that the shape of the output tensor can vary + /// depending on how many true values there are in this tensor. Indices are output in row-major + /// order. + /// + /// For example: + /// ``` + /// // 'input' is [[true, false], [true, false]] + /// // 'input' has 2 true values and so the output has 2 rows. + /// // 'input' has rank of 2, and so the second dimension of the output has size 2. + /// input.nonZeroIndices() // is [[0, 0], [1, 0]] + /// + /// // 'input' is [[[ true, false], [ true, false]], + /// // [[false, true], [false, true]], + /// // [[false, false], [false, true]]] + /// // 'input' has 5 true values and so the output has 5 rows. + /// // 'input' has rank 3, and so the second dimension of the output has size 3. + /// input.nonZeroIndices() // is [[0, 0, 0], + /// // [0, 1, 0], + /// // [1, 0, 1], + /// // [1, 1, 1], + /// // [2, 1, 1]] + /// ``` + /// + /// - Returns: A tensor with shape `(num_true, rank(condition))`. + func nonZeroIndices() -> Tensor { + return Raw.where_(self) + } +} + public extension Tensor where Scalar == Bool { /// Returns the elements of either `x` or `y`, depending on the values in stored in this tensor. /// From 112707bdda7fa8f597d4d94bbcd7fd931843cc36 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 15:29:30 -0400 Subject: [PATCH 07/55] Minor edits. --- Sources/DeepLearning/Operators/Basic.swift | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 4ce9ac5a8..d55d517af 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -254,7 +254,7 @@ public extension Tensor where Scalar: TensorFlowScalar { /// /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. - @inlinable @inline(__always) + @inlinable func masked(with mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = axis < 0 ? axis + rank : axis @@ -296,6 +296,7 @@ public extension Tensor where Scalar: TensorFlowScalar { /// ``` /// /// - Returns: A tensor with shape `(num_true, rank(condition))`. + @inlinable func nonZeroIndices() -> Tensor { return Raw.where_(self) } @@ -320,6 +321,7 @@ public extension Tensor where Scalar == Bool { /// - Precondition: `x` and `y` must have the same shape. /// /// - Returns: A tensor with the same type and shape as `x` and `y`. 
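A small usage sketch of `selecting(ifTrue:else:)` with a mask of the same shape as its arguments; the values are illustrative.

```swift
let condition: Tensor<Bool> = [true, false, true, false]
let a: Tensor<Float> = [1, 2, 3, 4]
let b: Tensor<Float> = [10, 20, 30, 40]

// Picks from `a` where the mask is true and from `b` where it is false.
condition.selecting(ifTrue: a, else: b)   // expected: [1, 20, 3, 40]
```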
+ @inlinable @differentiable( wrt: (x, y), vjp: _vjpSelecting(ifTrue:else:) where T: TensorFlowFloatingPoint) @@ -332,7 +334,7 @@ public extension Tensor where Scalar == Bool { } internal extension Tensor where Scalar == Bool { - @inlinable @inline(__always) + @inlinable func _vjpSelecting( ifTrue x: Tensor, else y: Tensor From 3594e0e70a367c3c4169553b49b233129b6ef807 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 16:16:45 -0400 Subject: [PATCH 08/55] Addressed Richard's feedback. --- Sources/DeepLearning/Operators/Basic.swift | 47 ---------------------- Sources/DeepLearning/Operators/NN.swift | 10 ++--- 2 files changed, 4 insertions(+), 53 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index d55d517af..f36575189 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -301,50 +301,3 @@ public extension Tensor where Scalar: TensorFlowScalar { return Raw.where_(self) } } - -public extension Tensor where Scalar == Bool { - /// Returns the elements of either `x` or `y`, depending on the values in stored in this tensor. - /// - /// `x` and `y` must be scalar if this tensor is scalar. Otherwise, either the first dimension - /// of `x` and `y` must match the shape of this tensor (i.e., this tensor must be a vector), or - /// the shapes of `x` and `y` must match the shape of this tensor. This tensor acts as a mask - /// that chooses, based on the value at each element, whether the corresponding element / row in - /// the output should be taken from `x` (if true) or `y` (if false). If this tensor is a vector - /// and `x` and `y` are higher rank matrices, then it chooses which row (outer dimension) to - /// copy from `x` and `y`. If it has the same shape as `x` and `y`, then it chooses which - /// element to copy from `x` and `y`. - /// - /// - Parameters: - /// - x: Contains the values to use when the condition is true. - /// - y: Contains the values to use when the condition is false. - /// - /// - Precondition: `x` and `y` must have the same shape. - /// - /// - Returns: A tensor with the same type and shape as `x` and `y`. - @inlinable - @differentiable( - wrt: (x, y), - vjp: _vjpSelecting(ifTrue:else:) where T: TensorFlowFloatingPoint) - func selecting( - ifTrue x: Tensor, - else y: Tensor - ) -> Tensor { - return Raw.select(condition: self, t: x, e: y) - } -} - -internal extension Tensor where Scalar == Bool { - @inlinable - func _vjpSelecting( - ifTrue x: Tensor, - else y: Tensor - ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let value = selecting(ifTrue: x, else: y) - return (value, { v in - let zeros = Tensor(zeros: self.shape) - let gIfTrue = self.selecting(ifTrue: v, else: zeros) - let gElse = self.selecting(ifTrue: zeros, else: v) - return (gIfTrue, gElse) - }) - } -} diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 51117b833..9142d432e 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -20,7 +20,7 @@ // Normalization //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar: BinaryFloatingPoint { +public extension Tensor where Scalar: TensorFlowFloatingPoint { /// Computes the batch normalized tensor along the specified axis. 
/// /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are @@ -34,7 +34,7 @@ public extension Tensor where Scalar: BinaryFloatingPoint { @inlinable @differentiable( wrt: (self, offset, scale), - vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) + vjp: _vjpBatchNormalized) func batchNormalized( alongAxis axis: Int32, offset: Tensor = Tensor(0), @@ -47,12 +47,10 @@ public extension Tensor where Scalar: BinaryFloatingPoint { let inv = rsqrt(variance + epsilon) * scale return self * inv + offset - mean * inv } -} - -internal extension Tensor where Scalar: TensorFlowFloatingPoint { + // TODO: Verify that these calculations are correct. @inlinable - func _vjpBatchNormalized( + internal func _vjpBatchNormalized( alongAxis axis: Int32, offset: Tensor, scale: Tensor, From adf20ebc93d3b474411bae52fa52e1794d72d603 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 19:44:53 -0400 Subject: [PATCH 09/55] Addressed Richard's comments. --- Sources/DeepLearning/Initializers.swift | 2 +- Sources/DeepLearning/Layer.swift | 2 +- Sources/DeepLearning/Loss.swift | 2 +- Sources/DeepLearning/Operators/Basic.swift | 54 +++++++++++----------- Sources/DeepLearning/Operators/Math.swift | 2 +- Sources/DeepLearning/Operators/NN.swift | 2 +- Sources/DeepLearning/Optimizer.swift | 2 +- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index bddc8f3f0..943ec8e24 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -@_exported import TensorFlow +import TensorFlow #endif //===------------------------------------------------------------------------------------------===// diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 73f1b263a..586d58abd 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -@_exported import TensorFlow +import TensorFlow #endif /// A value that indicates either a training phase or an inference phase for a layer. diff --git a/Sources/DeepLearning/Loss.swift b/Sources/DeepLearning/Loss.swift index 45a5d15d6..fe9400302 100644 --- a/Sources/DeepLearning/Loss.swift +++ b/Sources/DeepLearning/Loss.swift @@ -13,7 +13,7 @@ // limitations under the License. #if !COMPILING_TENSORFLOW_MODULE -@_exported import TensorFlow +import TensorFlow #endif /// Computes the mean squared error between predictions and labels. diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index f36575189..8725c82b1 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -13,7 +13,7 @@ // limitations under the License. 
#if !COMPILING_TENSORFLOW_MODULE -@_exported import TensorFlow +import TensorFlow #endif public extension Tensor where Scalar: TensorFlowScalar { @@ -30,8 +30,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// // 'x' is [1, 4] /// // 'y' is [2, 5] /// // 'z' is [3, 6] - /// x.packed(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] - /// x.packed(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + /// x.stacked(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] + /// x.stacked(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] /// ``` /// /// This is the opposite of `unstacked`. @@ -43,9 +43,9 @@ public extension Tensor where Scalar: TensorFlowScalar { /// - Precondition: All tensors must have the same shape as the current tensor. /// - Precondition: `axis` must be in the range `[-rank, rank)`. /// - /// - Returns: The packed tensor. + /// - Returns: The stacked tensor. @inlinable - // @differentiable(vjp: _vjpPacked where Scalar: TensorFlowFloatingPoint) + // @differentiable(vjp: _vjpStacked where Scalar: TensorFlowFloatingPoint) func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { return Raw.pack([self] + tensors, axis: axis) } @@ -133,8 +133,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// /// - Returns: The gathered tensor. @inlinable - // @differentiable(vjp: _vjpGathered where Scalar: TensorFlowFloatingPoint) - func gathered( + // @differentiable(vjp: _vjpGathering where Scalar: TensorFlowFloatingPoint) + func gathering( atIndices indices: Tensor, alongAxis axis: Int32 = 0 ) -> Tensor { @@ -144,7 +144,7 @@ public extension Tensor where Scalar: TensorFlowScalar { /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the /// first `batchDims` dimensions that correspond to batch dimensions. /// - /// Performs similar functionality to `gathered`, except that the resulting tensor shape is now: + /// Performs similar functionality to `gathering`, except that the resulting tensor shape is now: /// `self.shape[..( + func batchGathering( atIndices indices: Tensor, alongAxis axis: Int32, numBatchDims batchDims: Int32 @@ -183,7 +183,7 @@ public extension Tensor where Scalar: TensorFlowScalar { Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) let tensor = transposed(withPermutations: permutation) - let result = tensor.batchGathered( + let result = tensor.batchGathering( atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) // Move the result dimensions corresponding to self[batchDims ..< axis] to just before @@ -199,25 +199,25 @@ public extension Tensor where Scalar: TensorFlowScalar { let castedShape = Tensor(shapeTensor) var batchIndices = indices var accumulated = Tensor(ones: []) - for d in (1 ... 
batchDims).reversed() { - accumulated *= castedShape[d] - let dValue = castedShape[d - 1] - let dIndices = Tensor( - rangeFrom: Tensor(zeros: []), - to: dValue, - stride: Tensor(ones: []) - ) * accumulated - let dShape = Tensor(d - 1).packed(with: [ - Tensor(dValue), - Tensor(indices.rank - 1)]) - batchIndices += dIndices.reshaped(toShape: dShape) + for d in (1...batchDims).reversed() { + accumulated *= castedShape[d] + let dValue = castedShape[d - 1] + let dIndices = Tensor( + rangeFrom: Tensor(zeros: []), + to: dValue, + stride: Tensor(ones: []) + ) * accumulated + let dShape = Tensor(d - 1).stacked(with: [ + Tensor(dValue), + Tensor(indices.rank - 1)]) + batchIndices += dIndices.reshaped(toShape: dShape) } let flatIndices = batchIndices.flattened() let outerShape = shapeTensor[Int(batchDims + 1)...] let innerShape = shapeTensor[.. Date: Mon, 1 Apr 2019 21:07:21 -0400 Subject: [PATCH 10/55] Addressed Richard's comments. --- Sources/DeepLearning/Operators/Basic.swift | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 8725c82b1..fe75ac81c 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -134,8 +134,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// - Returns: The gathered tensor. @inlinable // @differentiable(vjp: _vjpGathering where Scalar: TensorFlowFloatingPoint) - func gathering( - atIndices indices: Tensor, + func gathering( + atIndices indices: Tensor, alongAxis axis: Int32 = 0 ) -> Tensor { return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) @@ -158,8 +158,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// /// - Returns: The gathered tensor. @inlinable - func batchGathering( - atIndices indices: Tensor, + func batchGathering( + atIndices indices: Tensor, alongAxis axis: Int32, numBatchDims batchDims: Int32 ) -> Tensor { @@ -196,16 +196,16 @@ public extension Tensor where Scalar: TensorFlowScalar { return result.transposed(withPermutations: resultPermutation) } - let castedShape = Tensor(shapeTensor) + let castedShape = Tensor(shapeTensor) var batchIndices = indices - var accumulated = Tensor(ones: []) + var accumulated = Tensor(ones: []) for d in (1...batchDims).reversed() { accumulated *= castedShape[d] let dValue = castedShape[d - 1] - let dIndices = Tensor( - rangeFrom: Tensor(zeros: []), + let dIndices = Tensor( + rangeFrom: Tensor(zeros: []), to: dValue, - stride: Tensor(ones: []) + stride: Tensor(ones: []) ) * accumulated let dShape = Tensor(d - 1).stacked(with: [ Tensor(dValue), @@ -221,24 +221,24 @@ public extension Tensor where Scalar: TensorFlowScalar { return flatResult.reshaped(toShape: indices.shapeTensor.concatenated(with: outerShape)) } - /// Applies the provided boolean mask to this tensor. + /// Gathers values from this tensor according to the provided boolean mask. 
/// /// For example: /// ``` /// // 1-D example /// // tensor is [0, 1, 2, 3] /// // mask is [true, false, true, false] - /// tensor.masked(with: mask) // is [0, 2] + /// tensor.gathering(where: mask) // is [0, 2] /// /// // 2-D example /// // tensor is [[1, 2], [3, 4], [5, 6]] /// // mask is [true, false, true] - /// tensor.masked(with: mask) // is [[1, 2], [5, 6]] + /// tensor.gathering(where: mask) // is [[1, 2], [5, 6]] /// ``` /// /// In general, `0 < mask.rank = K <= tensor.rank`, and the `mask`'s shape must match the first /// K dimensions of the `tensor`'s shape. We then have: - /// `tensor.masked(with: mask)[i, j1, ..., jd] = tensor[i1, ..., iK, j1, ..., jd]`, where + /// `tensor.gathering(where: mask)[i, j1, ..., jd] = tensor[i1, ..., iK, j1, ..., jd]`, where /// `[i1, ..., iK]` is the `i`th `true` entry of `mask` (row-major order). /// /// The `axis` could be used with `mask` to indicate the axis to mask from. In that case, @@ -255,7 +255,7 @@ public extension Tensor where Scalar: TensorFlowScalar { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable - func masked(with mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { + func gathering(where mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = axis < 0 ? axis + rank : axis let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() From b0aba5de12cf2436bde2e3898f60ad1e57837aee Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 1 Apr 2019 21:16:55 -0400 Subject: [PATCH 11/55] Updated the convolution ops to support explicit paddings. --- Sources/DeepLearning/Operators/NN.swift | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 6e8a40daf..70db254d5 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -86,6 +86,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { + /// The "explicit" padding scheme. + case explicit(paddings: [Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. @@ -94,12 +96,22 @@ public enum Padding { public extension Padding { @inlinable - var raw: Raw.Padding { + var raw: Raw.Padding2 { switch self { + case .explicit: return .explicit case .same: return .same case .valid: return .valid } } + + @inlinable + var explicitPaddings: [Int32] { + switch self { + case .explicit(let paddings): return paddings + case .same: return [] + case .valid: return [] + } + } } public extension Tensor where Scalar: TensorFlowFloatingPoint { @@ -117,7 +129,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filter: filter, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } /// TensorFlow builtin conv2d gradient helper for the filter. 
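
For context, the new `.explicit` case carries per-dimension pad amounts that are forwarded to the raw convolution ops through `explicitPaddings`, while `.valid` and `.same` forward an empty array. A minimal usage sketch, assuming TensorFlow's Conv2D convention of two pad values per dimension in NHWC order (the module name in the import and the concrete numbers are illustrative, not taken from this patch):

```
import DeepLearning  // module name assumed from Sources/DeepLearning

// Pad the height dimension by 1 on each side and the width dimension by 2 on each
// side, leaving the batch and channel dimensions untouched.
let padding: Padding = .explicit(paddings: [0, 0, 1, 1, 2, 2, 0, 0])
print(padding.explicitPaddings)        // [0, 0, 1, 1, 2, 2, 0, 0]

// Non-explicit schemes forward an empty array to the raw op.
print(Padding.valid.explicitPaddings)  // []
```
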
@@ -134,7 +147,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filterSizes: filterSizes, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } @inlinable @@ -264,7 +278,8 @@ public extension Tensor where Scalar: FloatingPoint { self, filter: filter, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } /// Computes a 2-D max pooling, with the specified kernel sizes, strides, and From 05704d0b862d1d80df75c396a92484acf95683ee Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:15:46 -0400 Subject: [PATCH 12/55] Small edits. --- Sources/DeepLearning/Operators/NN.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 70db254d5..4b53ca514 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -87,7 +87,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { // @_frozen // SR-9739 public enum Padding { /// The "explicit" padding scheme. - case explicit(paddings: [Int32]) + case explicit(_ paddings: [Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. @@ -105,7 +105,7 @@ public extension Padding { } @inlinable - var explicitPaddings: [Int32] { + internal var explicitPaddings: [Int32] { switch self { case .explicit(let paddings): return paddings case .same: return [] From a686a76eccf5d44d21aec4af9544c22655aa73f4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:17:10 -0400 Subject: [PATCH 13/55] Updated the convolution ops to support explicit paddings. --- Sources/DeepLearning/Operators.swift | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators.swift index 51b2406a8..3a660c07a 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators.swift @@ -98,12 +98,14 @@ public extension Tensor where Scalar: BinaryFloatingPoint { } //===------------------------------------------------------------------------------------------===// -// Convolution and pooling +// Convolution and Pooling //===------------------------------------------------------------------------------------------===// /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { + /// The "explicit" padding scheme. + case explicit(_ paddings: [Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. 
@@ -112,12 +114,22 @@ public enum Padding { public extension Padding { @inlinable - var raw: Raw.Padding { + var raw: Raw.Padding2 { switch self { + case .explicit: return .explicit case .same: return .same case .valid: return .valid } } + + @inlinable + internal var explicitPaddings: [Int32] { + switch self { + case .explicit(let paddings): return paddings + case .same: return [] + case .valid: return [] + } + } } public extension Tensor where Scalar: TensorFlowFloatingPoint { @@ -135,7 +147,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filter: filter, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } /// TensorFlow builtin conv2d gradient helper for the filter. @@ -152,7 +165,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filterSizes: filterSizes, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } @inlinable @@ -282,7 +296,8 @@ public extension Tensor where Scalar: FloatingPoint { self, filter: filter, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw) + padding: padding.raw, + explicitPaddings: padding.explicitPaddings) } /// Computes a 2-D max pooling, with the specified kernel sizes, strides, and From cc4665849a25347c890133a1c496b10d6dc9479a Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:22:27 -0400 Subject: [PATCH 14/55] Small fix. --- Sources/DeepLearning/Operators/NN.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 4b53ca514..27a94dc1a 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -87,7 +87,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { // @_frozen // SR-9739 public enum Padding { /// The "explicit" padding scheme. - case explicit(_ paddings: [Int32]) + case explicit([Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. From 8494c04d0ef849119b73cd566bccb304bb11a999 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:22:58 -0400 Subject: [PATCH 15/55] Small fix. --- Sources/DeepLearning/Operators.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators.swift index 3a660c07a..501722a9e 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators.swift @@ -105,7 +105,7 @@ public extension Tensor where Scalar: BinaryFloatingPoint { // @_frozen // SR-9739 public enum Padding { /// The "explicit" padding scheme. - case explicit(_ paddings: [Int32]) + case explicit([Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. From 976061f3b6fae3875f953f0c4938ea1e1104b2c4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:30:23 -0400 Subject: [PATCH 16/55] Added a new tensor initializer from ranges of tensors. 
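
A minimal sketch of the new initializer, assuming the snippet is built against this module (the module name in the import and the element values are illustrative):

```
import TensorFlow
import DeepLearning  // module name assumed from Sources/DeepLearning

// `end` is exclusive and `stride` must be positive.
let evens = Tensor<Int32>(rangeFrom: Tensor(0), to: Tensor(10), stride: Tensor(2))
// evens is [0, 2, 4, 6, 8]

// The initializer is generic over numeric scalars, so it also covers floating point.
let grid = Tensor<Float>(rangeFrom: Tensor(0), to: Tensor(1), stride: Tensor(0.25))
// grid is [0.0, 0.25, 0.5, 0.75]
```
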
--- Sources/DeepLearning/Initializers.swift | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 943ec8e24..1d2041836 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -16,6 +16,22 @@ import TensorFlow #endif +public extension Tensor where Scalar: TensorFlowScalar & Numeric { + /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an + /// end value, stepping by the specified amount. + /// + /// - Parameters: + /// - start: The starting value to use for the sequence. If the sequence contains any values, + /// the first one is `start`. + /// - end: An end value to limit the sequence. `end` is never an element of the resulting + /// sequence. + /// - stride: The amount to step by with each iteration. `stride` must be positive. + @inlinable @inline(__always) + init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { + self = Raw.range(start: start, limit: end, delta: stride) + } +} + //===------------------------------------------------------------------------------------------===// // Random //===------------------------------------------------------------------------------------------===// From 5dfaaeecf5b48e4784ec2c3b9c0607f2bb7ceced Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 08:32:29 -0400 Subject: [PATCH 17/55] Added documentation string for the "explicit" padding scheme. --- Sources/DeepLearning/Operators.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators.swift index 501722a9e..827890b3d 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators.swift @@ -104,7 +104,8 @@ public extension Tensor where Scalar: BinaryFloatingPoint { /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { - /// The "explicit" padding scheme. + /// The "explicit" padding scheme, which is defined by an array indicating the explicit padding + /// sizes at the start and end of each dimension. case explicit([Int32]) /// The "valid" padding scheme. case valid From eda9514edf0884d1da3545bf9417005a1a2fc1b7 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 2 Apr 2019 21:24:34 -0400 Subject: [PATCH 18/55] More fixes. --- Sources/DeepLearning/Layer.swift | 20 ++++++++++---------- Sources/DeepLearning/Operators.swift | 23 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 7c4cf1d23..ffa83d6b9 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -836,7 +836,7 @@ public struct MaxPool1D: Layer { /// The stride of the sliding window for temporal dimension. @noDerivative let stride: Int32 /// The padding algorithm for pooling. - @noDerivative let padding: Padding + @noDerivative let padding: PaddingV1 /// Creates a max pooling layer. /// @@ -847,7 +847,7 @@ public struct MaxPool1D: Layer { public init( poolSize: Int, stride: Int, - padding: Padding + padding: PaddingV1 ) { self.poolSize = Int32(poolSize) self.stride = Int32(stride) @@ -878,13 +878,13 @@ public struct MaxPool2D: Layer { /// Strides in non-spatial dimensions must be `1`. @noDerivative let strides: (Int32, Int32, Int32, Int32) /// The padding algorithm for pooling. 
- @noDerivative let padding: Padding + @noDerivative let padding: PaddingV1 /// Creates a max pooling layer. public init( poolSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), - padding: Padding + padding: PaddingV1 ) { (self.poolSize.0, self.poolSize.1, self.poolSize.2, self.poolSize.3) = (Int32(poolSize.0), Int32(poolSize.1), Int32(poolSize.2), Int32(poolSize.3)) @@ -899,7 +899,7 @@ public struct MaxPool2D: Layer { /// - poolSize: Vertical and horizontal factors by which to downscale. /// - strides: The strides. /// - padding: The padding. - public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) { + public init(poolSize: (Int, Int), strides: (Int, Int), padding: PaddingV1 = .valid) { self.poolSize = (1, Int32(poolSize.0), Int32(poolSize.1), 1) self.strides = (1, Int32(strides.0), Int32(strides.1), 1) self.padding = padding @@ -927,7 +927,7 @@ public struct AvgPool1D: Layer { /// The stride of the sliding window for temporal dimension. @noDerivative let stride: Int32 /// The padding algorithm for pooling. - @noDerivative let padding: Padding + @noDerivative let padding: PaddingV1 /// Creates an average pooling layer. /// @@ -938,7 +938,7 @@ public struct AvgPool1D: Layer { public init( poolSize: Int, stride: Int, - padding: Padding + padding: PaddingV1 ) { self.poolSize = Int32(poolSize) self.stride = Int32(stride) @@ -969,13 +969,13 @@ public struct AvgPool2D: Layer { /// Strides in non-spatial dimensions must be `1`. @noDerivative let strides: (Int32, Int32, Int32, Int32) /// The padding algorithm for pooling. - @noDerivative let padding: Padding + @noDerivative let padding: PaddingV1 /// Creates a average pooling layer. public init( poolSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), - padding: Padding + padding: PaddingV1 ) { (self.poolSize.0, self.poolSize.1, self.poolSize.2, self.poolSize.3) = (Int32(poolSize.0), Int32(poolSize.1), Int32(poolSize.2), Int32(poolSize.3)) @@ -990,7 +990,7 @@ public struct AvgPool2D: Layer { /// - poolSize: Vertical and horizontal factors by which to downscale. /// - strides: The strides. /// - padding: The padding. - public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) { + public init(poolSize: (Int, Int), strides: (Int, Int), padding: PaddingV1 = .valid) { self.poolSize = (1, Int32(poolSize.0), Int32(poolSize.1), 1) self.strides = (1, Int32(strides.0), Int32(strides.1), 1) self.padding = padding diff --git a/Sources/DeepLearning/Operators.swift b/Sources/DeepLearning/Operators.swift index 3a660c07a..1587facbd 100644 --- a/Sources/DeepLearning/Operators.swift +++ b/Sources/DeepLearning/Operators.swift @@ -132,6 +132,25 @@ public extension Padding { } } +/// An older padding scheme. Used by padding, convolution, and pooling ops. +// @_frozen // SR-9739 +public enum PaddingV1 { + /// The "valid" padding scheme. + case valid + /// The "same" padding scheme. + case same +} + +public extension PaddingV1 { + @inlinable + var raw: Raw.Padding { + switch self { + case .same: return .same + case .valid: return .valid + } + } +} + public extension Tensor where Scalar: TensorFlowFloatingPoint { /// TensorFlow builtin conv2d gradient helper for the input. 
@inlinable @@ -316,7 +335,7 @@ public extension Tensor where Scalar: FloatingPoint { func maxPooled( kernelSize: (Int32, Int32, Int32, Int32), strides: (Int32, Int32, Int32, Int32), - padding: Padding + padding: PaddingV1 ) -> Tensor { return Raw.maxPoolV2( self, @@ -343,7 +362,7 @@ public extension Tensor where Scalar: FloatingPoint { func averagePooled( kernelSize: (Int32, Int32, Int32, Int32), strides: (Int32, Int32, Int32, Int32), - padding: Padding + padding: PaddingV1 ) -> Tensor { return Raw.avgPool( value: self, From aed430a6155db601d6cba9357f252de08808481b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 12 Apr 2019 11:50:10 -0400 Subject: [PATCH 19/55] Added 'zerosLike' and 'onesLike' tensor initializers. --- Sources/DeepLearning/Initializers.swift | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 1d2041836..6b6d029b1 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -16,7 +16,25 @@ import TensorFlow #endif -public extension Tensor where Scalar: TensorFlowScalar & Numeric { +public extension Tensor where Scalar : Numeric { + /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided + /// tensor. + /// + /// - Parameter other: Tensor whose shape and data type to use. + @inlinable @inline(__always) + init(zerosLike other: Tensor) { + self = Raw.zerosLike(other) + } + + /// Creates a tensor with all scalars set to one that has the same shape and type as the provided + /// tensor. + /// + /// - Parameter other: Tensor whose shape and data type to use. + @inlinable @inline(__always) + init(onesLike other: Tensor) { + self = Raw.onesLike(other) + } + /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an /// end value, stepping by the specified amount. /// @@ -32,10 +50,6 @@ public extension Tensor where Scalar: TensorFlowScalar & Numeric { } } -//===------------------------------------------------------------------------------------------===// -// Random -//===------------------------------------------------------------------------------------------===// - public extension Tensor where Scalar == Int32 { /// Creates a tensor with the specified shape, randomly sampling scalar values /// from a discrete uniform distribution. From 5a093a8f6305b599891bb75728106cf0aac67113 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 10:00:36 -0400 Subject: [PATCH 20/55] Added a new 'stacking' tensor initializer and made some compatibility fixes. --- Sources/DeepLearning/Initializers.swift | 9 +++++ Sources/DeepLearning/Layer.swift | 20 +++++----- Sources/DeepLearning/Operators/Basic.swift | 16 ++++---- Sources/DeepLearning/Operators/NN.swift | 44 +++++----------------- 4 files changed, 37 insertions(+), 52 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 6b6d029b1..4f465f609 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -48,6 +48,15 @@ public extension Tensor where Scalar : Numeric { init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { self = Raw.range(start: start, limit: end, delta: stride) } + + /// Returns a stacked tensor, constructed by stacking the provided tensors along + /// the specified axis. + /// - Precondition: The tensors must have the same dimensions,. 
+ /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { + self = Raw.pack(tensors, axis: Int64(axis)) + } } public extension Tensor where Scalar == Int32 { diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 6bca9eb67..801173654 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -836,7 +836,7 @@ public struct MaxPool1D: Layer { /// The stride of the sliding window for temporal dimension. @noDerivative let stride: Int32 /// The padding algorithm for pooling. - @noDerivative let padding: PaddingV1 + @noDerivative let padding: Padding /// Creates a max pooling layer. /// @@ -847,7 +847,7 @@ public struct MaxPool1D: Layer { public init( poolSize: Int, stride: Int, - padding: PaddingV1 + padding: Padding ) { self.poolSize = Int32(poolSize) self.stride = Int32(stride) @@ -878,13 +878,13 @@ public struct MaxPool2D: Layer { /// Strides in non-spatial dimensions must be `1`. @noDerivative let strides: (Int32, Int32, Int32, Int32) /// The padding algorithm for pooling. - @noDerivative let padding: PaddingV1 + @noDerivative let padding: Padding /// Creates a max pooling layer. public init( poolSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), - padding: PaddingV1 + padding: Padding ) { (self.poolSize.0, self.poolSize.1, self.poolSize.2, self.poolSize.3) = (Int32(poolSize.0), Int32(poolSize.1), Int32(poolSize.2), Int32(poolSize.3)) @@ -899,7 +899,7 @@ public struct MaxPool2D: Layer { /// - poolSize: Vertical and horizontal factors by which to downscale. /// - strides: The strides. /// - padding: The padding. - public init(poolSize: (Int, Int), strides: (Int, Int), padding: PaddingV1 = .valid) { + public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) { self.poolSize = (1, Int32(poolSize.0), Int32(poolSize.1), 1) self.strides = (1, Int32(strides.0), Int32(strides.1), 1) self.padding = padding @@ -927,7 +927,7 @@ public struct AvgPool1D: Layer { /// The stride of the sliding window for temporal dimension. @noDerivative let stride: Int32 /// The padding algorithm for pooling. - @noDerivative let padding: PaddingV1 + @noDerivative let padding: Padding /// Creates an average pooling layer. /// @@ -938,7 +938,7 @@ public struct AvgPool1D: Layer { public init( poolSize: Int, stride: Int, - padding: PaddingV1 + padding: Padding ) { self.poolSize = Int32(poolSize) self.stride = Int32(stride) @@ -969,13 +969,13 @@ public struct AvgPool2D: Layer { /// Strides in non-spatial dimensions must be `1`. @noDerivative let strides: (Int32, Int32, Int32, Int32) /// The padding algorithm for pooling. - @noDerivative let padding: PaddingV1 + @noDerivative let padding: Padding /// Creates a average pooling layer. public init( poolSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), - padding: PaddingV1 + padding: Padding ) { (self.poolSize.0, self.poolSize.1, self.poolSize.2, self.poolSize.3) = (Int32(poolSize.0), Int32(poolSize.1), Int32(poolSize.2), Int32(poolSize.3)) @@ -990,7 +990,7 @@ public struct AvgPool2D: Layer { /// - poolSize: Vertical and horizontal factors by which to downscale. /// - strides: The strides. /// - padding: The padding. 
- public init(poolSize: (Int, Int), strides: (Int, Int), padding: PaddingV1 = .valid) { + public init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) { self.poolSize = (1, Int32(poolSize.0), Int32(poolSize.1), 1) self.strides = (1, Int32(strides.0), Int32(strides.1), 1) self.padding = padding diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index fe75ac81c..520e05ed1 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -46,8 +46,8 @@ public extension Tensor where Scalar: TensorFlowScalar { /// - Returns: The stacked tensor. @inlinable // @differentiable(vjp: _vjpStacked where Scalar: TensorFlowFloatingPoint) - func stacked(with tensors: [Tensor], alongAxis axis: Int64 = 0) -> Tensor { - return Raw.pack([self] + tensors, axis: axis) + func stacked(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { + return Raw.pack([self] + tensors, axis: Int64(axis)) } /// Concatenates the current tensor with `tensors` along the `axis` dimension. @@ -257,13 +257,13 @@ public extension Tensor where Scalar: TensorFlowScalar { @inlinable func gathering(where mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - let posAxis = axis < 0 ? axis + rank : axis - let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() + let posAxis = Int(axis < 0 ? axis + rank : axis) + let leadingSize = shapeTensor[posAxis ..< posAxis + Int(mask.rank)].product().rankLifted() let reshapedTensor = reshaped( - toShape: shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) + return reshapedTensor.gathering(atIndices: indices, alongAxis: Int32(posAxis)) } } diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 4b8afd7dc..ac3ac43d8 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -86,9 +86,6 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { /// A padding scheme. Used by padding, convolution, and pooling ops. // @_frozen // SR-9739 public enum Padding { - /// The "explicit" padding scheme, which is defined by an array indicating the explicit padding - /// sizes at the start and end of each dimension. - case explicit([Int32]) /// The "valid" padding scheme. case valid /// The "same" padding scheme. @@ -97,36 +94,15 @@ public enum Padding { public extension Padding { @inlinable - var raw: Raw.Padding2 { + internal var raw: Raw.Padding { switch self { - case .explicit: return .explicit case .same: return .same case .valid: return .valid } } @inlinable - internal var explicitPaddings: [Int32] { - switch self { - case .explicit(let paddings): return paddings - case .same: return [] - case .valid: return [] - } - } -} - -/// An older padding scheme. Used by padding, convolution, and pooling ops. -// @_frozen // SR-9739 -public enum PaddingV1 { - /// The "valid" padding scheme. - case valid - /// The "same" padding scheme. 
- case same -} - -public extension PaddingV1 { - @inlinable - var raw: Raw.Padding { + internal var raw2: Raw.Padding2 { switch self { case .same: return .same case .valid: return .valid @@ -149,8 +125,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filter: filter, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw, - explicitPaddings: padding.explicitPaddings) + padding: padding.raw2, + explicitPaddings: []) } /// TensorFlow builtin conv2d gradient helper for the filter. @@ -167,8 +143,8 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { filterSizes: filterSizes, outBackprop: self, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw, - explicitPaddings: padding.explicitPaddings) + padding: padding.raw2, + explicitPaddings: []) } @inlinable @@ -298,8 +274,8 @@ public extension Tensor where Scalar: FloatingPoint { self, filter: filter, strides: [strides.0, strides.1, strides.2, strides.3], - padding: padding.raw, - explicitPaddings: padding.explicitPaddings) + padding: padding.raw2, + explicitPaddings: []) } /// Computes a 2-D max pooling, with the specified kernel sizes, strides, and @@ -318,7 +294,7 @@ public extension Tensor where Scalar: FloatingPoint { func maxPooled( kernelSize: (Int32, Int32, Int32, Int32), strides: (Int32, Int32, Int32, Int32), - padding: PaddingV1 + padding: Padding ) -> Tensor { return Raw.maxPoolV2( self, @@ -345,7 +321,7 @@ public extension Tensor where Scalar: FloatingPoint { func averagePooled( kernelSize: (Int32, Int32, Int32, Int32), strides: (Int32, Int32, Int32, Int32), - padding: PaddingV1 + padding: Padding ) -> Tensor { return Raw.avgPool( value: self, From 467a443d015bf97b3735c7f68ca29d349599715b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 10:55:53 -0400 Subject: [PATCH 21/55] Added a new 'tiling' tensor initializer. --- Sources/DeepLearning/Initializers.swift | 87 ++++++++++++++----------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 4f465f609..c58539684 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -17,46 +17,59 @@ import TensorFlow #endif public extension Tensor where Scalar : Numeric { - /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided - /// tensor. - /// - /// - Parameter other: Tensor whose shape and data type to use. - @inlinable @inline(__always) - init(zerosLike other: Tensor) { - self = Raw.zerosLike(other) - } + /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided + /// tensor. + /// + /// - Parameter other: Tensor whose shape and data type to use. + @inlinable @inline(__always) + init(zerosLike other: Tensor) { + self = Raw.zerosLike(other) + } + + /// Creates a tensor with all scalars set to one that has the same shape and type as the provided + /// tensor. + /// + /// - Parameter other: Tensor whose shape and data type to use. + @inlinable @inline(__always) + init(onesLike other: Tensor) { + self = Raw.onesLike(other) + } - /// Creates a tensor with all scalars set to one that has the same shape and type as the provided - /// tensor. - /// - /// - Parameter other: Tensor whose shape and data type to use. 
- @inlinable @inline(__always) - init(onesLike other: Tensor) { - self = Raw.onesLike(other) - } + /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an + /// end value, stepping by the specified amount. + /// + /// - Parameters: + /// - start: The starting value to use for the sequence. If the sequence contains any values, + /// the first one is `start`. + /// - end: An end value to limit the sequence. `end` is never an element of the resulting + /// sequence. + /// - stride: The amount to step by with each iteration. `stride` must be positive. + @inlinable @inline(__always) + init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { + self = Raw.range(start: start, limit: end, delta: stride) + } - /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an - /// end value, stepping by the specified amount. - /// - /// - Parameters: - /// - start: The starting value to use for the sequence. If the sequence contains any values, - /// the first one is `start`. - /// - end: An end value to limit the sequence. `end` is never an element of the resulting - /// sequence. - /// - stride: The amount to step by with each iteration. `stride` must be positive. - @inlinable @inline(__always) - init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { - self = Raw.range(start: start, limit: end, delta: stride) - } + /// Returns a stacked tensor, constructed by stacking the provided tensors along + /// the specified axis. + /// - Precondition: The tensors must have the same dimensions,. + /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { + self = Raw.pack(tensors, axis: Int64(axis)) + } - /// Returns a stacked tensor, constructed by stacking the provided tensors along - /// the specified axis. - /// - Precondition: The tensors must have the same dimensions,. - /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { - self = Raw.pack(tensors, axis: Int64(axis)) - } + /// Returns a tiled tensor, constructed by tiling the provided tensor. + /// + /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The + /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the + /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For + /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. + /// + /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. + @inlinable @inline(__always) + init(tiling tensor: Tensor, multiples: Tensor) { + self = Raw.tile(tensor, multiples: multiples) + } } public extension Tensor where Scalar == Int32 { From 1faaef456c420a20d024f39e8584ebee2c8cd28d Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 10:57:32 -0400 Subject: [PATCH 22/55] Minor edit. --- Sources/DeepLearning/Initializers.swift | 46 +++++++++++++------------ 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index c58539684..c42e2dc64 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -16,6 +16,30 @@ import TensorFlow #endif +public extension Tensor { + /// Returns a stacked tensor, constructed by stacking the provided tensors along + /// the specified axis. + /// - Precondition: The tensors must have the same dimensions,. 
+ /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { + self = Raw.pack(tensors, axis: Int64(axis)) + } + + /// Returns a tiled tensor, constructed by tiling the provided tensor. + /// + /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The + /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the + /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For + /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. + /// + /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. + @inlinable @inline(__always) + init(tiling tensor: Tensor, multiples: Tensor) { + self = Raw.tile(tensor, multiples: multiples) + } +} + public extension Tensor where Scalar : Numeric { /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided /// tensor. @@ -48,28 +72,6 @@ public extension Tensor where Scalar : Numeric { init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { self = Raw.range(start: start, limit: end, delta: stride) } - - /// Returns a stacked tensor, constructed by stacking the provided tensors along - /// the specified axis. - /// - Precondition: The tensors must have the same dimensions,. - /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { - self = Raw.pack(tensors, axis: Int64(axis)) - } - - /// Returns a tiled tensor, constructed by tiling the provided tensor. - /// - /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The - /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the - /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For - /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. - /// - /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. - @inlinable @inline(__always) - init(tiling tensor: Tensor, multiples: Tensor) { - self = Raw.tile(tensor, multiples: multiples) - } } public extension Tensor where Scalar == Int32 { From 94cf85fa877faf4907a3bf89447e38d3a3c195e1 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 11:18:20 -0400 Subject: [PATCH 23/55] Made some refactoring. --- Sources/DeepLearning/Initializers.swift | 81 ++++++++++++++++++--- Sources/DeepLearning/Operators/Basic.swift | 85 +++------------------- 2 files changed, 81 insertions(+), 85 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index c42e2dc64..6e259f72b 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -17,15 +17,78 @@ import TensorFlow #endif public extension Tensor { - /// Returns a stacked tensor, constructed by stacking the provided tensors along - /// the specified axis. - /// - Precondition: The tensors must have the same dimensions,. - /// - Precondition: The axis must be in the range `-rank..], alongAxis axis: Int32 = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } + /// Concatenates `tensors` along the `axis` dimension. + /// + /// Given that `tensors[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result + /// has shape `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data + /// from the input tensors is joined along the `axis` dimension. 
+ /// + /// For example: + /// ``` + /// // t1 is [[1, 2, 3], [4, 5, 6]] + /// // t2 is [[7, 8, 9], [10, 11, 12]] + /// Tensor(concatenating: [t1, t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + /// Tensor(concatenating: [t1, t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] + /// + /// // t3 has shape [2, 3] + /// // t4 has shape [2, 3] + /// Tensor(concatenating: [t3, t4]) // has shape [4, 3] + /// Tensor(concatenating: [t3, t4], alongAxis: 1) // has shape [2, 6] + /// ``` + /// + /// - Note: If you are concatenating along a new axis consider using + /// `Tensor.init(stacking:alongAxis:)`. + /// + /// - Parameters: + /// - tensors: Tensors to concatenate. + /// - axis: Dimension along which to concatenate. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same rank and all dimensions except `axis` + /// must be equal. + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: The concatenated tensor. + @inlinable + // @differentiable(vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + init(concatenating tensors: [Tensor], alongAxis axis: Int32 = 0) { + self = Raw.concatV2(tensors, axis: Tensor(axis)) + } + /// Returns a tiled tensor, constructed by tiling the provided tensor. /// /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The @@ -34,7 +97,7 @@ public extension Tensor { /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. /// /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. - @inlinable @inline(__always) + @inlinable init(tiling tensor: Tensor, multiples: Tensor) { self = Raw.tile(tensor, multiples: multiples) } @@ -45,7 +108,7 @@ public extension Tensor where Scalar : Numeric { /// tensor. /// /// - Parameter other: Tensor whose shape and data type to use. - @inlinable @inline(__always) + @inlinable init(zerosLike other: Tensor) { self = Raw.zerosLike(other) } @@ -54,7 +117,7 @@ public extension Tensor where Scalar : Numeric { /// tensor. /// /// - Parameter other: Tensor whose shape and data type to use. - @inlinable @inline(__always) + @inlinable init(onesLike other: Tensor) { self = Raw.onesLike(other) } @@ -68,7 +131,7 @@ public extension Tensor where Scalar : Numeric { /// - end: An end value to limit the sequence. `end` is never an element of the resulting /// sequence. /// - stride: The amount to step by with each iteration. `stride` must be positive. - @inlinable @inline(__always) + @inlinable init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { self = Raw.range(start: start, limit: end, delta: stride) } diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 520e05ed1..740eb3826 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -16,77 +16,7 @@ import TensorFlow #endif -public extension Tensor where Scalar: TensorFlowScalar { - /// Stacks the current tensor with `tensors`, along the `axis` dimension, into a tensor with - /// rank one higher than the current tensor and each tensor in `tensors`. - /// - /// Given `self` and `tensors` all have shape `[A, B, C]`, and `tensors.count = N-1`, then: - /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. - /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. - /// - etc. 
- /// - /// For example: - /// ``` - /// // 'x' is [1, 4] - /// // 'y' is [2, 5] - /// // 'z' is [3, 6] - /// x.stacked(with: [y, z]) // is [[1, 4], [2, 5], [3, 6]] - /// x.stacked(with: [y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] - /// ``` - /// - /// This is the opposite of `unstacked`. - /// - /// - Parameters: - /// - tensors: Tensors to stack with the current tensor. - /// - axis: Dimension along which to stack. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same shape as the current tensor. - /// - Precondition: `axis` must be in the range `[-rank, rank)`. - /// - /// - Returns: The stacked tensor. - @inlinable - // @differentiable(vjp: _vjpStacked where Scalar: TensorFlowFloatingPoint) - func stacked(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { - return Raw.pack([self] + tensors, axis: Int64(axis)) - } - - /// Concatenates the current tensor with `tensors` along the `axis` dimension. - /// - /// Given `self` and `tensors` are all put in a single array, `values`, and - /// `values[i].shape = [D0, D1, ... Daxis(i), ...Dn]`, then the concatenated result has shape - /// `[D0, D1, ... Raxis, ...Dn]`, where `Raxis = sum(Daxis(i))`. That is, the data from the - /// input tensors is joined along the `axis` dimension. - /// - /// For example: - /// ``` - /// // t1 is [[1, 2, 3], [4, 5, 6]] - /// // t2 is [[7, 8, 9], [10, 11, 12]] - /// t1.concatenated(with: [t2]) // is [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] - /// t1.concatenated(with: [t2], alongAxis: 1) // is [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] - /// - /// // t3 has shape [2, 3] - /// // t4 has shape [2, 3] - /// t3.concatenated(with: [t4]) // has shape [4, 3] - /// t3.concatenated(with: [t4], alongAxis: 1) // has shape [2, 6] - /// ``` - /// - /// - Note: If you are concatenating along a new axis consider using `stacked`. - /// - /// - Parameters: - /// - tensors: Tensors to concatenate with the current tensor. - /// - axis: Dimension along which to concatenate. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same rank as the current tensor and all dimensions - /// except `axis` must be equal. - /// - Precondition: `axis` must be in the range `[-rank, rank)`. - /// - /// - Returns: The concatenated tensor. - @inlinable - // @differentiable(vjp: _vjpConcatenated where Scalar : TensorFlowFloatingPoint) - func concatenated(with tensors: [Tensor], alongAxis axis: Int32 = 0) -> Tensor { - return Raw.concatV2([self] + tensors, axis: Tensor(axis)) - } - +public extension Tensor { /// Gathers slices of this tensor at `indices` along the `axis` dimension. /// /// For 0-D (scalar) `indices`: @@ -178,7 +108,8 @@ public extension Tensor where Scalar: TensorFlowScalar { precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") // Move self[axis] up to self[batchDims]. - let permutation = Tensor(0 ..< batchDims).concatenated(with: [ + let permutation = Tensor(concatenating: [ + Tensor(0 ..< batchDims), Tensor(axis).rankLifted(), Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) @@ -189,7 +120,8 @@ public extension Tensor where Scalar: TensorFlowScalar { // Move the result dimensions corresponding to self[batchDims ..< axis] to just before // the dimensions corresponding to indices[batchDims ...]. 
let start = indices.rank + posAxis - batchDims - let resultPermutation = Tensor(0 ..< batchDims).concatenated(with: [ + let resultPermutation = Tensor(concatenating: [ + Tensor(0 ..< batchDims), Tensor(rangeFrom: indices.rank, to: start, stride: 1), Tensor(batchDims ..< indices.rank), Tensor(rangeFrom: start, to: result.rank, stride: 1)]) @@ -207,7 +139,8 @@ public extension Tensor where Scalar: TensorFlowScalar { to: dValue, stride: Tensor(ones: []) ) * accumulated - let dShape = Tensor(d - 1).stacked(with: [ + let dShape = Tensor(stacking: [ + Tensor(d - 1), Tensor(dValue), Tensor(indices.rank - 1)]) batchIndices += dIndices.reshaped(toShape: dShape) @@ -260,8 +193,8 @@ public extension Tensor where Scalar: TensorFlowScalar { let posAxis = Int(axis < 0 ? axis + rank : axis) let leadingSize = shapeTensor[posAxis ..< posAxis + Int(mask.rank)].product().rankLifted() let reshapedTensor = reshaped( - toShape: shapeTensor[..(concatenating: [ + shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) return reshapedTensor.gathering(atIndices: indices, alongAxis: Int32(posAxis)) } From e0bbfc049c9d1f7b9452bbffaecd4c9c894488d8 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 15 Apr 2019 15:29:31 -0400 Subject: [PATCH 24/55] Bug fix. --- Sources/DeepLearning/Operators/Basic.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 740eb3826..cb63eca1b 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -139,10 +139,10 @@ public extension Tensor { to: dValue, stride: Tensor(ones: []) ) * accumulated - let dShape = Tensor(stacking: [ - Tensor(d - 1), - Tensor(dValue), - Tensor(indices.rank - 1)]) + let dShape = Tensor(concatenating: [ + Tensor([Int32](repeating: 1, count: Int(d - 1))), + Tensor([dValue]), + Tensor([Int32](repeating: 1, count: Int(indices.rank - 1)))]) batchIndices += dIndices.reshaped(toShape: dShape) } From 6c0436878d41b5a1d6f1aa7b8c8b559b1250ff16 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 18 Apr 2019 22:14:11 -0400 Subject: [PATCH 25/55] Added support for the split op and its VJP. 
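
As the `_vjpSplit` helpers below show, the pullback of either `split` overload simply concatenates the incoming cotangents back along the split axis. A rough usage sketch of the two overloads and of `unstack` (the module name in the import, the shapes, and the values are illustrative only):

```
import TensorFlow
import DeepLearning  // module name assumed from Sources/DeepLearning

let x = Tensor<Float>(shape: [2, 3], scalars: [0, 1, 2, 3, 4, 5])

// Three equally sized pieces along axis 1, each of shape [2, 1].
let thirds = x.split(numSplits: 3, alongAxis: 1)

// Unequal pieces along axis 1: shapes [2, 1] and [2, 2].
let ragged = x.split(splitSizes: Tensor<Int32>([1, 2]), alongAxis: 1)

// Unstacking removes the leading axis: two tensors of shape [3].
let rows = x.unstack()
```
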
--- Sources/DeepLearning/Operators/Basic.swift | 92 ++++++++++++++++------ Sources/DeepLearning/Operators/Math.swift | 48 ++++++++++- Sources/DeepLearning/Operators/NN.swift | 2 +- 3 files changed, 117 insertions(+), 25 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index cb63eca1b..82c96fd2e 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -17,6 +17,32 @@ import TensorFlow #endif public extension Tensor { + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func unstack(alongAxis axis: Int = 0) -> [Tensor] { + return split(numSplits: shape[axis], alongAxis: axis) + } + + @inlinable + @differentiable( + vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) + func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { + return Raw.split( + splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) + } + + @inlinable + @differentiable( + wrt: self, + vjp: _vjpSplit(splitSizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) + func split(splitSizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { + return Raw.splitV( + value: self, + sizeSplits: splitSizes, + splitDim: Tensor(Int32(axis)), + numSplit: Int64(splitSizes.shape[0])) + } + /// Gathers slices of this tensor at `indices` along the `axis` dimension. /// /// For 0-D (scalar) `indices`: @@ -66,9 +92,9 @@ public extension Tensor { // @differentiable(vjp: _vjpGathering where Scalar: TensorFlowFloatingPoint) func gathering( atIndices indices: Tensor, - alongAxis axis: Int32 = 0 + alongAxis axis: Int = 0 ) -> Tensor { - return Raw.gatherV2(params: self, indices: indices, axis: Tensor(axis)) + return Raw.gatherV2(params: self, indices: indices, axis: Tensor(Int32(axis))) } /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the @@ -89,16 +115,16 @@ public extension Tensor { /// - Returns: The gathered tensor. @inlinable func batchGathering( - atIndices indices: Tensor, - alongAxis axis: Int32, - numBatchDims batchDims: Int32 + atIndices indices: Tensor, + alongAxis axis: Int, + numBatchDims batchDims: Int ) -> Tensor { - precondition(batchDims >= 0 && batchDims < indices.rank, + precondition(batchDims >= 0 && batchDims < indices.rank, "'numBatchDims' must be non-negative and less than 'indices.rank'.") precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") - // Handle the axis argument by transposing the axis dimension so that it is the first - // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then + // Handle the axis argument by transposing the axis dimension so that it is the first + // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then // transposing the result to put the pre-axis dimensions before the indices dimensions. if axis != batchDims { // Adjust axis to be positive. @@ -109,22 +135,22 @@ public extension Tensor { // Move self[axis] up to self[batchDims]. 
let permutation = Tensor(concatenating: [ - Tensor(0 ..< batchDims), - Tensor(axis).rankLifted(), - Tensor(rangeFrom: batchDims, to: posAxis, stride: 1), - Tensor(rangeFrom: axis + 1, to: rank, stride: 1)]) + Tensor(0 ..< Int32(batchDims)), + Tensor(Int32(axis)).rankLifted(), + Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), + Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) let tensor = transposed(withPermutations: permutation) let result = tensor.batchGathering( atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) - - // Move the result dimensions corresponding to self[batchDims ..< axis] to just before + + // Move the result dimensions corresponding to self[batchDims ..< axis] to just before // the dimensions corresponding to indices[batchDims ...]. let start = indices.rank + posAxis - batchDims let resultPermutation = Tensor(concatenating: [ - Tensor(0 ..< batchDims), - Tensor(rangeFrom: indices.rank, to: start, stride: 1), - Tensor(batchDims ..< indices.rank), - Tensor(rangeFrom: start, to: result.rank, stride: 1)]) + Tensor(0 ..< Int32(batchDims)), + Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), + Tensor(Int32(batchDims) ..< Int32(indices.rank)), + Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) return result.transposed(withPermutations: resultPermutation) } @@ -188,15 +214,15 @@ public extension Tensor { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable - func gathering(where mask: Tensor, alongAxis axis: Int32 = 0) -> Tensor { + func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - let posAxis = Int(axis < 0 ? axis + rank : axis) - let leadingSize = shapeTensor[posAxis ..< posAxis + Int(mask.rank)].product().rankLifted() + let posAxis = axis < 0 ? axis + rank : axis + let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() let reshapedTensor = reshaped( toShape: Tensor(concatenating: [ - shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) - return reshapedTensor.gathering(atIndices: indices, alongAxis: Int32(posAxis)) + return reshapedTensor.gathering(atIndices: indices, alongAxis: posAxis) } } @@ -234,3 +260,23 @@ public extension Tensor { return Raw.where_(self) } } + +public extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + internal func _vjpSplit( + numSplits: Int, + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { + let result = split(numSplits: numSplits, alongAxis: axis) + return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + } + + @inlinable + internal func _vjpSplit( + splitSizes: Tensor, + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { + let result = split(splitSizes: splitSizes, alongAxis: axis) + return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + } +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 249fc6015..883f0a2bb 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -17,6 +17,52 @@ import TensorFlow #endif /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. 
-public func round(_ x: Tensor) -> Tensor { +@inlinable +@differentiable(vjp: _vjpRound) +public func round(_ x: Tensor) -> Tensor { return Raw.round(x) } + +@inlinable +internal func _vjpRound( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (round(x), { v in Tensor(zerosLike: v) }) +} + +/// Computes the sigmoid of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpSigmoid) +public func sigmoid(_ x: Tensor) -> Tensor { + return Raw.sigmoid(x) +} + +@inlinable +internal func _vjpSigmoid( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) +} + +/// Computes the log-sigmoid of the specified tensor element-wise. Specifically, +/// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. +@inlinable +@differentiable +public func logSigmoid(_ x: Tensor) -> Tensor { + return -softplus(-x) +} + +/// Computes the softplus function for the specified tensor element-wise. The softplus function is +/// defined as `log(exp(x) + 1)`. +@inlinable +@differentiable(vjp: _vjpSoftplus) +public func softplus(_ x: Tensor) -> Tensor { + return Raw.softplus(features: x) +} + +@inlinable +internal func _vjpSoftplus( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (softplus(x), { v in v * sigmoid(x) }) +} diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index d9b729722..f1401af20 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -36,7 +36,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { wrt: (self, offset, scale), vjp: _vjpBatchNormalized) func batchNormalized( - alongAxis axis: Int32, + alongAxis axis: Int, offset: Tensor = Tensor(0), scale: Tensor = Tensor(1), epsilon: Scalar = 0.001 From ca8ce02db1d00c9b8ddae11a971c4a2f9398b9c1 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 18 Apr 2019 22:49:03 -0400 Subject: [PATCH 26/55] Added VJPs for stacking and tiling. --- Sources/DeepLearning/Initializers.swift | 49 +++++++++++++++++++++---- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 0d6953703..457462fcb 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -17,6 +17,13 @@ import TensorFlow #endif public extension Tensor { + /// Creates a tensor from an array of tensors (which may themselves be scalars). + @inlinable + @differentiable(where Scalar: TensorFlowFloatingPoint) + init(_ elements: [Tensor]) { + self = Tensor(stacking: elements) + } + /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than /// the current tensor and each tensor in `tensors`. /// @@ -46,8 +53,8 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - // @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) - init(stacking tensors: [Tensor], alongAxis axis: Int32 = 0) { + @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) + init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -83,11 +90,11 @@ public extension Tensor { /// provided tensors. /// /// - Returns: The concatenated tensor. 
- @inlinable - // @differentiable(vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) - init(concatenating tensors: [Tensor], alongAxis axis: Int32 = 0) { - self = Raw.concatV2(tensors, axis: Tensor(axis)) - } + // @inlinable + // // @differentiable(vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + // init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { + // self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) + // } /// Returns a tiled tensor, constructed by tiling the provided tensor. /// @@ -98,11 +105,39 @@ public extension Tensor { /// /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. @inlinable + @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar: TensorFlowFloatingPoint) init(tiling tensor: Tensor, multiples: Tensor) { self = Raw.tile(tensor, multiples: multiples) } } +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpStacking( + stacking tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(stacking: tensors, alongAxis: axis) + return (result, { v in + return Array.DifferentiableView(v.unstack(alongAxis: axis)) + }) + } + + @inlinable + static func _vjpTiling( + tiling tensor: Tensor, + multiples: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + let result = Tensor(tiling: tensor, multiples: multiples) + return (result, { [shape = tensor.shapeTensor] v in + let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() + let axes = Tensor( + rangeFrom: 0, to: Int32(splitShape.shape.contiguousSize), stride: 2) + return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) + }) + } +} + public extension Tensor where Scalar : Numeric { /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided /// tensor. From 26e91232fc6de0adbf57aa4ec50c7fd142b959b6 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 18 Apr 2019 23:05:30 -0400 Subject: [PATCH 27/55] Added VJP for concatenating. --- Sources/DeepLearning/Initializers.swift | 64 +++++++++++++++++-------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 457462fcb..bca769299 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -19,7 +19,7 @@ import TensorFlow public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(where Scalar : TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } @@ -53,7 +53,7 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) + @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -90,11 +90,12 @@ public extension Tensor { /// provided tensors. /// /// - Returns: The concatenated tensor. 
- // @inlinable - // // @differentiable(vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) - // init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { - // self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) - // } + @inlinable + @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { + precondition(tensors.count > 0) + self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) + } /// Returns a tiled tensor, constructed by tiling the provided tensor. /// @@ -105,8 +106,8 @@ public extension Tensor { /// /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. @inlinable - @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar: TensorFlowFloatingPoint) - init(tiling tensor: Tensor, multiples: Tensor) { + @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar : TensorFlowFloatingPoint) + init(tiling tensor: Tensor, multiples: Tensor) { self = Raw.tile(tensor, multiples: multiples) } } @@ -123,6 +124,21 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { }) } + @inlinable + static func _vjpConcatenating( + concatenating tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(concatenating: tensors, alongAxis: axis) + let posAxis = axis < 0 ? axis + tensors[0].rank : axis + let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) + return (result, { [count = tensors.count] v in + if count == 1 { return Array.DifferentiableView([v]) } + let splits = v.split(sizes: sizes, alongAxis: posAxis) + return Array.DifferentiableView(splits) + }) + } + @inlinable static func _vjpTiling( tiling tensor: Tensor, @@ -180,8 +196,10 @@ public extension Tensor where Scalar == Int32 { /// - shape: The dimensions of the tensor. /// - generator: Random number generator to use. /// - init(randomStandardUniform shape: TensorShape, - generator: inout G) { + init( + randomStandardUniform shape: TensorShape, + generator: inout G + ) { let dist = UniformIntegerDistribution() var scalars: [Scalar] = [] for _ in 0 ..< shape.contiguousSize { @@ -250,8 +268,10 @@ public extension Tensor where Scalar: BinaryFloatingPoint, /// - shape: The dimensions of the tensor. /// - generator: Random number generator to use. /// - init(randomUniform shape: TensorShape, - generator: inout G) { + init( + randomUniform shape: TensorShape, + generator: inout G + ) { let dist = UniformFloatingPointDistribution() var scalars: [Scalar] = [] for _ in 0 ..< shape.contiguousSize { @@ -269,10 +289,12 @@ public extension Tensor where Scalar: BinaryFloatingPoint, /// - stddev: The standard deviation of the distribution. /// - generator: Random number generator to use. /// - init(randomNormal shape: TensorShape, - mean: Scalar = 0, - stddev: Scalar = 1, - generator: inout G) { + init( + randomNormal shape: TensorShape, + mean: Scalar = 0, + stddev: Scalar = 1, + generator: inout G + ) { let dist = NormalDistribution(mean: mean, standardDeviation: stddev) var scalars: [Scalar] = [] for _ in 0 ..< shape.contiguousSize { @@ -306,9 +328,11 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameters: /// - shape: The dimensions of the tensor. /// - init(glorotUniform shape: TensorShape, - seed: (Int64, Int64) = (Int64.random(in: Int64.min.. Date: Thu, 18 Apr 2019 23:26:38 -0400 Subject: [PATCH 28/55] Added the gathering VJP. 
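The concatenation pullback above recovers per-input cotangents by splitting the incoming cotangent with the recorded sizes along the concatenation axis. A sketch with assumed shapes, using the `split(sizes:alongAxis:)` spelling from this patch series:

```swift
let a = Tensor<Float>([[1, 2, 3], [4, 5, 6]])             // shape [2, 3]
let b = Tensor<Float>([[7, 8, 9]])                        // shape [1, 3]
let joined = Tensor(concatenating: [a, b], alongAxis: 0)  // shape [3, 3]
let cotangent = Tensor<Float>([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
let pieces = cotangent.split(sizes: Tensor<Int32>([2, 1]), alongAxis: 0)
// pieces[0] has shape [2, 3] (cotangent of `a`); pieces[1] has shape [1, 3] (cotangent of `b`).
```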
--- Sources/DeepLearning/Initializers.swift | 2 +- Sources/DeepLearning/Operators/Basic.swift | 65 ++++++++++++++++++---- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index bca769299..dd9d01923 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -148,7 +148,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { return (result, { [shape = tensor.shapeTensor] v in let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() let axes = Tensor( - rangeFrom: 0, to: Int32(splitShape.shape.contiguousSize), stride: 2) + rangeFrom: 0, to: Int32(splitShape.scalarCount), stride: 2) return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) }) } diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 82c96fd2e..78acc9a2e 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -34,13 +34,13 @@ public extension Tensor { @inlinable @differentiable( wrt: self, - vjp: _vjpSplit(splitSizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) - func split(splitSizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { + vjp: _vjpSplit(sizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) + func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { return Raw.splitV( value: self, - sizeSplits: splitSizes, + sizeSplits: sizes, splitDim: Tensor(Int32(axis)), - numSplit: Int64(splitSizes.shape[0])) + numSplit: Int64(sizes.shape[0])) } /// Gathers slices of this tensor at `indices` along the `axis` dimension. @@ -89,7 +89,7 @@ public extension Tensor { /// /// - Returns: The gathered tensor. @inlinable - // @differentiable(vjp: _vjpGathering where Scalar: TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpGathering where Scalar : TensorFlowFloatingPoint) func gathering( atIndices indices: Tensor, alongAxis axis: Int = 0 @@ -114,6 +114,7 @@ public extension Tensor { /// /// - Returns: The gathered tensor. @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) func batchGathering( atIndices indices: Tensor, alongAxis axis: Int, @@ -214,6 +215,7 @@ public extension Tensor { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = axis < 0 ? 
axis + rank : axis @@ -261,9 +263,9 @@ public extension Tensor { } } -public extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar : TensorFlowFloatingPoint { @inlinable - internal func _vjpSplit( + func _vjpSplit( numSplits: Int, alongAxis axis: Int = 0 ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { @@ -272,11 +274,54 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { } @inlinable - internal func _vjpSplit( - splitSizes: Tensor, + func _vjpSplit( + sizes: Tensor, alongAxis axis: Int = 0 ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { - let result = split(splitSizes: splitSizes, alongAxis: axis) + let result = split(sizes: sizes, alongAxis: axis) return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) } + + @inlinable + func _vjpGathering( + atIndices indices: Tensor, + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Tensor) { + let result = gathering(atIndices: indices, alongAxis: axis) + let posAxis = axis < 0 ? axis + rank : axis + return (result, { [shape = shapeTensor] v in + let indicesSize = Tensor(Int32(indices.scalarCount)) + let outerShape = shape[..(rangeFrom: 0, to: Int32(outerSize), stride: 1) + let innerIndices = Tensor( + rangeFrom: Int32(outerSize) + 1, + to: Int32(outerSize) + 1 + Int32(innerSize), + stride: 1) + let valuesShape = Tensor(concatenating: [outerShape, indicesSize, innerShape]) + let values = v.reshaped(toShape: valuesShape) + let valueIndices = indices.reshaped(toShape: indicesSize) + + // We need to sum up every slice `values[..., i, ....]` corresponding to + // `tensor[..., indices[i], ...]`. Since `unsortedSegmentSum` does not support an axis + // parameter, we transpose the gather dimension to the front, then use + // `unsortedSegmentSum` to build a `[gatherAxis, outerAxes, innerAxes]` tensor with all + // the gradients affecting each index in `gatherAxis` summed up. + let permutations = Tensor(concatenating: [ + Tensor([Int32(outerSize)]), outerIndices, innerIndices]) + let transposedValues = values.transposed(withPermutations: permutations) + let gradient = Raw.unsortedSegmentSum( + data: transposedValues, + segmentIds: valueIndices, + numSegments: shape[posAxis]) + + // Finally, we invert the above transpose operation by moving dimension 0 back to its + // original position. + let inversePermutations = Tensor(concatenating: [ + outerIndices + 1, Tensor([0]), innerIndices]) + return gradient.transposed(withPermutations: inversePermutations) + }) + } } From 49bfe8df71057167a41a3cd25fea9940a47bd028 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 13:31:14 -0400 Subject: [PATCH 29/55] Bug fixes. 
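`_vjpGathering` above is effectively a scatter-add: the cotangent of every output slice must be accumulated back into the input row it was gathered from, which is why `Raw.unsortedSegmentSum` is used. A small numeric illustration, assuming the `gradient(at:in:)` helper from the TensorFlow module:

```swift
let params = Tensor<Float>([[1, 2], [3, 4], [5, 6]])   // shape [3, 2]
let indices = Tensor<Int32>([2, 0, 2])
let picked = params.gathering(atIndices: indices)      // [[5, 6], [1, 2], [5, 6]]
let grad = gradient(at: params) { $0.gathering(atIndices: indices).sum() }
// grad == [[1, 1], [0, 0], [2, 2]]: row 2 was gathered twice, so its cotangents add up.
```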
--- Sources/DeepLearning/Initializers.swift | 8 ++--- Sources/DeepLearning/Operators/Basic.swift | 36 ++++++++++------------ 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index dd9d01923..13384567b 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -113,18 +113,18 @@ public extension Tensor { } internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @inlinable + @usableFromInline static func _vjpStacking( stacking tensors: [Tensor], alongAxis axis: Int = 0 ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(stacking: tensors, alongAxis: axis) + let result = Tensor(stacking: tensors, alongAxis: axis) return (result, { v in return Array.DifferentiableView(v.unstack(alongAxis: axis)) }) } - @inlinable + @usableFromInline static func _vjpConcatenating( concatenating tensors: [Tensor], alongAxis axis: Int = 0 @@ -139,7 +139,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { }) } - @inlinable + @usableFromInline static func _vjpTiling( tiling tensor: Tensor, multiples: Tensor diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 78acc9a2e..052a2d368 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -24,8 +24,7 @@ public extension Tensor { } @inlinable - @differentiable( - vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { return Raw.split( splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) @@ -114,7 +113,7 @@ public extension Tensor { /// /// - Returns: The gathered tensor. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + // TODO: @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) func batchGathering( atIndices indices: Tensor, alongAxis axis: Int, @@ -136,7 +135,7 @@ public extension Tensor { // Move self[axis] up to self[batchDims]. let permutation = Tensor(concatenating: [ - Tensor(0 ..< Int32(batchDims)), + Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), Tensor(Int32(axis)).rankLifted(), Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) @@ -148,28 +147,27 @@ public extension Tensor { // the dimensions corresponding to indices[batchDims ...]. 
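The `rangeFrom`-based fix above builds the permutation handed to `transposed(withPermutations:)`. With assumed values `batchDims = 1`, `posAxis = 2`, and `rank = 4` (hypothetical sizes, for illustration only), the pieces evaluate as follows:

```swift
let batchDims = 1, posAxis = 2, rank = 4
let permutation = Tensor<Int32>(concatenating: [
    Tensor<Int32>(rangeFrom: 0, to: Int32(batchDims), stride: 1),              // [0]
    Tensor<Int32>(Int32(posAxis)).rankLifted(),                                // [2]
    Tensor<Int32>(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), // [1]
    Tensor<Int32>(rangeFrom: Int32(posAxis) + 1, to: Int32(rank), stride: 1)]) // [3]
// permutation == [0, 2, 1, 3]: the gather axis is moved up next to the batch dimensions.
```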
let start = indices.rank + posAxis - batchDims let resultPermutation = Tensor(concatenating: [ - Tensor(0 ..< Int32(batchDims)), + Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), - Tensor(Int32(batchDims) ..< Int32(indices.rank)), + Tensor(rangeFrom: Int32(batchDims), to: Int32(indices.rank), stride: 1), Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) return result.transposed(withPermutations: resultPermutation) } - let castedShape = Tensor(shapeTensor) var batchIndices = indices var accumulated = Tensor(ones: []) for d in (1...batchDims).reversed() { - accumulated *= castedShape[d] - let dValue = castedShape[d - 1] + accumulated *= shapeTensor[d] + let dValue = shapeTensor[d - 1] let dIndices = Tensor( rangeFrom: Tensor(zeros: []), to: dValue, stride: Tensor(ones: []) ) * accumulated let dShape = Tensor(concatenating: [ - Tensor([Int32](repeating: 1, count: Int(d - 1))), - Tensor([dValue]), - Tensor([Int32](repeating: 1, count: Int(indices.rank - 1)))]) + Tensor([Int32](repeating: 1, count: d - 1)), + dValue.rankLifted(), + Tensor([Int32](repeating: 1, count: indices.rank - 1))]) batchIndices += dIndices.reshaped(toShape: dShape) } @@ -215,7 +213,7 @@ public extension Tensor { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + // TODO: @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = axis < 0 ? axis + rank : axis @@ -264,25 +262,25 @@ public extension Tensor { } internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @inlinable + @usableFromInline func _vjpSplit( numSplits: Int, alongAxis axis: Int = 0 - ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { let result = split(numSplits: numSplits, alongAxis: axis) return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) } - @inlinable + @usableFromInline func _vjpSplit( sizes: Tensor, alongAxis axis: Int = 0 - ) -> ([Tensor], (Array.DifferentiableView) -> Tensor) { + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { let result = split(sizes: sizes, alongAxis: axis) return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) } - @inlinable + @usableFromInline func _vjpGathering( atIndices indices: Tensor, alongAxis axis: Int = 0 @@ -316,7 +314,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { data: transposedValues, segmentIds: valueIndices, numSegments: shape[posAxis]) - + // Finally, we invert the above transpose operation by moving dimension 0 back to its // original position. let inversePermutations = Tensor(concatenating: [ From 10de441b10669039d08fa4a24679bf40a812f6e3 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 13:49:26 -0400 Subject: [PATCH 30/55] Added an 'Optimizable' protocol. 
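The boolean-mask overload `gathering(where:alongAxis:)` patched above behaves like the 2-D case in its doc comment. A short usage sketch:

```swift
let t = Tensor<Float>([[1, 2], [3, 4], [5, 6]])
let mask = Tensor<Bool>([true, false, true])
let kept = t.gathering(where: mask)   // [[1, 2], [5, 6]]
```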
--- Sources/DeepLearning/Layer.swift | 3 +-- Sources/DeepLearning/Optimizer.swift | 13 ++++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index db93adc19..870d55569 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -23,8 +23,7 @@ import TensorFlow /// /// `Layer` instances define a differentiable `applied(to:)` method for mapping inputs to /// outputs. -public protocol Layer: Differentiable & KeyPathIterable - where AllDifferentiableVariables: KeyPathIterable { +public protocol Layer: Optimizable { /// The input type of the layer. associatedtype Input: Differentiable /// The output type of the layer. diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 440a691e0..81e03bcbd 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -16,13 +16,16 @@ import TensorFlow #endif +public protocol Optimizable: Differentiable & KeyPathIterable + where AllDifferentiableVariables: KeyPathIterable { } + /// A machine learning optimizer. /// /// Optimizers apply an optimization algorithm to update the differentiable variables of a machine /// learning model. public protocol Optimizer { /// The type of the model whose parameters are optimized. - associatedtype Model: Differentiable + associatedtype Model: Optimizable /// The scalar parameter type. associatedtype Scalar: FloatingPoint /// The learning rate. @@ -45,7 +48,7 @@ fileprivate extension Tensor where Scalar: Numeric { /// /// Reference: ["Adam - A Method for Stochastic Optimization"]( /// https://arxiv.org/abs/1412.6980v8) -public class Adam: Optimizer +public class Adam: Optimizer where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float @@ -139,7 +142,7 @@ public class Adam: Optimizer /// /// Reference: ["rmsprop: Divide the gradient by a running average of its recent magnitude"]( /// http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) -public class RMSProp: Optimizer +public class RMSProp: Optimizer where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float @@ -203,7 +206,7 @@ public class RMSProp: Optimizer /// /// An optimizer that implements stochastic gradient descent, with support for momentum, learning /// rate decay, and Nesterov momentum. -public class SGD: Optimizer +public class SGD: Optimizer where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float @@ -275,7 +278,7 @@ public class SGD: Optimizer // MARK: - Manifold optimizers /// A Riemann manifold stochastic gradient descent (SGD) optimizer. -public class RiemannSGD: Optimizer +public class RiemannSGD: Optimizer where Model.TangentVector: VectorNumeric, Model.TangentVector.Scalar == Scalar { /// The learning rate. public var learningRate: Scalar From a6303962e0703ea73bd8451e5dd3685e3f40eed5 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 17:23:34 -0400 Subject: [PATCH 31/55] Moved some more activation functions from the stdlib. 
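With `Layer` now refining a standalone `Optimizable` protocol, code that drives an optimizer only needs a `Differentiable & KeyPathIterable` model. Below is a hedged sketch of a generic training step written against these protocols; it assumes the `Optimizer` protocol's usual `update(_:along:)` requirement and the `gradient(at:in:)` helper from the TensorFlow module, and `trainingStep`/`lossFunction` are illustrative names rather than part of the patch:

```swift
func trainingStep<Model: Optimizable, Opt: Optimizer>(
    model: inout Model,
    optimizer: inout Opt,
    loss lossFunction: @differentiable (Model) -> Tensor<Float>
) where Opt.Model == Model, Model.AllDifferentiableVariables == Model.CotangentVector {
    // Differentiate the loss with respect to the model, then apply the optimizer's update rule.
    let grads = gradient(at: model, in: lossFunction)
    optimizer.update(&model.allDifferentiableVariables, along: grads)
}
```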
--- Sources/DeepLearning/Operators/Math.swift | 84 +++++++++++++++++++---- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 883f0a2bb..4ffa1e800 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -19,50 +19,104 @@ import TensorFlow /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. @inlinable @differentiable(vjp: _vjpRound) -public func round(_ x: Tensor) -> Tensor { +public func round(_ x: Tensor) -> Tensor { return Raw.round(x) } @inlinable -internal func _vjpRound( +internal func _vjpRound( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { - return (round(x), { v in Tensor(zerosLike: v) }) + return (round(x), { v in Tensor(zerosLike: v) }) } /// Computes the sigmoid of the specified tensor element-wise. +/// Specifically, computes `1 / (1 + exp(-x))`. @inlinable @differentiable(vjp: _vjpSigmoid) -public func sigmoid(_ x: Tensor) -> Tensor { - return Raw.sigmoid(x) +public func sigmoid(_ x: Tensor) -> Tensor { + return Raw.sigmoid(x) } @inlinable -internal func _vjpSigmoid( - _ x: Tensor +internal func _vjpSigmoid( + _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { - return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) + return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) } /// Computes the log-sigmoid of the specified tensor element-wise. Specifically, /// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. @inlinable @differentiable -public func logSigmoid(_ x: Tensor) -> Tensor { - return -softplus(-x) +public func logSigmoid(_ x: Tensor) -> Tensor { + return -softplus(-x) } /// Computes the softplus function for the specified tensor element-wise. The softplus function is /// defined as `log(exp(x) + 1)`. @inlinable @differentiable(vjp: _vjpSoftplus) -public func softplus(_ x: Tensor) -> Tensor { - return Raw.softplus(features: x) +public func softplus(_ x: Tensor) -> Tensor { + return Raw.softplus(features: x) } @inlinable -internal func _vjpSoftplus( - _ x: Tensor +internal func _vjpSoftplus( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (softplus(x), { v in v * sigmoid(x) }) +} + + +/// Computes the softmax of the specified tensor along the last axis. +/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. +@inlinable +@differentiable(vjp: _vjpSoftmax(_:) where T : TensorFlowFloatingPoint) +public func softmax(_ x: Tensor) -> Tensor { + return Raw.softmax(logits: x) +} + +/// Computes the softmax of the specified tensor along the specified axis. +/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. +@inlinable +public func softmax( + _ x: Tensor, + alongAxis axis: Int +) -> Tensor { + let expx = exp(x) + return expx / expx.sum(alongAxes: axis) +} + +@inlinable +func _vjpSoftmax( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = softmax(x) + return (value, { v in + let sumChannels = (v * value).sum(alongAxes: -1) + return (v - sumChannels) * value + }) +} + +/// Computes the log-softmax of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpLogSoftmax(_:) where T : TensorFlowFloatingPoint) +public func logSoftmax(_ x: Tensor) -> Tensor { + return Raw.logSoftmax(logits: x) +} + +/// Computes `relu` of the specified tensor element-wise. +/// Specifically, computes `max(0, x)`. 
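A quick sanity sketch of the forward behavior of the softmax family above (illustrative values):

```swift
let logits = Tensor<Float>([[1, 2, 3], [1, 1, 1]])
let probs = softmax(logits)               // each row sums to 1
let rowSums = probs.sum(alongAxes: -1)    // ≈ [[1.0], [1.0]]
let logProbs = logSoftmax(logits)         // ≈ log(probs), computed more stably
```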
+@inlinable +@differentiable(vjp: _vjpRelu(_:) where T : TensorFlowFloatingPoint) +public func relu(_ x: Tensor) -> Tensor { + return max(0, x) +} + +@inlinable +func _vjpRelu( + _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { - return (softplus(x), { v in v * sigmoid(x) }) + return (relu(x), { v in Tensor(x .> 0) * v }) } From c3243f414a253694af8a1fb9a8eb3bf1af19d3b4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 17:30:34 -0400 Subject: [PATCH 32/55] Added log-softmax VJP. --- Sources/DeepLearning/Operators/Math.swift | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 4ffa1e800..1156f72b9 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -106,6 +106,16 @@ public func logSoftmax(_ x: Tensor) -> Tensor { return Raw.logSoftmax(logits: x) } +@inlinable +func _vjpLogSoftmax( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = logSoftmax(x) + return (value, { v in + v - v.sum(alongAxes: -1) * exp(value) + }) +} + /// Computes `relu` of the specified tensor element-wise. /// Specifically, computes `max(0, x)`. @inlinable From 4547a6dc617a9cdb9544980d477b7760672aeefc Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Fri, 19 Apr 2019 20:20:49 -0400 Subject: [PATCH 33/55] Minor bug fix. --- Sources/DeepLearning/Operators/Math.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 1156f72b9..1169f774d 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -16,6 +16,10 @@ import TensorFlow #endif +#if COMPILING_TENSORFLOW_MODULE +infix operator .> : ComparisonPrecedence +#endif + /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. @inlinable @differentiable(vjp: _vjpRound) From 19cdbd9fd40dcb6ba24562608e345af67c999d53 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 12:09:27 -0400 Subject: [PATCH 34/55] Brought some initializers from stdlib. --- Sources/DeepLearning/Initializers.swift | 378 ++++++++++++++++------ Sources/DeepLearning/Operators/Math.swift | 46 +-- 2 files changed, 297 insertions(+), 127 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 13384567b..2b61a5c5b 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -17,46 +17,132 @@ import TensorFlow #endif public extension Tensor { - /// Creates a tensor from an array of tensors (which may themselves be scalars). + /// Creates a tensor with the specified shape and a single, repeated scalar + /// value. + /// + /// - Parameters: + /// - shape: The dimensions of the tensor. + /// - repeatedValue: The scalar value to repeat. @inlinable - @differentiable(where Scalar : TensorFlowFloatingPoint) - init(_ elements: [Tensor]) { - self = Tensor(stacking: elements) + @available(*, deprecated, renamed: "init(repeating:shape:)") + init(shape: TensorShape, repeating repeatedValue: Scalar) { + self.init(repeating: repeatedValue, shape: shape) } - /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than - /// the current tensor and each tensor in `tensors`. 
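The `init(repeating:shape:)` initializer above is differentiable with respect to the repeated scalar, and `_vjpInit` reduces the incoming cotangent back to a single value. A small sketch of what that implies, assuming the `gradient(at:in:)` helper from the TensorFlow module:

```swift
// Filling a [2, 3] tensor from one scalar and summing it uses the scalar six times,
// so the derivative with respect to that scalar is 6.
let dValue = gradient(at: Float(2.5)) { value in
    Tensor(repeating: value, shape: [2, 3]).sum()
}
// dValue == 6.0
```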
- /// - /// Given that `tensors` all have shape `[A, B, C]`, and `tensors.count = N`, then: - /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. - /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. - /// - etc. - /// - /// For example: - /// ``` - /// // 'x' is [1, 4] - /// // 'y' is [2, 5] - /// // 'z' is [3, 6] - /// Tensor(stacking: [x, y, z]) // is [[1, 4], [2, 5], [3, 6]] - /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] - /// ``` - /// - /// This is the opposite of `Tensor.unstacked`. + /// Creates a tensor with the specified shape and a single, repeated scalar value. /// /// - Parameters: - /// - tensors: Tensors to stack. - /// - axis: Dimension along which to stack. Negative values wrap around. - /// - /// - Precondition: All tensors must have the same shape. - /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the - /// provided tensors. - /// - /// - Returns: The stacked tensor. + /// - repeatedValue: The scalar value to repeat. + /// - shape: The dimensions of the tensor. + @inlinable + @differentiable( + vjp: _vjpInit(repeating:shape:) where Scalar : TensorFlowFloatingPoint) + init(repeating repeatedValue: Scalar, shape: TensorShape) { + self = Raw.fill( + dims: Tensor(shape.dimensions.map(Int32.init)), + value: Tensor(repeatedValue)) + } + + /// Creates a tensor by broadcasting the given scalar to a given rank with + /// all dimensions being 1. + @inlinable + // @differentiable(where Scalar : TensorFlowFloatingPoint) + init(broadcasting scalar: Scalar, rank: Int) { + self = Tensor(scalar).reshaped(to: TensorShape(repeating: 1, count: rank)) + } + + /// Creates a tensor of shape `[4]` from a 4-tuple. + /// - Note: This is intended for internal use, for example, to initialize a + /// tensor attribute from `convolved2D`'s `strides` argument. + @inlinable + internal init(_ scalars: (Scalar, Scalar, Scalar, Scalar)) { + self.init([scalars.0, scalars.1, scalars.2, scalars.3]) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpInit( + repeating repeatedValue: Scalar, + shape: TensorShape + ) -> (Tensor, (Tensor) -> Scalar) { + return (Tensor(repeating: repeatedValue, shape: shape), { + $0.sum().scalarized() + }) + } +} + +//===------------------------------------------------------------------------------------------===// +// Casting +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar : Numeric { + /// Perform an element-wise type conversion from a `Bool` tensor. + @inlinable + init(_ other: Tensor) { + self = Raw.cast(other) + } + + /// Perform an element-wise conversion from another `Tensor`. + @inlinable + @differentiable( + vjp: _vjpCast where Scalar : TensorFlowFloatingPoint, + OtherScalar: TensorFlowFloatingPoint) + init(_ other: Tensor) { + self = Raw.cast(other) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { @inlinable - @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) - init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { - self = Raw.pack(tensors, axis: Int64(axis)) + static func _vjpCast( + _ other: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + return (Tensor(other), { v in Tensor(v) }) } +} + +public extension Tensor { + /// Creates a tensor from an array of tensors (which may themselves be scalars). 
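The casting initializer and `_vjpCast` above make element-wise dtype conversion differentiable by casting the cotangent back to the source scalar type. A short sketch:

```swift
let xd = Tensor<Double>([1.0, 2.0, 3.0])
let xf = Tensor<Float>(xd)                              // element-wise cast
let dxd = gradient(at: xd) { Tensor<Float>($0).sum() }
// dxd == Tensor<Double>([1, 1, 1]): the Float cotangent is cast back to Double.
```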
+ @inlinable + // @differentiable(where Scalar : TensorFlowFloatingPoint) + init(_ elements: [Tensor]) { + self = Tensor(stacking: elements) + } + + // /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than + // /// the current tensor and each tensor in `tensors`. + // /// + // /// Given that `tensors` all have shape `[A, B, C]`, and `tensors.count = N`, then: + // /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. + // /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. + // /// - etc. + // /// + // /// For example: + // /// ``` + // /// // 'x' is [1, 4] + // /// // 'y' is [2, 5] + // /// // 'z' is [3, 6] + // /// Tensor(stacking: [x, y, z]) // is [[1, 4], [2, 5], [3, 6]] + // /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + // /// ``` + // /// + // /// This is the opposite of `Tensor.unstacked`. + // /// + // /// - Parameters: + // /// - tensors: Tensors to stack. + // /// - axis: Dimension along which to stack. Negative values wrap around. + // /// + // /// - Precondition: All tensors must have the same shape. + // /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + // /// provided tensors. + // /// + // /// - Returns: The stacked tensor. + // @inlinable + // @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) + // init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { + // self = Raw.pack(tensors, axis: Int64(axis)) + // } /// Concatenates `tensors` along the `axis` dimension. /// @@ -91,100 +177,182 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. @inlinable - @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } - /// Returns a tiled tensor, constructed by tiling the provided tensor. - /// - /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The - /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the - /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For - /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. - /// - /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. - @inlinable - @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar : TensorFlowFloatingPoint) - init(tiling tensor: Tensor, multiples: Tensor) { - self = Raw.tile(tensor, multiples: multiples) - } + // /// Returns a tiled tensor, constructed by tiling the provided tensor. + // /// + // /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The + // /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the + // /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For + // /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. + // /// + // /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. 
+ // @inlinable + // @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar : TensorFlowFloatingPoint) + // init(tiling tensor: Tensor, multiples: Tensor) { + // self = Raw.tile(tensor, multiples: multiples) + // } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @usableFromInline - static func _vjpStacking( - stacking tensors: [Tensor], - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(stacking: tensors, alongAxis: axis) - return (result, { v in - return Array.DifferentiableView(v.unstack(alongAxis: axis)) - }) - } +// internal extension Tensor where Scalar : TensorFlowFloatingPoint { +// @inlinable +// static func _vjpStacking( +// stacking tensors: [Tensor], +// alongAxis axis: Int = 0 +// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { +// let result = Tensor(stacking: tensors, alongAxis: axis) +// return (result, { v in +// return Array.DifferentiableView(v.unstack(alongAxis: axis)) +// }) +// } - @usableFromInline - static func _vjpConcatenating( - concatenating tensors: [Tensor], - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(concatenating: tensors, alongAxis: axis) - let posAxis = axis < 0 ? axis + tensors[0].rank : axis - let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) - return (result, { [count = tensors.count] v in - if count == 1 { return Array.DifferentiableView([v]) } - let splits = v.split(sizes: sizes, alongAxis: posAxis) - return Array.DifferentiableView(splits) - }) - } +// @inlinable +// static func _vjpConcatenating( +// concatenating tensors: [Tensor], +// alongAxis axis: Int = 0 +// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { +// let result = Tensor(concatenating: tensors, alongAxis: axis) +// let posAxis = axis < 0 ? 
axis + tensors[0].rank : axis +// let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) +// return (result, { [count = tensors.count] v in +// if count == 1 { return Array.DifferentiableView([v]) } +// let splits = v.split(sizes: sizes, alongAxis: posAxis) +// return Array.DifferentiableView(splits) +// }) +// } + +// @inlinable +// static func _vjpTiling( +// tiling tensor: Tensor, +// multiples: Tensor +// ) -> (Tensor, (Tensor) -> Tensor) { +// let result = Tensor(tiling: tensor, multiples: multiples) +// return (result, { [shape = tensor.shapeTensor] v in +// let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() +// let axes = Tensor( +// rangeFrom: 0, to: Int32(splitShape.scalarCount), stride: 2) +// return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) +// }) +// } +// } - @usableFromInline - static func _vjpTiling( - tiling tensor: Tensor, - multiples: Tensor - ) -> (Tensor, (Tensor) -> Tensor) { - let result = Tensor(tiling: tensor, multiples: multiples) - return (result, { [shape = tensor.shapeTensor] v in - let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() - let axes = Tensor( - rangeFrom: 0, to: Int32(splitShape.scalarCount), stride: 2) - return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) - }) - } -} + +//===------------------------------------------------------------------------------------------===// +// Numeric Initialization +//===------------------------------------------------------------------------------------------===// public extension Tensor where Scalar : Numeric { - /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided - /// tensor. + /// Creates a tensor with all scalars set to zero. + /// + /// - Parameter shape: Shape of the tensor. + @inlinable + init(zeros shape: TensorShape) { + self.init(repeating: 0, shape: shape) + } + + /// Creates a tensor with all scalars set to one. /// - /// - Parameter other: Tensor whose shape and data type to use. + /// - Parameter shape: Shape of the tensor. @inlinable - init(zerosLike other: Tensor) { - self = Raw.zerosLike(other) + init(ones shape: TensorShape) { + self.init(repeating: 1, shape: shape) } - /// Creates a tensor with all scalars set to one that has the same shape and type as the provided - /// tensor. + // /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided + // /// tensor. + // /// + // /// - Parameter other: Tensor whose shape and data type to use. + // @inlinable + // init(zerosLike other: Tensor) { + // self = Raw.zerosLike(other) + // } + + // /// Creates a tensor with all scalars set to one that has the same shape and type as the provided + // /// tensor. + // /// + // /// - Parameter other: Tensor whose shape and data type to use. + // @inlinable + // init(onesLike other: Tensor) { + // self = Raw.onesLike(other) + // } + + + /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, + /// an end value, stepping by the specified amount. + /// + /// - Parameters: + /// - start: The starting value to use for the sequence. If the sequence + /// contains any values, the first one is `start`. + /// - end: An end value to limit the sequence. `end` is never an element of + /// the resulting sequence. + /// - stride: The amount to step by with each iteration. `stride` must be + /// positive. /// - /// - Parameter other: Tensor whose shape and data type to use. 
@inlinable - init(onesLike other: Tensor) { - self = Raw.onesLike(other) + init(rangeFrom start: Scalar, to end: Scalar, stride: Scalar) { + self = Raw.range(start: Tensor(start), limit: Tensor(end), delta: Tensor(stride)) } - /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an - /// end value, stepping by the specified amount. + // /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an + // /// end value, stepping by the specified amount. + // /// + // /// - Parameters: + // /// - start: The starting value to use for the sequence. If the sequence contains any values, + // /// the first one is `start`. + // /// - end: An end value to limit the sequence. `end` is never an element of the resulting + // /// sequence. + // /// - stride: The amount to step by with each iteration. `stride` must be positive. + // @inlinable + // init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { + // self = Raw.range(start: start, limit: end, delta: stride) + // } + + /// Creates a one-hot tensor at given indices. The locations represented by + /// `indices` take value `onValue` (`1` by default), while all other locations + /// take value `offValue` (`0` by default). If the input `indices` is rank + /// `n`, the new tensor will have rank `n+1`. The new axis is created at + /// dimension `axis` (by default, the new axis is appended at the end). + /// + /// If `indices` is a scalar, the new tensor's shape will be a vector of + /// length `depth`. + /// + /// If `indices` is a vector of length `features`, the output shape will be: + /// features x depth, if axis == -1 + /// depth x features, if axis == 0 + /// + /// If `indices` is a matrix (batch) with shape `[batch, features]`, the + /// output shape will be: + /// batch x features x depth, if axis == -1 + /// batch x depth x features, if axis == 1 + /// depth x batch x features, if axis == 0 /// /// - Parameters: - /// - start: The starting value to use for the sequence. If the sequence contains any values, - /// the first one is `start`. - /// - end: An end value to limit the sequence. `end` is never an element of the resulting - /// sequence. - /// - stride: The amount to step by with each iteration. `stride` must be positive. + /// - indices: A `Tensor` of indices. + /// - depth: A scalar defining the depth of the one hot dimension. + /// - onValue: A scalar defining the value at the location referred to by + /// some index in `indices`. + /// - offValue: A scalar defining the value at a location that is not + /// referred to by any index in `indices`. + /// - axis: The axis to fill. The default is `-1`, a new inner-most axis. + /// @inlinable - init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { - self = Raw.range(start: start, limit: end, delta: stride) + init( + oneHotAtIndices indices: Tensor, + depth: Int, + onValue: Scalar = 1, + offValue: Scalar = 0, + axis: Int = -1 + ) { + self = Raw.oneHot( + indices: indices, + depth: Tensor(Int32(depth)), + onValue: Tensor(onValue), + offValue: Tensor(offValue), + axis: Int64(axis)) } } diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 1169f774d..be927ef61 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -49,28 +49,28 @@ internal func _vjpSigmoid( return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) } -/// Computes the log-sigmoid of the specified tensor element-wise. 
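A usage sketch for the one-hot initializer defined above (illustrative values):

```swift
let labels = Tensor<Int32>([0, 2, 1])
let encoded = Tensor<Float>(oneHotAtIndices: labels, depth: 3)
// [[1, 0, 0],
//  [0, 0, 1],
//  [0, 1, 0]]
```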
Specifically, -/// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. -@inlinable -@differentiable -public func logSigmoid(_ x: Tensor) -> Tensor { - return -softplus(-x) -} - -/// Computes the softplus function for the specified tensor element-wise. The softplus function is -/// defined as `log(exp(x) + 1)`. -@inlinable -@differentiable(vjp: _vjpSoftplus) -public func softplus(_ x: Tensor) -> Tensor { - return Raw.softplus(features: x) -} - -@inlinable -internal func _vjpSoftplus( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - return (softplus(x), { v in v * sigmoid(x) }) -} +// /// Computes the log-sigmoid of the specified tensor element-wise. Specifically, +// /// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. +// @inlinable +// @differentiable +// public func logSigmoid(_ x: Tensor) -> Tensor { +// return -softplus(-x) +// } + +// /// Computes the softplus function for the specified tensor element-wise. The softplus function is +// /// defined as `log(exp(x) + 1)`. +// @inlinable +// @differentiable(vjp: _vjpSoftplus) +// public func softplus(_ x: Tensor) -> Tensor { +// return Raw.softplus(features: x) +// } + +// @inlinable +// internal func _vjpSoftplus( +// _ x: Tensor +// ) -> (Tensor, (Tensor) -> Tensor) { +// return (softplus(x), { v in v * sigmoid(x) }) +// } /// Computes the softmax of the specified tensor along the last axis. @@ -84,11 +84,13 @@ public func softmax(_ x: Tensor) -> Tensor { /// Computes the softmax of the specified tensor along the specified axis. /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. @inlinable +// TODO: [AD]. public func softmax( _ x: Tensor, alongAxis axis: Int ) -> Tensor { let expx = exp(x) + // TODO: [BUG] keepDims = true for the sum. return expx / expx.sum(alongAxes: axis) } From a16d911230cb9a19c3f2e495b40b40f09f5233b4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 12:26:50 -0400 Subject: [PATCH 35/55] Brought some more stuff from the stdlib. --- Sources/DeepLearning/Initializers.swift | 11 +- Sources/DeepLearning/Operators/Basic.swift | 635 ++++++++++++--------- Sources/DeepLearning/Tensors.swift | 109 ++++ 3 files changed, 482 insertions(+), 273 deletions(-) create mode 100644 Sources/DeepLearning/Tensors.swift diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 2b61a5c5b..7eb77250b 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -102,6 +102,10 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { } } +//===------------------------------------------------------------------------------------------===// +// Stacking / Concatenating / Tiling +//===------------------------------------------------------------------------------------------===// + public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). 
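The axis-wise `softmax(_:alongAxis:)` above normalizes over an arbitrary dimension instead of the last one. A usage sketch (illustrative values):

```swift
let scores = Tensor<Float>([[1, 2], [3, 4]])
let overRows = softmax(scores, alongAxis: 0)   // entries in each column sum to 1
let overLast = softmax(scores)                 // entries in each row sum to 1
```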
@inlinable @@ -240,9 +244,8 @@ public extension Tensor { // } // } - //===------------------------------------------------------------------------------------------===// -// Numeric Initialization +// Numeric //===------------------------------------------------------------------------------------------===// public extension Tensor where Scalar : Numeric { @@ -356,6 +359,10 @@ public extension Tensor where Scalar : Numeric { } } +//===------------------------------------------------------------------------------------------===// +// Random +//===------------------------------------------------------------------------------------------===// + public extension Tensor where Scalar == Int32 { /// Creates a tensor with the specified shape, randomly sampling scalar values /// from a discrete uniform distribution. diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 052a2d368..7df841cf1 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -16,310 +16,403 @@ import TensorFlow #endif +//===------------------------------------------------------------------------------------------===// +// Shape Transformations +//===------------------------------------------------------------------------------------------===// + public extension Tensor { + /// Convert to a tensor with the specified rank, with all dimensions equal to 1. + @inlinable + func makeTensor(rank: Int) -> Tensor { + return Tensor(repeating: self, shape: TensorShape(rank)) + } + + /// Reshape to the shape of the specified `Tensor`. + /// - Precondition: The number of scalars matches the new shape. @inlinable @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) - func unstack(alongAxis axis: Int = 0) -> [Tensor] { - return split(numSplits: shape[axis], alongAxis: axis) + func reshaped(like other: Tensor) -> Tensor { + return reshaped(toShape: other.shapeTensor) } + /// Reshape to the specified shape. + /// - Precondition: The number of scalars matches the new shape. @inlinable - @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) - func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { - return Raw.split( - splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func reshaped(to newShape: TensorShape) -> Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + return reshaped(toShape: Tensor({newShape.dimensions.map(Int32.init)}())) } + /// Reshape to the specified `Tensor` representing a shape. + /// - Precondition: The number of scalars matches the new shape. @inlinable @differentiable( wrt: self, - vjp: _vjpSplit(sizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) - func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { - return Raw.splitV( - value: self, - sizeSplits: sizes, - splitDim: Tensor(Int32(axis)), - numSplit: Int64(sizes.shape[0])) + vjp: _vjpReshaped(toShape:) where Scalar : TensorFlowFloatingPoint) + func reshaped(toShape newShape: Tensor) -> Tensor { + return Raw.reshape(self, shape: newShape) } - /// Gathers slices of this tensor at `indices` along the `axis` dimension. 
- /// - /// For 0-D (scalar) `indices`: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices, - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// - /// For 1-D (vector) `indices`: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// i, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices[i], - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// - /// In the general case, produces a resulting tensor where: - /// ``` - /// result[p_0, ..., p_{axis-1}, - /// i_{batch\_dims}, ..., i_{M-1}, - /// p_{axis + 1}, ..., p_{N-1}] = - /// self[p_0, ..., p_{axis-1}, - /// indices[i_0, ..., i_{M-1}], - /// p_{axis + 1}, ..., p_{N-1}] - /// ``` - /// where `N = self.rank` and `M = indices.rank`. - /// - /// The shape of the resulting tensor is: - /// `self.shape[.., - alongAxis axis: Int = 0 - ) -> Tensor { - return Raw.gatherV2(params: self, indices: indices, axis: Tensor(Int32(axis))) + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func flattened() -> Tensor { + return reshaped(to: [-1]) } - /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the - /// first `batchDims` dimensions that correspond to batch dimensions. - /// - /// Performs similar functionality to `gathering`, except that the resulting tensor shape is now: - /// `self.shape[.., - alongAxis axis: Int, - numBatchDims batchDims: Int - ) -> Tensor { - precondition(batchDims >= 0 && batchDims < indices.rank, - "'numBatchDims' must be non-negative and less than 'indices.rank'.") - precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") + @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar : TensorFlowFloatingPoint) + func expandingShape(at shapeIndex: Int) -> Tensor { + return Raw.expandDims(self, dim: Tensor(Int32(shapeIndex))) + } - // Handle the axis argument by transposing the axis dimension so that it is the first - // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then - // transposing the result to put the pre-axis dimensions before the indices dimensions. - if axis != batchDims { - // Adjust axis to be positive. - let posAxis = axis < 0 ? axis + rank : axis + /// Returns a rank-lifted `Tensor` with a leading dimension of 1. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func rankLifted() -> Tensor { + return expandingShape(at: 0) + } - precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") - precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") + /// Remove the specified dimensions of size 1 from the shape of a tensor. If no dimensions are + /// specified, then all dimensions of size 1 will be removed. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func squeezingShape(at axes: Int...) -> Tensor { + return squeezingShape(at: axes) + } - // Move self[axis] up to self[batchDims]. - let permutation = Tensor(concatenating: [ - Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), - Tensor(Int32(axis)).rankLifted(), - Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), - Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) - let tensor = transposed(withPermutations: permutation) - let result = tensor.batchGathering( - atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) + /// Remove the specified dimensions of size 1 from the shape of a tensor. 
If no dimensions are + /// specified, then all dimensions of size 1 will be removed. + @inlinable + @differentiable(wrt: self, vjp: _vjpSqueezingShape(at:) where Scalar : TensorFlowFloatingPoint) + func squeezingShape(at axes: [Int]) -> Tensor { + return Raw.squeeze(self, squeezeDims: axes.map(Int32.init)) + } - // Move the result dimensions corresponding to self[batchDims ..< axis] to just before - // the dimensions corresponding to indices[batchDims ...]. - let start = indices.rank + posAxis - batchDims - let resultPermutation = Tensor(concatenating: [ - Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), - Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), - Tensor(rangeFrom: Int32(batchDims), to: Int32(indices.rank), stride: 1), - Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) - return result.transposed(withPermutations: resultPermutation) - } + // @inlinable + // @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + // func unstack(alongAxis axis: Int = 0) -> [Tensor] { + // return split(numSplits: shape[axis], alongAxis: axis) + // } - var batchIndices = indices - var accumulated = Tensor(ones: []) - for d in (1...batchDims).reversed() { - accumulated *= shapeTensor[d] - let dValue = shapeTensor[d - 1] - let dIndices = Tensor( - rangeFrom: Tensor(zeros: []), - to: dValue, - stride: Tensor(ones: []) - ) * accumulated - let dShape = Tensor(concatenating: [ - Tensor([Int32](repeating: 1, count: d - 1)), - dValue.rankLifted(), - Tensor([Int32](repeating: 1, count: indices.rank - 1))]) - batchIndices += dIndices.reshaped(toShape: dShape) - } + // @inlinable + // @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) + // func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { + // return Raw.split( + // splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) + // } - let flatIndices = batchIndices.flattened() - let outerShape = shapeTensor[Int(batchDims + 1)...] - let innerShape = shapeTensor[.., alongAxis axis: Int = 0) -> [Tensor] { + // return Raw.splitV( + // value: self, + // sizeSplits: sizes, + // splitDim: Tensor(Int32(axis)), + // numSplit: Int64(sizes.shape[0])) + // } - /// Gathers values from this tensor according to the provided boolean mask. - /// - /// For example: - /// ``` - /// // 1-D example - /// // tensor is [0, 1, 2, 3] - /// // mask is [true, false, true, false] - /// tensor.gathering(where: mask) // is [0, 2] - /// - /// // 2-D example - /// // tensor is [[1, 2], [3, 4], [5, 6]] - /// // mask is [true, false, true] - /// tensor.gathering(where: mask) // is [[1, 2], [5, 6]] - /// ``` - /// - /// In general, `0 < mask.rank = K <= tensor.rank`, and the `mask`'s shape must match the first - /// K dimensions of the `tensor`'s shape. We then have: - /// `tensor.gathering(where: mask)[i, j1, ..., jd] = tensor[i1, ..., iK, j1, ..., jd]`, where - /// `[i1, ..., iK]` is the `i`th `true` entry of `mask` (row-major order). - /// - /// The `axis` could be used with `mask` to indicate the axis to mask from. In that case, - /// `axis + mask.rank <= tensor.rank` and the `mask``'s shape must match the first - /// `axis + mask.rank` dimensions of the `tensor`'s shape. - /// - /// - Parameters: - /// - mask: K-D boolean tensor, where `K <= self.rank`. - /// - axis: 0-D integer tensor representing the axis in `self` to mask from, where - /// `K + axis <= self.rank`. - /// - /// - Precondition: The `mask` cannot be a scalar: `mask.rank != 0`. 
- /// - /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor - /// corresponding to `true` values in `mask`. - @inlinable - // TODO: @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) - func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { - precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - let posAxis = axis < 0 ? axis + rank : axis - let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() - let reshapedTensor = reshaped( - toShape: Tensor(concatenating: [ - shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) - return reshapedTensor.gathering(atIndices: indices, alongAxis: posAxis) - } + // /// Gathers slices of this tensor at `indices` along the `axis` dimension. + // /// + // /// For 0-D (scalar) `indices`: + // /// ``` + // /// result[p_0, ..., p_{axis-1}, + // /// p_{axis + 1}, ..., p_{N-1}] = + // /// self[p_0, ..., p_{axis-1}, + // /// indices, + // /// p_{axis + 1}, ..., p_{N-1}] + // /// ``` + // /// + // /// For 1-D (vector) `indices`: + // /// ``` + // /// result[p_0, ..., p_{axis-1}, + // /// i, + // /// p_{axis + 1}, ..., p_{N-1}] = + // /// self[p_0, ..., p_{axis-1}, + // /// indices[i], + // /// p_{axis + 1}, ..., p_{N-1}] + // /// ``` + // /// + // /// In the general case, produces a resulting tensor where: + // /// ``` + // /// result[p_0, ..., p_{axis-1}, + // /// i_{batch\_dims}, ..., i_{M-1}, + // /// p_{axis + 1}, ..., p_{N-1}] = + // /// self[p_0, ..., p_{axis-1}, + // /// indices[i_0, ..., i_{M-1}], + // /// p_{axis + 1}, ..., p_{N-1}] + // /// ``` + // /// where `N = self.rank` and `M = indices.rank`. + // /// + // /// The shape of the resulting tensor is: + // /// `self.shape[.., + // alongAxis axis: Int = 0 + // ) -> Tensor { + // return Raw.gatherV2(params: self, indices: indices, axis: Tensor(Int32(axis))) + // } + + // /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the + // /// first `batchDims` dimensions that correspond to batch dimensions. + // /// + // /// Performs similar functionality to `gathering`, except that the resulting tensor shape is now: + // /// `self.shape[.., + // alongAxis axis: Int, + // numBatchDims batchDims: Int + // ) -> Tensor { + // precondition(batchDims >= 0 && batchDims < indices.rank, + // "'numBatchDims' must be non-negative and less than 'indices.rank'.") + // precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") + + // // Handle the axis argument by transposing the axis dimension so that it is the first + // // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then + // // transposing the result to put the pre-axis dimensions before the indices dimensions. + // if axis != batchDims { + // // Adjust axis to be positive. + // let posAxis = axis < 0 ? axis + rank : axis + + // precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") + // precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") + + // // Move self[axis] up to self[batchDims]. 
+ // let permutation = Tensor(concatenating: [ + // Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), + // Tensor(Int32(axis)).rankLifted(), + // Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), + // Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) + // let tensor = transposed(withPermutations: permutation) + // let result = tensor.batchGathering( + // atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) + + // // Move the result dimensions corresponding to self[batchDims ..< axis] to just before + // // the dimensions corresponding to indices[batchDims ...]. + // let start = indices.rank + posAxis - batchDims + // let resultPermutation = Tensor(concatenating: [ + // Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), + // Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), + // Tensor(rangeFrom: Int32(batchDims), to: Int32(indices.rank), stride: 1), + // Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) + // return result.transposed(withPermutations: resultPermutation) + // } + + // var batchIndices = indices + // var accumulated = Tensor(ones: []) + // for d in (1...batchDims).reversed() { + // accumulated *= shapeTensor[d] + // let dValue = shapeTensor[d - 1] + // let dIndices = Tensor( + // rangeFrom: Tensor(zeros: []), + // to: dValue, + // stride: Tensor(ones: []) + // ) * accumulated + // let dShape = Tensor(concatenating: [ + // Tensor([Int32](repeating: 1, count: d - 1)), + // dValue.rankLifted(), + // Tensor([Int32](repeating: 1, count: indices.rank - 1))]) + // batchIndices += dIndices.reshaped(toShape: dShape) + // } + + // let flatIndices = batchIndices.flattened() + // let outerShape = shapeTensor[Int(batchDims + 1)...] + // let innerShape = shapeTensor[.., alongAxis axis: Int = 0) -> Tensor { + // precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") + // let posAxis = axis < 0 ? axis + rank : axis + // let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() + // let reshapedTensor = reshaped( + // toShape: Tensor(concatenating: [ + // shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) + // return reshapedTensor.gathering(atIndices: indices, alongAxis: posAxis) + // } } -public extension Tensor { - /// Returns the locations of non-zero / true values in this tensor. - /// - /// The coordinates are returned in a 2-D tensor where the first dimension (rows) represents the - /// number of non-zero elements, and the second dimension (columns) represents the coordinates - /// of the non-zero elements. Keep in mind that the shape of the output tensor can vary - /// depending on how many true values there are in this tensor. Indices are output in row-major - /// order. - /// - /// For example: - /// ``` - /// // 'input' is [[true, false], [true, false]] - /// // 'input' has 2 true values and so the output has 2 rows. - /// // 'input' has rank of 2, and so the second dimension of the output has size 2. - /// input.nonZeroIndices() // is [[0, 0], [1, 0]] - /// - /// // 'input' is [[[ true, false], [ true, false]], - /// // [[false, true], [false, true]], - /// // [[false, false], [false, true]]] - /// // 'input' has 5 true values and so the output has 5 rows. - /// // 'input' has rank 3, and so the second dimension of the output has size 3. 
- /// input.nonZeroIndices() // is [[0, 0, 0], - /// // [0, 1, 0], - /// // [1, 0, 1], - /// // [1, 1, 1], - /// // [2, 1, 1]] - /// ``` - /// - /// - Returns: A tensor with shape `(num_true, rank(condition))`. +internal extension Tensor where Scalar : TensorFlowFloatingPoint { @inlinable - func nonZeroIndices() -> Tensor { - return Raw.where_(self) + func _vjpReshaped(toShape newShape: Tensor) -> (Tensor, (Tensor) -> Tensor) { + let value = reshaped(toShape: newShape) + return (value, { [shape = shapeTensor] v in v.reshaped(toShape: shape) }) } -} -internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @usableFromInline - func _vjpSplit( - numSplits: Int, - alongAxis axis: Int = 0 - ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { - let result = split(numSplits: numSplits, alongAxis: axis) - return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + @inlinable + func _vjpExpandingShape(at shapeIndex: Int) -> (Tensor, (Tensor) -> Tensor) { + let value = expandingShape(at: shapeIndex) + return (value, { v in v.squeezingShape(at: shapeIndex) }) } - @usableFromInline - func _vjpSplit( - sizes: Tensor, - alongAxis axis: Int = 0 - ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { - let result = split(sizes: sizes, alongAxis: axis) - return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + @inlinable + func _vjpSqueezingShape(at axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { + let value = squeezingShape(at: axes) + return (value, { [shape = shapeTensor] v in v.reshaped(toShape: shape) }) } - @usableFromInline - func _vjpGathering( - atIndices indices: Tensor, - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Tensor) { - let result = gathering(atIndices: indices, alongAxis: axis) - let posAxis = axis < 0 ? axis + rank : axis - return (result, { [shape = shapeTensor] v in - let indicesSize = Tensor(Int32(indices.scalarCount)) - let outerShape = shape[..(rangeFrom: 0, to: Int32(outerSize), stride: 1) - let innerIndices = Tensor( - rangeFrom: Int32(outerSize) + 1, - to: Int32(outerSize) + 1 + Int32(innerSize), - stride: 1) - let valuesShape = Tensor(concatenating: [outerShape, indicesSize, innerShape]) - let values = v.reshaped(toShape: valuesShape) - let valueIndices = indices.reshaped(toShape: indicesSize) + // @inlinable + // func _vjpSplit( + // numSplits: Int, + // alongAxis axis: Int = 0 + // ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + // let result = split(numSplits: numSplits, alongAxis: axis) + // return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + // } - // We need to sum up every slice `values[..., i, ....]` corresponding to - // `tensor[..., indices[i], ...]`. Since `unsortedSegmentSum` does not support an axis - // parameter, we transpose the gather dimension to the front, then use - // `unsortedSegmentSum` to build a `[gatherAxis, outerAxes, innerAxes]` tensor with all - // the gradients affecting each index in `gatherAxis` summed up. 
- let permutations = Tensor(concatenating: [ - Tensor([Int32(outerSize)]), outerIndices, innerIndices]) - let transposedValues = values.transposed(withPermutations: permutations) - let gradient = Raw.unsortedSegmentSum( - data: transposedValues, - segmentIds: valueIndices, - numSegments: shape[posAxis]) + // @inlinable + // func _vjpSplit( + // sizes: Tensor, + // alongAxis axis: Int = 0 + // ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + // let result = split(sizes: sizes, alongAxis: axis) + // return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + // } + + // @inlinable + // func _vjpGathering( + // atIndices indices: Tensor, + // alongAxis axis: Int = 0 + // ) -> (Tensor, (Tensor) -> Tensor) { + // let result = gathering(atIndices: indices, alongAxis: axis) + // let posAxis = axis < 0 ? axis + rank : axis + // return (result, { [shape = shapeTensor] v in + // let indicesSize = Tensor(Int32(indices.scalarCount)) + // let outerShape = shape[..(rangeFrom: 0, to: Int32(outerSize), stride: 1) + // let innerIndices = Tensor( + // rangeFrom: Int32(outerSize) + 1, + // to: Int32(outerSize) + 1 + Int32(innerSize), + // stride: 1) + // let valuesShape = Tensor(concatenating: [outerShape, indicesSize, innerShape]) + // let values = v.reshaped(toShape: valuesShape) + // let valueIndices = indices.reshaped(toShape: indicesSize) + + // // We need to sum up every slice `values[..., i, ....]` corresponding to + // // `tensor[..., indices[i], ...]`. Since `unsortedSegmentSum` does not support an axis + // // parameter, we transpose the gather dimension to the front, then use + // // `unsortedSegmentSum` to build a `[gatherAxis, outerAxes, innerAxes]` tensor with all + // // the gradients affecting each index in `gatherAxis` summed up. + // let permutations = Tensor(concatenating: [ + // Tensor([Int32(outerSize)]), outerIndices, innerIndices]) + // let transposedValues = values.transposed(withPermutations: permutations) + // let gradient = Raw.unsortedSegmentSum( + // data: transposedValues, + // segmentIds: valueIndices, + // numSegments: shape[posAxis]) - // Finally, we invert the above transpose operation by moving dimension 0 back to its - // original position. - let inversePermutations = Tensor(concatenating: [ - outerIndices + 1, Tensor([0]), innerIndices]) - return gradient.transposed(withPermutations: inversePermutations) - }) - } + // // Finally, we invert the above transpose operation by moving dimension 0 back to its + // // original position. + // let inversePermutations = Tensor(concatenating: [ + // outerIndices + 1, Tensor([0]), innerIndices]) + // return gradient.transposed(withPermutations: inversePermutations) + // }) + // } } + +// public extension Tensor { +// /// Returns the locations of non-zero / true values in this tensor. +// /// +// /// The coordinates are returned in a 2-D tensor where the first dimension (rows) represents the +// /// number of non-zero elements, and the second dimension (columns) represents the coordinates +// /// of the non-zero elements. Keep in mind that the shape of the output tensor can vary +// /// depending on how many true values there are in this tensor. Indices are output in row-major +// /// order. +// /// +// /// For example: +// /// ``` +// /// // 'input' is [[true, false], [true, false]] +// /// // 'input' has 2 true values and so the output has 2 rows. +// /// // 'input' has rank of 2, and so the second dimension of the output has size 2. 
+// /// input.nonZeroIndices() // is [[0, 0], [1, 0]] +// /// +// /// // 'input' is [[[ true, false], [ true, false]], +// /// // [[false, true], [false, true]], +// /// // [[false, false], [false, true]]] +// /// // 'input' has 5 true values and so the output has 5 rows. +// /// // 'input' has rank 3, and so the second dimension of the output has size 3. +// /// input.nonZeroIndices() // is [[0, 0, 0], +// /// // [0, 1, 0], +// /// // [1, 0, 1], +// /// // [1, 1, 1], +// /// // [2, 1, 1]] +// /// ``` +// /// +// /// - Returns: A tensor with shape `(num_true, rank(condition))`. +// @inlinable +// func nonZeroIndices() -> Tensor { +// return Raw.where_(self) +// } +// } diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift new file mode 100644 index 000000000..d400bf935 --- /dev/null +++ b/Sources/DeepLearning/Tensors.swift @@ -0,0 +1,109 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +import TensorFlow +#endif + +//===------------------------------------------------------------------------------------------===// +// Description and Visualization +//===------------------------------------------------------------------------------------------===// + +// String conversion. +extension Tensor : CustomStringConvertible { + /// A textual representation of the tensor. + /// + /// - Note: use `fullDescription` for a non-pretty-printed description showing all scalars. + public var description: String { + return array.description + } +} + +public extension Tensor { + /// A textual representation of the tensor. Returns a summarized description if `summarize` is + /// true and the element count exceeds twice the `edgeElementCount`. + /// + /// - Parameters: + /// - lineWidth: The max line width for printing. Used to determine number of scalars to print + /// per line. + /// - edgeElementCount: The maximum number of elements to print before and after summarization + /// via ellipses (`...`). + /// - summarizing: If true, summarize description if element count exceeds twice + /// `edgeElementCount`. + func description( + lineWidth: Int = 80, + edgeElementCount: Int = 3, + summarizing: Bool = false + ) -> String { + return array.description( + lineWidth: lineWidth, + edgeElementCount: edgeElementCount, + summarizing: summarizing) + } + + /// A full, non-pretty-printed textual representation of the tensor, showing + /// all scalars. + var fullDescription: String { + return array.fullDescription + } +} + +// Xcode Playground display conversion. +extension Tensor : CustomPlaygroundDisplayConvertible { + public var playgroundDescription: Any { + return description + } +} + +// Mirror representation, used by debugger/REPL. 
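// Illustrative usage sketch (editorial addition, not part of the original patch; the tensor
// and printed output are hypothetical). `description(lineWidth:edgeElementCount:summarizing:)`
// elides the middle of large tensors, while `fullDescription` always prints every scalar.
let large = Tensor<Float>(ones: [1000])
print(large.description(summarizing: true))   // e.g. "[1.0, 1.0, 1.0, ..., 1.0, 1.0, 1.0]"
print(large.fullDescription)                  // prints all 1000 scalars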
+extension Tensor : CustomReflectable { + public var customMirror: Mirror { + return Mirror(self, children: [], displayStyle: .struct) + } +} + +//===------------------------------------------------------------------------------------------===// +// Codable Conformance +//===------------------------------------------------------------------------------------------===// + +extension Tensor : Codable where Scalar : Codable { + @inlinable + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + try container.encode(array) + } + + @inlinable + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + let array = try container.decode(ShapedArray.self) + self.init(array) + } +} + +//===------------------------------------------------------------------------------------------===// +// Equality +//===------------------------------------------------------------------------------------------===// + +extension Tensor : Equatable where Scalar : Equatable { + @inlinable + public static func == (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .== rhs).all() + } + + @inlinable + public static func != (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .== rhs).any() + } +} From 34b475acfdbd13d60885ce9b20f062f09c933e19 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 12:28:50 -0400 Subject: [PATCH 36/55] Minor edit. --- Sources/DeepLearning/Tensors.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift index d400bf935..daee815d8 100644 --- a/Sources/DeepLearning/Tensors.swift +++ b/Sources/DeepLearning/Tensors.swift @@ -16,6 +16,10 @@ import TensorFlow #endif +#if COMPILING_TENSORFLOW_MODULE +infix operator .== : ComparisonPrecedence +#endif + //===------------------------------------------------------------------------------------------===// // Description and Visualization //===------------------------------------------------------------------------------------------===// From 86072a49f0b65655dfdf24d549bb11f20aff67f8 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 12:56:34 -0400 Subject: [PATCH 37/55] Moved some more stuff to swift-apis. --- .../DeepLearning/Operators/Comparison.swift | 237 +++++++++++++ Sources/DeepLearning/Operators/Math.swift | 332 ++++++++++++++++++ Sources/DeepLearning/Tensors.swift | 16 - 3 files changed, 569 insertions(+), 16 deletions(-) create mode 100644 Sources/DeepLearning/Operators/Comparison.swift diff --git a/Sources/DeepLearning/Operators/Comparison.swift b/Sources/DeepLearning/Operators/Comparison.swift new file mode 100644 index 000000000..2bc7329be --- /dev/null +++ b/Sources/DeepLearning/Operators/Comparison.swift @@ -0,0 +1,237 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
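// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). The `Equatable` conformance in Tensors.swift above reduces the element-wise
// comparison with `.all()`, so `==` is true only when every pair of corresponding scalars
// matches, while `.==` keeps the per-element results.
let p = Tensor<Int32>([1, 2, 3])
let q = Tensor<Int32>([1, 2, 3])
let sameEverywhere = p == q     // true
let pointwise = p .== q         // [true, true, true]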
+ +#if !COMPILING_TENSORFLOW_MODULE +import TensorFlow +#endif + +infix operator .< : ComparisonPrecedence +infix operator .<= : ComparisonPrecedence +infix operator .>= : ComparisonPrecedence +infix operator .> : ComparisonPrecedence +infix operator .== : ComparisonPrecedence +infix operator .!= : ComparisonPrecedence + +public extension Tensor where Scalar : Numeric & Comparable { + /// Computes `lhs < rhs` element-wise and returns a `Tensor` of Boolean /// scalars. + @inlinable + static func .< (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.less(lhs, rhs) + } + + /// Computes `lhs <= rhs` element-wise and returns a `Tensor` of Boolean scalars. + @inlinable + static func .<= (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.lessEqual(lhs, rhs) + } + + /// Computes `lhs > rhs` element-wise and returns a `Tensor` of Boolean scalars. + @inlinable + static func .> (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.greater(lhs, rhs) + } + + /// Computes `lhs >= rhs` element-wise and returns a `Tensor` of Boolean scalars. + @inlinable + static func .>= (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.greaterEqual(lhs, rhs) + } + + /// Computes `lhs < rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.<` supports broadcasting. + @inlinable + static func .< (lhs: Scalar, rhs: Tensor) -> Tensor { + return Raw.less(Tensor(lhs), rhs) + } + + /// Computes `lhs <= rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.<=` supports broadcasting. + @inlinable + static func .<= (lhs: Scalar, rhs: Tensor) -> Tensor { + return Raw.lessEqual(Tensor(lhs), rhs) + } + + /// Computes `lhs > rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.>` supports broadcasting. + @inlinable + static func .> (lhs: Scalar, rhs: Tensor) -> Tensor { + return Raw.greater(Tensor(lhs), rhs) + } + + /// Computes `lhs >= rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.>=` supports broadcasting. + @inlinable + static func .>= (lhs: Scalar, rhs: Tensor) -> Tensor { + return Raw.greaterEqual(Tensor(lhs), rhs) + } + + /// Computes `lhs < rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.<` supports broadcasting. + @inlinable + static func .< (lhs: Tensor, rhs: Scalar) -> Tensor { + return Raw.less(lhs, Tensor(rhs)) + } + + /// Computes `lhs <= rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.<=` supports broadcasting. + @inlinable + static func .<= (lhs: Tensor, rhs: Scalar) -> Tensor { + return Raw.lessEqual(lhs, Tensor(rhs)) + } + + /// Computes `lhs > rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.>` supports broadcasting. + @inlinable + static func .> (lhs: Tensor, rhs: Scalar) -> Tensor { + return Raw.greater(lhs, Tensor(rhs)) + } + + /// Computes `lhs >= rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.>=` supports broadcasting. 
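// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). The pointwise comparison operators return Boolean tensors and broadcast a
// scalar on either side; the resulting masks can be combined with the element-wise logical
// helpers defined in Operators/Math.swift.
let v = Tensor<Float>([1, 2, 3, 4])
let big = v .> 2                                     // [false, false, true, true]
let inRange = (v .>= 2).elementsLogicalAnd(v .< 4)   // [false, true, true, false]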
+ @inlinable + static func .>= (lhs: Tensor, rhs: Scalar) -> Tensor { + return Raw.greaterEqual(lhs, Tensor(rhs)) + } +} + +extension Tensor : Equatable where Scalar : Equatable { + @inlinable + public static func == (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .== rhs).all() + } + + @inlinable + public static func != (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .== rhs).any() + } +} + +extension Tensor : Comparable where Scalar : Numeric & Comparable { + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically less than that of the second argument. + @inlinable + public static func < (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .< rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically less than or equal to that of the second argument. + @inlinable + public static func <= (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .<= rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically greater than that of the second argument. + @inlinable + public static func > (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .> rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically greater than or equal to that of the second argument. + @inlinable + public static func >= (lhs: Tensor, rhs: Tensor) -> Bool { + return (lhs .>= rhs).all() + } +} + +public extension Tensor where Scalar : Numeric & Comparable { + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically less than that of the second argument. + @inlinable + static func < (lhs: Tensor, rhs: Scalar) -> Bool { + return (lhs .< rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically less than or equal to that of the second argument. + @inlinable + static func <= (lhs: Tensor, rhs: Scalar) -> Bool { + return (lhs .<= rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically greater than that of the second argument. + @inlinable + static func > (lhs: Tensor, rhs: Scalar) -> Bool { + return (lhs .> rhs).all() + } + + /// Returns a Boolean value indicating whether the value of the first argument is + /// lexicographically greater than or equal to that of the second argument. + @inlinable + static func >= (lhs: Tensor, rhs: Scalar) -> Bool { + return (lhs .>= rhs).all() + } +} + +public extension Tensor where Scalar : Equatable { + /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.==` supports broadcasting. + @inlinable + static func .==(lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.equal(lhs, rhs) + } + + /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.!=` supports broadcasting. + @inlinable + static func .!=(lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.notEqual(lhs, rhs) + } + + /// Computes `lhs == rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.==` supports broadcasting. + @inlinable + static func .==(lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) .== rhs + } + + /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.!=` supports broadcasting. 
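// Editorial note (illustrative, not part of the original patch; values are hypothetical).
// The `Comparable` conformance above reduces the pointwise comparison with `.all()`, so
// `lhs < rhs` holds only when every element of `lhs` is less than the corresponding element
// of `rhs` (an element-wise rather than a strictly lexicographic ordering). Likewise, `!=`
// as written returns `(lhs .== rhs).any()`, which is true when at least one pair of elements
// matches; the logical negation of `==` would be `!(lhs .== rhs).all()`.
let r = Tensor<Float>([1, 2, 3])
let s = Tensor<Float>([2, 3, 4])
let strictlySmaller = r < s     // true, because 1 < 2, 2 < 3, and 3 < 4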
+ @inlinable + static func .!=(lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) .!= rhs + } + + /// Computes `lhs == rhs` element-wise and returns a `Tensor` of Boolean + /// scalars. + /// - Note: `.==` supports broadcasting. + @inlinable + static func .==(lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs .== Tensor(rhs) + } + + /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. + /// - Note: `.!=` supports broadcasting. + @inlinable + static func .!=(lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs .!= Tensor(rhs) + } +} + +// TODO: infix operator ≈ : ComparisonPrecedence + +public extension Tensor where Scalar : FloatingPoint & Equatable { + /// Returns a `Tensor` of Boolean values indicating whether the elements of `self` are + /// approximately equal to those of `other`. + @inlinable + func elementsApproximatelyEqual( + _ other: Tensor, + tolerance: Double = 0.00001 + ) -> Tensor { + return Raw.approximateEqual(self, other, tolerance: tolerance) + } +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index be927ef61..56225be4a 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -20,6 +20,338 @@ import TensorFlow infix operator .> : ComparisonPrecedence #endif +// TODO: +// - Consider explicit broadcasting for elementwise binary ops when +// scalarization and rank getter are implemented. + +//===------------------------------------------------------------------------------------------===// +// Additive Group +//===------------------------------------------------------------------------------------------===// + +extension Tensor : AdditiveArithmetic where Scalar : Numeric { + /// A scalar zero tensor. + @inlinable + public static var zero: Tensor { + get { + return Tensor(zeros: []) + } + } + + /// Adds two tensors and produces their sum. + /// - Note: `+` supports broadcasting. + @inlinable + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + public static func + (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.add(lhs, rhs) + } + + /// Subtracts one tensor from another and produces their difference. + /// - Note: `-` supports broadcasting. + @inlinable + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + public static func - (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.sub(lhs, rhs) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpAdd(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (lhs + rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in + (v.unbroadcast(toShape: lhsShape), v.unbroadcast(toShape: rhsShape)) + }) + } + + @inlinable + static func _vjpSubtract(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (lhs - rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in + (v.unbroadcast(toShape: lhsShape), -v.unbroadcast(toShape: rhsShape)) + }) + } +} + +//===------------------------------------------------------------------------------------------===// +// Vector Space +//===------------------------------------------------------------------------------------------===// + +extension Tensor : VectorNumeric where Scalar : Numeric { + /// Multiplies the scalar with every scalar of the tensor and produces the product. 
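// Illustrative sketch (editorial addition, not part of the original patch; shapes and values
// are hypothetical). The VJPs above use `unbroadcast(toShape:)` to fold the incoming gradient
// back to each operand's original shape: dimensions that were broadcast during the forward
// pass are summed out in the backward pass.
let a = Tensor<Float>(ones: [2, 3])
let b = Tensor<Float>(ones: [3])
let upstream = Tensor<Float>(ones: [2, 3])                 // gradient flowing into `a + b`
let gradB = upstream.unbroadcast(toShape: b.shapeTensor)   // shape [3], every element 2.0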
+ @inlinable + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + public static func * (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) * rhs + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpMultiply(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (lhs * rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in + ((rhs * v).unbroadcast(toShape: lhsShape), (lhs * v).unbroadcast(toShape: rhsShape)) + }) + } +} + +extension Tensor : ShapedVectorNumeric where Scalar : Numeric {} + +extension Tensor : Differentiable where Scalar : TensorFlowFloatingPoint { + public typealias TangentVector = Tensor + public typealias CotangentVector = Tensor + public typealias AllDifferentiableVariables = Tensor + + @inlinable + public func tangentVector(from cotangent: CotangentVector) -> TangentVector { + return cotangent + } +} + +//===------------------------------------------------------------------------------------------===// +// Additional Element-wise Operators +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar : Numeric { + /// Adds the scalar to every scalar of the tensor and produces the sum. + @inlinable + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func + (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) + rhs + } + + /// Adds the scalar to every scalar of the tensor and produces the sum. + @inlinable + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func + (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs + Tensor(rhs) + } + + /// Subtracts the scalar from every scalar of the tensor and produces the difference. + @inlinable + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func - (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) - rhs + } + + /// Subtracts the scalar from every scalar of the tensor and produces the difference + @inlinable + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func - (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs - Tensor(rhs) + } + + /// Adds two tensors and stores the result in the left-hand-side variable. + /// - Note: `+=` supports broadcasting. + @inlinable + static func += (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs + rhs + } + + /// Adds the scalar to every scalar of the tensor and stores the result in the left-hand-side + /// variable. + @inlinable + static func += (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs + rhs + } + + /// Subtracts the second tensor from the first and stores the result in the left-hand-side + /// variable. + /// - Note: `-=` supports broadcasting. + @inlinable + static func -= (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs - rhs + } + + /// Subtracts the scalar from every scalar of the tensor and stores the result in the + /// left-hand-side variable. + @inlinable + static func -= (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs - rhs + } + + /// Multiplies two tensors and produces their product. + /// - Note: `*` supports broadcasting. + @inlinable + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func * (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.mul(lhs, rhs) + } + + /// Multiplies the scalar with every scalar of the tensor and produces the product. 
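// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). Scalar operands are promoted to tensors and broadcast, and the compound
// assignment operators simply rewrite to the binary forms defined above.
var t = Tensor<Float>([1, 2, 3])
t += 1                 // [2.0, 3.0, 4.0]
let scaled = 2 * t     // [4.0, 6.0, 8.0]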
+ @inlinable + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func * (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs * Tensor(rhs) + } + + /// Multiplies two tensors and stores the result in the left-hand-side variable. + /// - Note: `*=` supports broadcasting. + @inlinable + static func *= (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs * rhs + } + + @inlinable + static func *= (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs * rhs + } + + /// Returns the quotient of dividing the first tensor by the second. + /// - Note: `/` supports broadcasting. + @inlinable + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func / (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.div(lhs, rhs) + } + + /// Returns the quotient of dividing the scalar by the tensor, broadcasting the scalar. + @inlinable + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func / (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) / rhs + } + + /// Returns the quotient of dividing the tensor by the scalar, broadcasting the scalar. + @inlinable + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func / (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs / Tensor(rhs) + } + + /// Divides the first tensor by the second and stores the quotient in the left-hand-side + /// variable. + @inlinable + static func /= (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs / rhs + } + + /// Divides the tensor by the scalar, broadcasting the scalar, and stores the quotient in the + /// left-hand-side variable. + @inlinable + static func /= (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs / rhs + } + + /// Returns the remainder of dividing the first tensor by the second. + /// - Note: `%` supports broadcasting. + @inlinable + static func % (lhs: Tensor, rhs: Tensor) -> Tensor { + return Raw.mod(lhs, rhs) + } + + /// Returns the remainder of dividing the tensor by the scalar, broadcasting the scalar. + @inlinable + static func % (lhs: Tensor, rhs: Scalar) -> Tensor { + return lhs % Tensor(rhs) + } + + /// Returns the remainder of dividing the scalar by the tensor, broadcasting the scalar. + @inlinable + static func % (lhs: Scalar, rhs: Tensor) -> Tensor { + return Tensor(lhs) % rhs + } + + /// Divides the first tensor by the second and stores the remainder in the left-hand-side + /// variable. + @inlinable + static func %= (lhs: inout Tensor, rhs: Tensor) { + lhs = lhs % rhs + } + + /// Divides the tensor by the scalar and stores the remainder in the left-hand-side variable. 
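// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). Division and remainder broadcast a scalar operand, mirroring the
// tensor-tensor forms above.
let u = Tensor<Float>([3, 4, 5])
let halves = u / 2         // [1.5, 2.0, 2.5]
let remainders = u % 2     // [1.0, 0.0, 1.0]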
+ @inlinable + static func %= (lhs: inout Tensor, rhs: Scalar) { + lhs = lhs % rhs + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpAdd(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { + return (lhs + rhs, { v in (v, v.sum().scalarized()) }) + } + + @inlinable + static func _vjpAdd(lhs: Scalar, rhs: Tensor) -> (Tensor, (Tensor) -> (Scalar, Tensor)) { + return (lhs + rhs, { v in (v.sum().scalarized(), v) }) + } + + @inlinable + static func _vjpSubtract(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { + return (lhs - rhs, { v in (v, 0 - v.sum().scalarized()) }) + } + + @inlinable + static func _vjpSubtract(lhs: Scalar, rhs: Tensor) -> (Tensor, (Tensor) -> (Scalar, Tensor)) { + return (lhs - rhs, { v in (v.sum().scalarized(), 0 - v) }) + } + + @inlinable + static func _vjpMultiply(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { + return (lhs * rhs, { v in (v * rhs, (v * lhs).sum().scalarized()) }) + } + + @inlinable + static func _vjpMultiply(lhs: Scalar, rhs: Tensor) -> (Tensor, (Tensor) -> (Scalar, Tensor)) { + return (lhs * rhs, { v in ((v * rhs).sum().scalarized(), v * lhs) }) + } + + @inlinable + static func _vjpDivide(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (lhs / rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in + ((v / rhs).unbroadcast(toShape: lhsShape), + ((-lhs) / rhs.squared() * v).unbroadcast(toShape: rhsShape)) + }) + } + + @inlinable + static func _vjpDivide(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { + return (lhs / rhs, { v in + (v / rhs, (v * (0 - lhs) / Tensor(rhs).squared()).sum().scalarized()) + }) + } + + @inlinable + static func _vjpDivide(lhs: Scalar, rhs: Tensor) -> (Tensor, (Tensor) -> (Scalar, Tensor)) { + return (lhs / rhs, { v in ((v / rhs).sum().scalarized(), v * -lhs / rhs.squared()) }) + } +} + +public extension Tensor where Scalar == Bool { + /// Computes `!self` element-wise. + @inlinable + func elementsLogicalNot() -> Tensor { + return Raw.logicalNot(self) + } + + /// Computes `self && other` element-wise. + /// - Note: `&&` supports broadcasting. + @inlinable + func elementsLogicalAnd(_ other: Tensor) -> Tensor { + return Raw.logicalAnd(self, other) + } + + /// Computes `self && other` element-wise, broadcasting `other`. + @inlinable + func elementsLogicalAnd(_ other: Scalar) -> Tensor { + return elementsLogicalAnd(Tensor(other)) + } + + /// Computes `self || other` element-wise. + @inlinable + func elementsLogicalOr(_ other: Tensor) -> Tensor { + return Raw.logicalOr(self, other) + } + + /// Computes `self || other` element-wise, broadcasting `other`. + @inlinable + func elementsLogicalOr(_ other: Scalar) -> Tensor { + return elementsLogicalOr(Tensor(other)) + } +} + +//===------------------------------------------------------------------------------------------===// +// Universal Functions +//===------------------------------------------------------------------------------------------===// + /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. 
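// Illustrative usage sketch (editorial addition, not part of the original patch; values are
// hypothetical). The Boolean helpers above compose element-wise masks without relying on
// `&&`/`||`, which Swift defines for `Bool` operands.
let m1 = Tensor<Bool>([true, false, true])
let m2 = Tensor<Bool>([true, true, false])
let both = m1.elementsLogicalAnd(m2)        // [true, false, false]
let either = m1.elementsLogicalOr(m2)       // [true, true, true]
let neither = either.elementsLogicalNot()   // [false, false, false]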
@inlinable @differentiable(vjp: _vjpRound) diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift index daee815d8..e11e1f5d7 100644 --- a/Sources/DeepLearning/Tensors.swift +++ b/Sources/DeepLearning/Tensors.swift @@ -95,19 +95,3 @@ extension Tensor : Codable where Scalar : Codable { self.init(array) } } - -//===------------------------------------------------------------------------------------------===// -// Equality -//===------------------------------------------------------------------------------------------===// - -extension Tensor : Equatable where Scalar : Equatable { - @inlinable - public static func == (lhs: Tensor, rhs: Tensor) -> Bool { - return (lhs .== rhs).all() - } - - @inlinable - public static func != (lhs: Tensor, rhs: Tensor) -> Bool { - return (lhs .== rhs).any() - } -} From bc0a581166f6179d9a270403460190e46791e5e6 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 13:03:10 -0400 Subject: [PATCH 38/55] Removed all the newly-added ops. --- Sources/DeepLearning/Initializers.swift | 183 +++++-------- Sources/DeepLearning/Operators/Basic.swift | 304 --------------------- Sources/DeepLearning/Operators/Math.swift | 24 -- 3 files changed, 61 insertions(+), 450 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 7eb77250b..e644fd78a 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -109,44 +109,44 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - // @differentiable(where Scalar : TensorFlowFloatingPoint) + @differentiable(where Scalar : TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } - // /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than - // /// the current tensor and each tensor in `tensors`. - // /// - // /// Given that `tensors` all have shape `[A, B, C]`, and `tensors.count = N`, then: - // /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. - // /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. - // /// - etc. - // /// - // /// For example: - // /// ``` - // /// // 'x' is [1, 4] - // /// // 'y' is [2, 5] - // /// // 'z' is [3, 6] - // /// Tensor(stacking: [x, y, z]) // is [[1, 4], [2, 5], [3, 6]] - // /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] - // /// ``` - // /// - // /// This is the opposite of `Tensor.unstacked`. - // /// - // /// - Parameters: - // /// - tensors: Tensors to stack. - // /// - axis: Dimension along which to stack. Negative values wrap around. - // /// - // /// - Precondition: All tensors must have the same shape. - // /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the - // /// provided tensors. - // /// - // /// - Returns: The stacked tensor. - // @inlinable - // @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) - // init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { - // self = Raw.pack(tensors, axis: Int64(axis)) - // } + /// Stacks `tensors`, along the `axis` dimension, into a new tensor with rank one higher than + /// the current tensor and each tensor in `tensors`. 
+ /// + /// Given that `tensors` all have shape `[A, B, C]`, and `tensors.count = N`, then: + /// - if `axis == 0` then the resulting tensor will have the shape `[N, A, B, C]`. + /// - if `axis == 1` then the resulting tensor will have the shape `[A, N, B, C]`. + /// - etc. + /// + /// For example: + /// ``` + /// // 'x' is [1, 4] + /// // 'y' is [2, 5] + /// // 'z' is [3, 6] + /// Tensor(stacking: [x, y, z]) // is [[1, 4], [2, 5], [3, 6]] + /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] + /// ``` + /// + /// This is the opposite of `Tensor.unstacked`. + /// + /// - Parameters: + /// - tensors: Tensors to stack. + /// - axis: Dimension along which to stack. Negative values wrap around. + /// + /// - Precondition: All tensors must have the same shape. + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: The stacked tensor. + @inlinable + @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) + init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { + self = Raw.pack(tensors, axis: Int64(axis)) + } /// Concatenates `tensors` along the `axis` dimension. /// @@ -181,68 +181,40 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. @inlinable - // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } - - // /// Returns a tiled tensor, constructed by tiling the provided tensor. - // /// - // /// This constructor creates a new tensor by replicating `tensor` `multiples` times. The - // /// constructed tensor's `i`'th dimension has `tensor.shape[i] * multiples[i]` elements, and the - // /// values of `tensor` are replicated `multiples[i]` times along the `i`'th dimension. For - // /// example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`. - // /// - // /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. - // @inlinable - // @differentiable(wrt: tensor, vjp: _vjpTiling where Scalar : TensorFlowFloatingPoint) - // init(tiling tensor: Tensor, multiples: Tensor) { - // self = Raw.tile(tensor, multiples: multiples) - // } } -// internal extension Tensor where Scalar : TensorFlowFloatingPoint { -// @inlinable -// static func _vjpStacking( -// stacking tensors: [Tensor], -// alongAxis axis: Int = 0 -// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { -// let result = Tensor(stacking: tensors, alongAxis: axis) -// return (result, { v in -// return Array.DifferentiableView(v.unstack(alongAxis: axis)) -// }) -// } - -// @inlinable -// static func _vjpConcatenating( -// concatenating tensors: [Tensor], -// alongAxis axis: Int = 0 -// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { -// let result = Tensor(concatenating: tensors, alongAxis: axis) -// let posAxis = axis < 0 ? 
axis + tensors[0].rank : axis -// let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) -// return (result, { [count = tensors.count] v in -// if count == 1 { return Array.DifferentiableView([v]) } -// let splits = v.split(sizes: sizes, alongAxis: posAxis) -// return Array.DifferentiableView(splits) -// }) -// } +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpStacking( + stacking tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(stacking: tensors, alongAxis: axis) + return (result, { v in + return Array.DifferentiableView(v.unstack(alongAxis: axis)) + }) + } -// @inlinable -// static func _vjpTiling( -// tiling tensor: Tensor, -// multiples: Tensor -// ) -> (Tensor, (Tensor) -> Tensor) { -// let result = Tensor(tiling: tensor, multiples: multiples) -// return (result, { [shape = tensor.shapeTensor] v in -// let splitShape = Tensor(stacking: [multiples, shape]).transposed().flattened() -// let axes = Tensor( -// rangeFrom: 0, to: Int32(splitShape.scalarCount), stride: 2) -// return v.reshaped(toShape: splitShape).sum(squeezingAxes: axes) -// }) -// } -// } + @inlinable + static func _vjpConcatenating( + concatenating tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(concatenating: tensors, alongAxis: axis) + let posAxis = axis < 0 ? axis + tensors[0].rank : axis + let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) + return (result, { [count = tensors.count] v in + if count == 1 { return Array.DifferentiableView([v]) } + let splits = v.split(sizes: sizes, alongAxis: posAxis) + return Array.DifferentiableView(splits) + }) + } +} //===------------------------------------------------------------------------------------------===// // Numeric @@ -265,25 +237,6 @@ public extension Tensor where Scalar : Numeric { self.init(repeating: 1, shape: shape) } - // /// Creates a tensor with all scalars set to zero that has the same shape and type as the provided - // /// tensor. - // /// - // /// - Parameter other: Tensor whose shape and data type to use. - // @inlinable - // init(zerosLike other: Tensor) { - // self = Raw.zerosLike(other) - // } - - // /// Creates a tensor with all scalars set to one that has the same shape and type as the provided - // /// tensor. - // /// - // /// - Parameter other: Tensor whose shape and data type to use. - // @inlinable - // init(onesLike other: Tensor) { - // self = Raw.onesLike(other) - // } - - /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, /// an end value, stepping by the specified amount. /// @@ -300,20 +253,6 @@ public extension Tensor where Scalar : Numeric { self = Raw.range(start: Tensor(start), limit: Tensor(end), delta: Tensor(stride)) } - // /// Creates a 1-D tensor representing a sequence from a starting value to, but not including, an - // /// end value, stepping by the specified amount. - // /// - // /// - Parameters: - // /// - start: The starting value to use for the sequence. If the sequence contains any values, - // /// the first one is `start`. - // /// - end: An end value to limit the sequence. `end` is never an element of the resulting - // /// sequence. - // /// - stride: The amount to step by with each iteration. `stride` must be positive. 
- // @inlinable - // init(rangeFrom start: Tensor, to end: Tensor, stride: Tensor) { - // self = Raw.range(start: start, limit: end, delta: stride) - // } - /// Creates a one-hot tensor at given indices. The locations represented by /// `indices` take value `onValue` (`1` by default), while all other locations /// take value `offValue` (`0` by default). If the input `indices` is rank diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 7df841cf1..ae0fe360a 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -91,214 +91,6 @@ public extension Tensor { func squeezingShape(at axes: [Int]) -> Tensor { return Raw.squeeze(self, squeezeDims: axes.map(Int32.init)) } - - // @inlinable - // @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) - // func unstack(alongAxis axis: Int = 0) -> [Tensor] { - // return split(numSplits: shape[axis], alongAxis: axis) - // } - - // @inlinable - // @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar : TensorFlowFloatingPoint) - // func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { - // return Raw.split( - // splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) - // } - - // @inlinable - // @differentiable( - // wrt: self, - // vjp: _vjpSplit(sizes:alongAxis:) where Scalar : TensorFlowFloatingPoint) - // func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { - // return Raw.splitV( - // value: self, - // sizeSplits: sizes, - // splitDim: Tensor(Int32(axis)), - // numSplit: Int64(sizes.shape[0])) - // } - - // /// Gathers slices of this tensor at `indices` along the `axis` dimension. - // /// - // /// For 0-D (scalar) `indices`: - // /// ``` - // /// result[p_0, ..., p_{axis-1}, - // /// p_{axis + 1}, ..., p_{N-1}] = - // /// self[p_0, ..., p_{axis-1}, - // /// indices, - // /// p_{axis + 1}, ..., p_{N-1}] - // /// ``` - // /// - // /// For 1-D (vector) `indices`: - // /// ``` - // /// result[p_0, ..., p_{axis-1}, - // /// i, - // /// p_{axis + 1}, ..., p_{N-1}] = - // /// self[p_0, ..., p_{axis-1}, - // /// indices[i], - // /// p_{axis + 1}, ..., p_{N-1}] - // /// ``` - // /// - // /// In the general case, produces a resulting tensor where: - // /// ``` - // /// result[p_0, ..., p_{axis-1}, - // /// i_{batch\_dims}, ..., i_{M-1}, - // /// p_{axis + 1}, ..., p_{N-1}] = - // /// self[p_0, ..., p_{axis-1}, - // /// indices[i_0, ..., i_{M-1}], - // /// p_{axis + 1}, ..., p_{N-1}] - // /// ``` - // /// where `N = self.rank` and `M = indices.rank`. - // /// - // /// The shape of the resulting tensor is: - // /// `self.shape[.., - // alongAxis axis: Int = 0 - // ) -> Tensor { - // return Raw.gatherV2(params: self, indices: indices, axis: Tensor(Int32(axis))) - // } - - // /// Gathers slices of this tensor at `indices` along the `axis` dimension, while ignoring the - // /// first `batchDims` dimensions that correspond to batch dimensions. 
- // /// - // /// Performs similar functionality to `gathering`, except that the resulting tensor shape is now: - // /// `self.shape[.., - // alongAxis axis: Int, - // numBatchDims batchDims: Int - // ) -> Tensor { - // precondition(batchDims >= 0 && batchDims < indices.rank, - // "'numBatchDims' must be non-negative and less than 'indices.rank'.") - // precondition(batchDims < rank, "'numBatchDims' must be less than the tensor's rank.") - - // // Handle the axis argument by transposing the axis dimension so that it is the first - // // non-batch dimension, recursively calling `batchGathering` with `axis = 0`, and then - // // transposing the result to put the pre-axis dimensions before the indices dimensions. - // if axis != batchDims { - // // Adjust axis to be positive. - // let posAxis = axis < 0 ? axis + rank : axis - - // precondition(posAxis >= 0 && posAxis < rank, "'axis' is out of range.") - // precondition(batchDims <= posAxis, "'batchDims' must be less than or equal to 'axis'.") - - // // Move self[axis] up to self[batchDims]. - // let permutation = Tensor(concatenating: [ - // Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), - // Tensor(Int32(axis)).rankLifted(), - // Tensor(rangeFrom: Int32(batchDims), to: Int32(posAxis), stride: 1), - // Tensor(rangeFrom: Int32(axis) + 1, to: Int32(rank), stride: 1)]) - // let tensor = transposed(withPermutations: permutation) - // let result = tensor.batchGathering( - // atIndices: indices, alongAxis: batchDims, numBatchDims: batchDims) - - // // Move the result dimensions corresponding to self[batchDims ..< axis] to just before - // // the dimensions corresponding to indices[batchDims ...]. - // let start = indices.rank + posAxis - batchDims - // let resultPermutation = Tensor(concatenating: [ - // Tensor(rangeFrom: 0, to: Int32(batchDims), stride: 1), - // Tensor(rangeFrom: Int32(indices.rank), to: Int32(start), stride: 1), - // Tensor(rangeFrom: Int32(batchDims), to: Int32(indices.rank), stride: 1), - // Tensor(rangeFrom: Int32(start), to: Int32(result.rank), stride: 1)]) - // return result.transposed(withPermutations: resultPermutation) - // } - - // var batchIndices = indices - // var accumulated = Tensor(ones: []) - // for d in (1...batchDims).reversed() { - // accumulated *= shapeTensor[d] - // let dValue = shapeTensor[d - 1] - // let dIndices = Tensor( - // rangeFrom: Tensor(zeros: []), - // to: dValue, - // stride: Tensor(ones: []) - // ) * accumulated - // let dShape = Tensor(concatenating: [ - // Tensor([Int32](repeating: 1, count: d - 1)), - // dValue.rankLifted(), - // Tensor([Int32](repeating: 1, count: indices.rank - 1))]) - // batchIndices += dIndices.reshaped(toShape: dShape) - // } - - // let flatIndices = batchIndices.flattened() - // let outerShape = shapeTensor[Int(batchDims + 1)...] - // let innerShape = shapeTensor[.., alongAxis axis: Int = 0) -> Tensor { - // precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") - // let posAxis = axis < 0 ? 
axis + rank : axis - // let leadingSize = shapeTensor[posAxis ..< posAxis + mask.rank].product().rankLifted() - // let reshapedTensor = reshaped( - // toShape: Tensor(concatenating: [ - // shapeTensor[..(mask.flattened().nonZeroIndices().squeezingShape(at: 1)) - // return reshapedTensor.gathering(atIndices: indices, alongAxis: posAxis) - // } } internal extension Tensor where Scalar : TensorFlowFloatingPoint { @@ -319,100 +111,4 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { let value = squeezingShape(at: axes) return (value, { [shape = shapeTensor] v in v.reshaped(toShape: shape) }) } - - // @inlinable - // func _vjpSplit( - // numSplits: Int, - // alongAxis axis: Int = 0 - // ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { - // let result = split(numSplits: numSplits, alongAxis: axis) - // return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) - // } - - // @inlinable - // func _vjpSplit( - // sizes: Tensor, - // alongAxis axis: Int = 0 - // ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { - // let result = split(sizes: sizes, alongAxis: axis) - // return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) - // } - - // @inlinable - // func _vjpGathering( - // atIndices indices: Tensor, - // alongAxis axis: Int = 0 - // ) -> (Tensor, (Tensor) -> Tensor) { - // let result = gathering(atIndices: indices, alongAxis: axis) - // let posAxis = axis < 0 ? axis + rank : axis - // return (result, { [shape = shapeTensor] v in - // let indicesSize = Tensor(Int32(indices.scalarCount)) - // let outerShape = shape[..(rangeFrom: 0, to: Int32(outerSize), stride: 1) - // let innerIndices = Tensor( - // rangeFrom: Int32(outerSize) + 1, - // to: Int32(outerSize) + 1 + Int32(innerSize), - // stride: 1) - // let valuesShape = Tensor(concatenating: [outerShape, indicesSize, innerShape]) - // let values = v.reshaped(toShape: valuesShape) - // let valueIndices = indices.reshaped(toShape: indicesSize) - - // // We need to sum up every slice `values[..., i, ....]` corresponding to - // // `tensor[..., indices[i], ...]`. Since `unsortedSegmentSum` does not support an axis - // // parameter, we transpose the gather dimension to the front, then use - // // `unsortedSegmentSum` to build a `[gatherAxis, outerAxes, innerAxes]` tensor with all - // // the gradients affecting each index in `gatherAxis` summed up. - // let permutations = Tensor(concatenating: [ - // Tensor([Int32(outerSize)]), outerIndices, innerIndices]) - // let transposedValues = values.transposed(withPermutations: permutations) - // let gradient = Raw.unsortedSegmentSum( - // data: transposedValues, - // segmentIds: valueIndices, - // numSegments: shape[posAxis]) - - // // Finally, we invert the above transpose operation by moving dimension 0 back to its - // // original position. - // let inversePermutations = Tensor(concatenating: [ - // outerIndices + 1, Tensor([0]), innerIndices]) - // return gradient.transposed(withPermutations: inversePermutations) - // }) - // } } - -// public extension Tensor { -// /// Returns the locations of non-zero / true values in this tensor. -// /// -// /// The coordinates are returned in a 2-D tensor where the first dimension (rows) represents the -// /// number of non-zero elements, and the second dimension (columns) represents the coordinates -// /// of the non-zero elements. Keep in mind that the shape of the output tensor can vary -// /// depending on how many true values there are in this tensor. Indices are output in row-major -// /// order. 
-// /// -// /// For example: -// /// ``` -// /// // 'input' is [[true, false], [true, false]] -// /// // 'input' has 2 true values and so the output has 2 rows. -// /// // 'input' has rank of 2, and so the second dimension of the output has size 2. -// /// input.nonZeroIndices() // is [[0, 0], [1, 0]] -// /// -// /// // 'input' is [[[ true, false], [ true, false]], -// /// // [[false, true], [false, true]], -// /// // [[false, false], [false, true]]] -// /// // 'input' has 5 true values and so the output has 5 rows. -// /// // 'input' has rank 3, and so the second dimension of the output has size 3. -// /// input.nonZeroIndices() // is [[0, 0, 0], -// /// // [0, 1, 0], -// /// // [1, 0, 1], -// /// // [1, 1, 1], -// /// // [2, 1, 1]] -// /// ``` -// /// -// /// - Returns: A tensor with shape `(num_true, rank(condition))`. -// @inlinable -// func nonZeroIndices() -> Tensor { -// return Raw.where_(self) -// } -// } diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 56225be4a..2799004cd 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -381,30 +381,6 @@ internal func _vjpSigmoid( return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) } -// /// Computes the log-sigmoid of the specified tensor element-wise. Specifically, -// /// `y = log(1 / (1 + exp(-x)))`. For numerical stability, we use `y = -softplus(-x)`. -// @inlinable -// @differentiable -// public func logSigmoid(_ x: Tensor) -> Tensor { -// return -softplus(-x) -// } - -// /// Computes the softplus function for the specified tensor element-wise. The softplus function is -// /// defined as `log(exp(x) + 1)`. -// @inlinable -// @differentiable(vjp: _vjpSoftplus) -// public func softplus(_ x: Tensor) -> Tensor { -// return Raw.softplus(features: x) -// } - -// @inlinable -// internal func _vjpSoftplus( -// _ x: Tensor -// ) -> (Tensor, (Tensor) -> Tensor) { -// return (softplus(x), { v in v * sigmoid(x) }) -// } - - /// Computes the softmax of the specified tensor along the last axis. /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. @inlinable From a91c00a26d65c86952c3f3e2a4895eb4bcc01a5b Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 13:31:49 -0400 Subject: [PATCH 39/55] Moved some more stuff to swift-apis. --- Sources/DeepLearning/Operators/Basic.swift | 90 +++++ Sources/DeepLearning/Operators/Math.swift | 438 ++++++++++++++++++++- 2 files changed, 521 insertions(+), 7 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index ae0fe360a..6327d1390 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -112,3 +112,93 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { return (value, { [shape = shapeTensor] v in v.reshaped(toShape: shape) }) } } + +//===------------------------------------------------------------------------------------------===// +// Other Tensor Transformations +//===------------------------------------------------------------------------------------------===// + +public extension Tensor { + /// Returns a transposed tensor, with dimensions permuted in the specified order. 
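// Illustrative usage sketch (editorial addition, not part of the original patch; shapes are
// hypothetical). `transposed(withPermutations:)`, defined just below, reorders the dimensions
// according to the given permutation, and the no-argument `transposed()` reverses them.
let w = Tensor<Float>(ones: [2, 3, 4])
let swapped = w.transposed(withPermutations: 0, 2, 1)   // shape [2, 4, 3]
let reversed = w.transposed()                           // shape [4, 3, 2]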
+ @inlinable + @differentiable( + wrt: self, + vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + func transposed(withPermutations permutations: Tensor) -> Tensor { + return Raw.transpose(self, perm: permutations) + } + + /// Returns a transposed tensor, with dimensions permuted in the specified order. + @inlinable + @differentiable( + wrt: self, + vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + func transposed(withPermutations permutations: [Int]) -> Tensor { + let permutations = permutations.map(Int32.init) + return transposed(withPermutations: Tensor(permutations)) + } + + /// Returns a transposed tensor, with dimensions permuted in the specified order. + @inlinable + @differentiable( + wrt: self, vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + func transposed(withPermutations permutations: Int...) -> Tensor { + return transposed(withPermutations: permutations) + } + + /// Returns a transposed tensor, with dimensions permuted in reverse order. + @inlinable + @differentiable(wrt: self, vjp: _vjpTransposed() where Scalar : TensorFlowFloatingPoint) + func transposed() -> Tensor { + let defaultPermutations = rankTensor - 1 - Tensor( + rangeFrom: 0, to: Int32(rank), stride: 1) + return transposed(withPermutations: Tensor(defaultPermutations)) + } + + /// Concatenates tensors along the specified axis. + /// - Precondition: The tensors must have the same dimensions, except for the + /// specified axis. + /// - Precondition: The axis must be in the range `-rank.. Tensor { + return Tensor(concatenating: [self, other], alongAxis: axis) + } + + /// Concatenation operator. + /// - Note: `++` is a custom operator that does not exist in Swift, but does + /// in Haskell/Scala. Its addition is not an insignificant language change + /// and may be controversial. The existence/naming of `++` will be discussed + /// during a later API design phase. + @inlinable + @differentiable(where Scalar : TensorFlowFloatingPoint) + static func ++ (lhs: Tensor, rhs: Tensor) -> Tensor { + return lhs.concatenated(with: rhs) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + func _vjpTransposed( + withPermutations permutations: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + let value = transposed(withPermutations: permutations) + return (value, { $0.transposed(withPermutations: permutations) }) + } + + @inlinable + func _vjpTransposed(withPermutations permutations: [Int]) -> (Tensor, (Tensor) -> Tensor) { + let value = transposed(withPermutations: permutations) + return (value, { $0.transposed(withPermutations: permutations) }) + } + + @inlinable + func _vjpTransposed(withPermutations permutations: Int...) 
-> (Tensor, (Tensor) -> Tensor) { + let value = transposed(withPermutations: permutations) + return (value, { $0.transposed(withPermutations: permutations) }) + } + + @inlinable + func _vjpTransposed() -> (Tensor, (Tensor) -> Tensor) { + return (transposed(), { $0.transposed() }) + } +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 2799004cd..7b295912d 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -349,21 +349,445 @@ public extension Tensor where Scalar == Bool { } //===------------------------------------------------------------------------------------------===// -// Universal Functions +// Element-wise Unary Math Functions //===------------------------------------------------------------------------------------------===// -/// Returns the values of the specified tensor rounded to the nearest integer, element-wise. +// Export Glibc/Darwin math functions. We should not require users to import +// Foundation/Darwin/Glibc in order to use scalar math functions. +// +#if os(macOS) || os(iOS) || os(watchOS) || os(tvOS) +@_exported import Darwin.C +#else +@_exported import Glibc +#endif +// +// FIXME(rxwei): Scoped imports are not yet supported in parseable module +// interfaces, so `@_exported import` won't work. When that becomes supported, +// switch to `@_exported import` by removing `import Darwin.C/Glibc` above and +// uncommenting the following lines. In the meantime, consider using indirect +// wrappers for each function so that random libc symbols won't be leaked to +// users' code completion. +// +// #if os(macOS) || os(iOS) || os(watchOS) || os(tvOS) +// @_exported import func Darwin.C.sin +// @_exported import func Darwin.C.cos +// @_exported import func Darwin.C.tan +// @_exported import func Darwin.C.sinf +// @_exported import func Darwin.C.cosf +// @_exported import func Darwin.C.tanf +// @_exported import func Darwin.C.sinh +// @_exported import func Darwin.C.cosh +// @_exported import func Darwin.C.tanh +// @_exported import func Darwin.C.sinhf +// @_exported import func Darwin.C.coshf +// @_exported import func Darwin.C.tanhf +// @_exported import func Darwin.C.log +// @_exported import func Darwin.C.logf +// @_exported import func Darwin.C.exp +// @_exported import func Darwin.C.expf +// @_exported import func Darwin.C.pow +// @_exported import func Darwin.C.powf +// #else +// @_exported import func Glibc.sin +// @_exported import func Glibc.cos +// @_exported import func Glibc.tan +// @_exported import func Glibc.sinf +// @_exported import func Glibc.cosf +// @_exported import func Glibc.tanf +// @_exported import func Glibc.sinh +// @_exported import func Glibc.cosh +// @_exported import func Glibc.tanh +// @_exported import func Glibc.sinhf +// @_exported import func Glibc.coshf +// @_exported import func Glibc.tanhf +// @_exported import func Glibc.log +// @_exported import func Glibc.logf +// @_exported import func Glibc.exp +// @_exported import func Glibc.expf +// @_exported import func Glibc.pow +// @_exported import func Glibc.powf +// #endif + +public extension Tensor where Scalar : SignedNumeric { + /// Computes the negation of the specified tensor element-wise. 
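+  ///
+  /// For example (illustrative), `-Tensor<Float>([1, -2, 3])` evaluates to `[-1, 2, -3]`.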
+ @inlinable + @differentiable(vjp: _vjpNegate(_:) where Scalar : TensorFlowFloatingPoint) + static prefix func - (rhs: Tensor) -> Tensor { + return Raw.neg(rhs) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpNegate(_ x: Tensor) -> (Tensor, (Tensor) -> Tensor) { + return (-x, { v in -v }) + } +} + +/// Computes the absolute value of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpRound) -public func round(_ x: Tensor) -> Tensor { - return Raw.round(x) +@differentiable(vjp: _vjpAbs(_:) where T : TensorFlowFloatingPoint) +public func abs(_ x: Tensor) -> Tensor { + return Raw.abs(x) } @inlinable -internal func _vjpRound( +internal func _vjpAbs( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { - return (round(x), { v in Tensor(zerosLike: v) }) + let sign = Raw.sign(x) + return (abs(x), { v in v * sign }) +} + +/// Computes the natural logarithm of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpLog(_:) where T : TensorFlowFloatingPoint) +public func log(_ x: Tensor) -> Tensor { + return Raw.log(x) +} + +@inlinable +internal func _vjpLog( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (log(x), { v in v / x }) +} + +/// Computes `sin` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpSin(_:) where T : TensorFlowFloatingPoint) +public func sin(_ x: Tensor) -> Tensor { + return Raw.sin(x) +} + +@inlinable +internal func _vjpSin( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (sin(x), { v in v * cos(x) }) +} + +/// Computes `cos` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpCos(_:) where T : TensorFlowFloatingPoint) +public func cos(_ x: Tensor) -> Tensor { + return Raw.cos(x) +} + +@inlinable +internal func _vjpCos( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (cos(x), { v in -v * sin(x) }) +} + +/// Computes `tan` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpTan(_:) where T : TensorFlowFloatingPoint) +public func tan(_ x: Tensor) -> Tensor { + return Raw.tan(x) +} + +@inlinable +internal func _vjpTan( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = tan(x) + return (value, { v in v * (1 + value.squared()) }) +} + +/// Computes `sinh` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpSinh(_:) where T : TensorFlowFloatingPoint) +public func sinh(_ x: Tensor) -> Tensor { + return Raw.sinh(x) +} + +@inlinable +internal func _vjpSinh( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (sinh(x), { v in v * cosh(x) }) +} + +/// Computes `cosh` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpCosh(_:) where T : TensorFlowFloatingPoint) +public func cosh(_ x: Tensor) -> Tensor { + return Raw.cosh(x) +} + +@inlinable +internal func _vjpCosh( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (cosh(x), { v in v * sinh(x) }) +} + +/// Computes `tanh` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpTanh(_:) where T : TensorFlowFloatingPoint) +public func tanh(_ x: Tensor) -> Tensor { + return Raw.tanh(x) +} + +@inlinable +internal func _vjpTanh( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = tanh(x) + return (value, { v in v * (1 - value.squared()) }) +} + +/// Computes the square of the tensor. 
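+///
+/// For example (illustrative), `Tensor<Float>([2, -3]).squared()` evaluates to `[4, 9]`.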
+public extension Tensor where Scalar : Numeric { + @inlinable + @differentiable(wrt: self, vjp: _vjpSquared() where Scalar : TensorFlowFloatingPoint) + func squared() -> Tensor { + return Raw.square(self) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + func _vjpSquared() -> (Tensor, (Tensor) -> Tensor) { + return (squared(), { 2 * self * $0 }) + } +} + +/// Computes the square root of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpSqrt(_:) where T : TensorFlowFloatingPoint) +public func sqrt(_ x: Tensor) -> Tensor { + return Raw.sqrt(x) +} + +@inlinable +internal func _vjpSqrt( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = sqrt(x) + return (value, { v in v / (2 * value) }) +} + +/// Computes the inverse square root of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpRsqrt(_:) where T : TensorFlowFloatingPoint) +public func rsqrt(_ x: Tensor) -> Tensor { + return Raw.rsqrt(x) +} + +@inlinable +internal func _vjpRsqrt( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = rsqrt(x) + return (value, { v in -v / 2 * value }) +} + +/// Computes `exp` of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpExp(_:) where T : TensorFlowFloatingPoint) +public func exp(_ x: Tensor) -> Tensor { + return Raw.exp(x) +} + +@inlinable +internal func _vjpExp( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = exp(x) + return (value, { v in value * v }) +} + +/// Computes the ceiling of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpCeil(_:) where T : TensorFlowFloatingPoint) +public func ceil(_ x: Tensor) -> Tensor { + return Raw.ceil(x) +} + +@inlinable +internal func _vjpCeil( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (ceil(x), { _ in Tensor(0).broadcast(like: x) }) +} + +/// Computes the floor of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpFloor(_:) where T : TensorFlowFloatingPoint) +public func floor(_ x: Tensor) -> Tensor { + return Raw.floor(x) +} + +@inlinable +internal func _vjpFloor( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (floor(x), { _ in Tensor(0).broadcast(like: x) }) +} + +//===------------------------------------------------------------------------------------------===// +// Element-wise Binary Math Functions +//===------------------------------------------------------------------------------------------===// + +/// Computes the power of the first tensor to the second tensor. +@inlinable +@differentiable(vjp: _vjpPow(_:_:) where T : TensorFlowFloatingPoint) +public func pow(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : FloatingPoint { + return Raw.pow(lhs, rhs) +} + +@inlinable +internal func _vjpPow( + _ x: Tensor, _ y: Tensor +) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = pow(x, y) + return (value, { v in + ((v * y * pow(x, y-1)).unbroadcast(like: x), + (v * log(x) * value).unbroadcast(like: y)) + }) +} + +/// Computes the power of the scalar to the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func pow(_ lhs: T, _ rhs: Tensor) -> Tensor where T : FloatingPoint { + return pow(Tensor(lhs), rhs) +} + +/// Computes the power of the tensor to the scalar, broadcasting the scalar. 
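+///
+/// For example (illustrative), `pow(Tensor<Float>([2, 3]), 2)` evaluates to `[4, 9]`.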
+@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func pow(_ lhs: Tensor, _ rhs: T) -> Tensor where T : FloatingPoint { + return pow(lhs, Tensor(rhs)) +} + +/// Computes the element-wise maximum of two tensors. +/// - Note: `max` supports broadcasting. +@inlinable +@differentiable(vjp: _vjpMax(_:_:) where T : TensorFlowFloatingPoint) +public func max(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { + return Raw.maximum(lhs, rhs) +} + +@inlinable +internal func _vjpMax( + _ x: Tensor, _ y: Tensor +) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = max(x, y) + return (value, { v in _vjpMinMaxHelper(x, y, originalValue: value, vector: v) }) +} + +/// Computes the element-wise maximum of the scalar and the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func max(_ lhs: T, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { + return max(Tensor(lhs), rhs) +} + +/// Computes the element-wise maximum of the scalar and the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func max(_ lhs: Tensor, _ rhs: T) -> Tensor where T : Numeric & Comparable { + return max(lhs, Tensor(rhs)) +} + +/// Computes the element-wise minimum of two tensors. +/// - Note: `min` supports broadcasting. +@inlinable +@differentiable(vjp: _vjpMin(_:_:) where T : TensorFlowFloatingPoint) +public func min(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { + return Raw.minimum(lhs, rhs) +} + +@inlinable +internal func _vjpMin( + _ x: Tensor, _ y: Tensor +) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = min(x, y) + return (value, { v in _vjpMinMaxHelper(x, y, originalValue: value, vector: v) }) +} + +/// Computes the element-wise minimum of the scalar and the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func min(_ lhs: T, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { + return min(Tensor(lhs), rhs) +} + +/// Computes the element-wise minimum of the scalar and the tensor, broadcasting the scalar. +@inlinable +// @differentiable(where T : TensorFlowFloatingPoint) +public func min(_ lhs: Tensor, _ rhs: T) -> Tensor where T : Numeric & Comparable { + return min(lhs, Tensor(rhs)) +} + +@inlinable +internal func _vjpMinMaxHelper( + _ x: Tensor, + _ y: Tensor, + originalValue: Tensor, + vector: Tensor +) -> (Tensor, Tensor) { + let denom = 1 + Tensor(x .== y) + let dfdx = vector * Tensor(x .== originalValue) / denom + let dfdy = vector * Tensor(y .== originalValue) / denom + return (dfdx.unbroadcast(like: x), dfdy.unbroadcast(like: y)) +} + +//===------------------------------------------------------------------------------------------===// +// Selection Functions +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar == Bool { + /// Returns a new tensor containing elements from either `left` or `right`, + /// depending on the elements of `self`. + /// + /// `self` acts as a mask that chooses, based on the value at each scalar, + /// whether the corresponding scalar in the output should be taken from + /// `left` (if `true`) or `right` (if `false`). + /// + /// - Precondition: `left` and `right` must have the same shape. If + /// `left` and `right` are scalar, then `self` must also be scalar. 
If + /// `left` and `right` have rank greater than or equal to 1, then `self` + /// must be either have the same shape as `left` or be a 1-D `Tensor` such + /// that `self.scalarCount == left[0]`. + @available(*, deprecated, message: "Use '.replacing(with:mask:)' instead") + @inlinable + func selecting(_ left: Tensor, _ right: Tensor) -> Tensor { + return left.replacing(with: right, where: self) + } +} + +public extension Tensor { + /// Replaces elements of this tensor with `other` in the lanes where `mask` is + /// `true`. + /// + /// - Precondition: `self` and `other` must have the same shape. If + /// `self` and `other` are scalar, then `mask` must also be scalar. If + /// `self` and `other` have rank greater than or equal to `1`, then `mask` + /// must be either have the same shape as `self` or be a 1-D `Tensor` such + /// that `mask.scalarCount == self.shape[0]`. + @inlinable + @differentiable(wrt: (self, other), vjp: _vjpReplacing where Scalar : TensorFlowFloatingPoint) + func replacing(with other: Tensor, where mask: Tensor) -> Tensor { + return Raw.select(condition: mask, t: self, e: other) + } +} + +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + func _vjpReplacing( + with other: Tensor, + where mask: Tensor + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return (replacing(with: other, where: mask), { v in + let zeros = Tensor(zeros: v.shape) + return (v.replacing(with: zeros, where: mask), zeros.replacing(with: v, where: mask)) + }) + } } /// Computes the sigmoid of the specified tensor element-wise. From 0ad98436d047da18ff3bcd5c6c92edcb60886ca4 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 14:07:26 -0400 Subject: [PATCH 40/55] Moved some more stuff to swift-apis. --- Sources/DeepLearning/Operators/Basic.swift | 368 ++++++++++ Sources/DeepLearning/Operators/Math.swift | 755 +++++++++++++++++++-- Sources/DeepLearning/Tensors.swift | 30 + 3 files changed, 1094 insertions(+), 59 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 6327d1390..42479481f 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -202,3 +202,371 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { return (transposed(), { $0.transposed() }) } } + +//===------------------------------------------------------------------------------------------===// +// Broadcasting +//===------------------------------------------------------------------------------------------===// + +// TODO: What about precedence? Also, why is this operator meaningful for broadcasting? +infix operator .= + +public extension Tensor { + @inlinable + func broadcast(toShape shape: Tensor) -> Tensor { + return Raw.broadcastTo(self, shape: shape) + } + + @inlinable + func broadcast(to shape: TensorShape) -> Tensor { + return broadcast(toShape: Tensor(shape.dimensions.map(Int32.init))) + } + + /// Broadcast to the same shape as the specified `Tensor`. + /// - Precondition: The specified shape must be compatible for broadcasting. + @inlinable + func broadcast(like other: Tensor) -> Tensor { + return broadcast(toShape: other.shapeTensor) + } + + @inlinable + static func .= (lhs: inout Tensor, rhs: Tensor) { + lhs = rhs.broadcast(like: lhs) + } +} + +// TODO: Why is this limited only to numeric data types whereas `broadcast` is not? 
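+// `unbroadcast` is the dual of `broadcast`: it reduces, by summation, the dimensions that
+// broadcasting expanded. For example (illustrative), assuming `x` has shape `[3, 1]`:
+//
+//     let y = x.broadcast(to: [3, 4])     // shape is [3, 4]
+//     let z = y.unbroadcast(to: [3, 1])   // sums over the broadcast axis; shape is [3, 1]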
+public extension Tensor where Scalar : Numeric { + @inlinable + func unbroadcast(toShape otherShape: Tensor) -> Tensor { + let rankDiff = (rankTensor - otherShape.scalarCountTensor).rankLifted() + let ones: Tensor = Raw.fill(dims: rankDiff, value: Tensor(1)) + let paddedShape = ones ++ otherShape + let nonEqualIndices = paddedShape .!= shapeTensor + let broadcastIndices = Raw.where_(nonEqualIndices).flattened() + let unbroadcasted: Tensor = Raw.sum( + self, reductionIndices: Tensor(broadcastIndices), keepDims: false) + return Raw.reshape(unbroadcasted, shape: otherShape) + } + + @inlinable + func unbroadcast(like other: Tensor) -> Tensor { + return unbroadcast(toShape: other.shapeTensor) + } + + @inlinable + func unbroadcast(to shape: TensorShape) -> Tensor { + return unbroadcast(toShape: Tensor(shape.dimensions.map(Int32.init))) + } +} + +//===------------------------------------------------------------------------------------------===// +// Padding +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar : Numeric { + /// Returns a padded tensor according to the specified padding sizes. + @inlinable + func padded(forSizes sizes: [(before: Int, after: Int)], with value: Scalar = 0) -> Tensor { + let paddings = Tensor( + shape: [sizes.count, 2], + scalars: sizes.flatMap { [Int32($0.before), Int32($0.after)] }) + return Raw.padV2(self, paddings: paddings, constantValues: Tensor(value)) + } +} + +//===------------------------------------------------------------------------------------------===// +// Indexing and Slicing +//===------------------------------------------------------------------------------------------===// + +// TODO: Negative indexing and strides syntax. + +public extension Tensor { + /// Extracts a slice from the tensor defined by lower and upper bounds for + /// each dimension. + /// + /// - Parameter lowerBounds: The lower bounds at each dimension. + /// - Parameter upperBounds: The upper bounds at each dimension. + @inlinable + @differentiable(wrt: self) + func slice(lowerBounds: [Int], upperBounds: [Int]) -> Tensor { + // TODO: Precondition `lowerBounds.count == upperBounds.count`, + // preferably in graph. + // TODO: Differentiating control flow is not supported yet, thus the thunks. 
+ let lowerBoundsTensor = Tensor({lowerBounds.map(Int32.init)}()) + let upperBoundsTensor = Tensor({upperBounds.map(Int32.init)}()) + return slice(lowerBounds: lowerBoundsTensor, sizes: upperBoundsTensor - lowerBoundsTensor) + } + + @inlinable + @differentiable(wrt: self, vjp: _vjpSlice) + func slice(lowerBounds: Tensor, sizes: Tensor) -> Tensor { + return Raw.slice(self, begin: lowerBounds, size: sizes) + } + + @inlinable + internal func _vjpSlice( + lowerBounds: Tensor, + sizes: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + let value = slice(lowerBounds: lowerBounds, sizes: sizes) + let afterPaddings = shapeTensor - value.shapeTensor - lowerBounds + return (value, { [after = afterPaddings] v in + let beforePaddings = lowerBounds.expandingShape(at: 1) + let afterPaddings = after.expandingShape(at: 1) + let paddings = Tensor( + concatenating: [beforePaddings, afterPaddings], alongAxis: 1) + return Raw.pad(v, paddings: paddings) + }) + } +} + +public enum TensorRange : TensorRangeExpression { + case ellipsis + case newAxis + case squeezeAxis + case index(Int) + case range(Range, stride: Int) + case closedRange(ClosedRange, stride: Int) + case partialRangeFrom(PartialRangeFrom, stride: Int) + case partialRangeUpTo(PartialRangeUpTo, stride: Int) + case partialRangeThrough(PartialRangeThrough, stride: Int) + + public var tensorRange: TensorRange { return self } +} + +extension TensorRange : Equatable { + public static func == (lhs: TensorRange, rhs: TensorRange) -> Bool { + switch (lhs, rhs) { + case (.ellipsis, .ellipsis), + (.newAxis, .newAxis), + (.squeezeAxis, .squeezeAxis): + return true + case (let .index(i1), let .index(i2)): return i1 == i2 + case (let .range(r1, s1), let .range(r2, s2)): return r1 == r2 && s1 == s2 + case (let .closedRange(r1, s1), let .closedRange(r2, s2)): + return r1 == r2 && s1 == s2 + case (let .partialRangeFrom(r1, s1), let .partialRangeFrom(r2, s2)): + return r1.lowerBound == r2.lowerBound && s1 == s2 + case (let .partialRangeUpTo(r1, s1), let .partialRangeUpTo(r2, s2)): + return r1.upperBound == r2.upperBound && s1 == s2 + case (let .partialRangeThrough(r1, s1), let .partialRangeThrough(r2, s2)): + return r1.upperBound == r2.upperBound && s1 == s2 + default: return false + } + } +} + +public protocol TensorRangeExpression { + var tensorRange: TensorRange { get } +} + +// TODO: Cannot extend non-nominal type 'UnboundedRange'. +// extension UnboundedRange : TensorRangeExpression { +// public var tensorRange: TensorRange { return .ellipsis } +// } + +extension Int : TensorRangeExpression { + public var tensorRange: TensorRange { return .index(self) } +} + +extension Range : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .range(self, stride: 1) + } +} + +extension ClosedRange : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .closedRange(self, stride: 1) + } +} + +extension PartialRangeFrom : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .partialRangeFrom(self, stride: 1) + } +} + +extension PartialRangeUpTo : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .partialRangeUpTo(self, stride: 1) + } +} + +extension PartialRangeThrough : TensorRangeExpression where Bound == Int { + public var tensorRange: TensorRange { + return .partialRangeThrough(self, stride: 1) + } +} + +infix operator .. 
: StridedRangeFormationPrecedence +precedencegroup StridedRangeFormationPrecedence { + associativity: left + higherThan: CastingPrecedence + lowerThan: RangeFormationPrecedence +} + +public extension Range where Bound == Int { + static func .. (range: Range, stride: Int) -> TensorRange { + return .range(range, stride: stride) + } +} + +public extension ClosedRange where Bound == Int { + static func .. (range: ClosedRange, stride: Int) -> TensorRange { + return .closedRange(range, stride: stride) + } +} + +public extension PartialRangeFrom where Bound == Int { + static func .. (range: PartialRangeFrom, stride: Int) -> TensorRange { + return .partialRangeFrom(range, stride: stride) + } +} + +public extension PartialRangeUpTo where Bound == Int { + static func .. (range: PartialRangeUpTo, stride: Int) -> TensorRange { + return .partialRangeUpTo(range, stride: stride) + } +} + +public extension PartialRangeThrough where Bound == Int { + static func .. (range: PartialRangeThrough, stride: Int) -> TensorRange { + return .partialRangeThrough(range, stride: stride) + } +} + +public extension Tensor { + @_fixed_layout @usableFromInline + internal struct IndexPath { + @usableFromInline + let begin, end, strides: Tensor + + @usableFromInline + let beginMask, endMask, ellipsisMask, newAxisMask, squeezeAxisMask: Int64 + + @inlinable + public init( + begin: Tensor, end: Tensor, strides: Tensor, + beginMask: Int64, endMask: Int64, ellipsisMask: Int64, newAxisMask: Int64, + squeezeAxisMask: Int64 + ) { + self.begin = begin + self.end = end + self.strides = strides + self.beginMask = beginMask + self.endMask = endMask + self.ellipsisMask = ellipsisMask + self.newAxisMask = newAxisMask + self.squeezeAxisMask = squeezeAxisMask + } + } + + @inlinable + @differentiable(wrt: self, vjp: _vjpSubscript) + internal subscript(_ indexPath: IndexPath) -> Tensor { + get { + return Raw.stridedSlice( + self, begin: indexPath.begin, end: indexPath.end, + strides: indexPath.strides, beginMask: indexPath.beginMask, + endMask: indexPath.endMask, ellipsisMask: indexPath.ellipsisMask, + newAxisMask: indexPath.newAxisMask, + shrinkAxisMask: indexPath.squeezeAxisMask) + } + set { + self = Raw.tensorStridedSliceUpdate( + self, begin: indexPath.begin, end: indexPath.end, + strides: indexPath.strides, value: newValue, + beginMask: indexPath.beginMask, endMask: indexPath.endMask, + ellipsisMask: indexPath.ellipsisMask, + newAxisMask: indexPath.newAxisMask, + shrinkAxisMask: indexPath.squeezeAxisMask) + } + } + + @inlinable + // TODO: @differentiable(wrt: self) + subscript(_ ranges: TensorRangeExpression...) 
-> Tensor { + get { + return self[IndexPath(ranges.map { $0.tensorRange })] + } + set { + self[IndexPath(ranges.map { $0.tensorRange })] = newValue + } + } + + @usableFromInline + internal func _vjpSubscript( + _ indexPath: IndexPath + ) -> (Tensor, (Tensor) -> Tensor) { + return (self[indexPath], { [shape = shapeTensor] v in + Raw.stridedSliceGrad( + shape: shape, begin: indexPath.begin, end: indexPath.end, + strides: indexPath.strides, dy: v, beginMask: indexPath.beginMask, + endMask: indexPath.endMask, ellipsisMask: indexPath.ellipsisMask, + newAxisMask: indexPath.newAxisMask, + shrinkAxisMask: indexPath.squeezeAxisMask) + }) + } +} + +internal extension Tensor.IndexPath { + @inlinable + init(_ ranges: [TensorRange]) { + precondition(!ranges.isEmpty, "The tensor range collection cannot be empty.") + precondition(ranges.count { $0 == TensorRange.ellipsis } < 2, + "Only one ellipsis is allowed per tensor range collection.") + + var begin = [Int32](repeating: 0, count: ranges.count) + var end = [Int32](repeating: 0, count: ranges.count) + var strides = [Int32](repeating: 1, count: ranges.count) + var beginMask: Int64 = 0 + var endMask: Int64 = 0 + var ellipsisMask: Int64 = 0 + var newAxisMask: Int64 = 0 + var squeezeAxisMask: Int64 = 0 + for (i, index) in ranges.enumerated() { + switch index { + case .ellipsis: ellipsisMask |= 1 << i + case .newAxis: newAxisMask |= 1 << i + case .squeezeAxis: squeezeAxisMask |= 1 << i + case .index(let index): + begin[i] = Int32(index) + end[i] = Int32(index) + 1 + squeezeAxisMask |= 1 << i + case .range(let range, let stride): + begin[i] = Int32(range.lowerBound) + end[i] = Int32(range.upperBound) + strides[i] = Int32(stride) + case .closedRange(let range, let stride): + begin[i] = Int32(range.lowerBound) + switch Int32(range.upperBound) { + case -1: endMask |= 1 << i + case let u: end[i] = u + 1 + } + strides[i] = Int32(stride) + case .partialRangeFrom(let range, let stride): + begin[i] = Int32(range.lowerBound) + strides[i] = Int32(stride) + endMask |= 1 << i + case .partialRangeUpTo(let range, let stride): + end[i] = Int32(range.upperBound) + strides[i] = Int32(stride) + beginMask |= 1 << i + case .partialRangeThrough(let range, let stride): + end[i] = Int32(range.upperBound) + 1 + strides[i] = Int32(stride) + beginMask |= 1 << i + } + } + + self.begin = Tensor(begin) + self.end = Tensor(end) + self.strides = Tensor(strides) + self.beginMask = beginMask + self.endMask = endMask + self.ellipsisMask = ellipsisMask + self.newAxisMask = newAxisMask + self.squeezeAxisMask = squeezeAxisMask + } +} diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 7b295912d..9f858a98b 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -628,6 +628,80 @@ internal func _vjpFloor( return (floor(x), { _ in Tensor(0).broadcast(like: x) }) } +/// Computes the sigmoid of the specified tensor element-wise. +/// Specifically, computes `1 / (1 + exp(-x))`. +@inlinable +@differentiable(vjp: _vjpSigmoid) +public func sigmoid(_ x: Tensor) -> Tensor { + return Raw.sigmoid(x) +} + +@inlinable +internal func _vjpSigmoid( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) +} + +/// Computes the softmax of the specified tensor along the last axis. +/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. 
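+///
+/// For example (illustrative), `softmax(Tensor<Float>([1, 1, 1, 1]))` evaluates to
+/// `[0.25, 0.25, 0.25, 0.25]`.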
+@inlinable +@differentiable(vjp: _vjpSoftmax(_:) where T : TensorFlowFloatingPoint) +public func softmax(_ x: Tensor) -> Tensor { + return Raw.softmax(logits: x) +} + +/// Computes the softmax of the specified tensor along the specified axis. +/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. +@inlinable +// TODO: [AD]. +public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { + let xExp = exp(x) + let xExpSum = Raw.sum(xExp, reductionIndices: Tensor(axis), keepDims: true) + return xExp / xExpSum +} + +@inlinable +func _vjpSoftmax( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = softmax(x) + return (value, { v in + let sumChannels = (v * value).sum(alongAxes: -1) + return (v - sumChannels) * value + }) +} + +/// Computes the log-softmax of the specified tensor element-wise. +@inlinable +@differentiable(vjp: _vjpLogSoftmax(_:) where T : TensorFlowFloatingPoint) +public func logSoftmax(_ x: Tensor) -> Tensor { + return Raw.logSoftmax(logits: x) +} + +@inlinable +func _vjpLogSoftmax( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + let value = logSoftmax(x) + return (value, { v in v - v.sum(alongAxes: -1) * exp(value) }) +} + +/// Computes `relu` of the specified tensor element-wise. +/// Specifically, computes `max(0, x)`. +@inlinable +@differentiable(vjp: _vjpRelu(_:) where T : TensorFlowFloatingPoint) +public func relu(_ x: Tensor) -> Tensor { + return max(0, x) +} + +@inlinable +func _vjpRelu( + _ x: Tensor +) -> (Tensor, (Tensor) -> Tensor) { + return (relu(x), { v in Tensor(x .> 0) * v }) +} + //===------------------------------------------------------------------------------------------===// // Element-wise Binary Math Functions //===------------------------------------------------------------------------------------------===// @@ -790,81 +864,644 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { } } -/// Computes the sigmoid of the specified tensor element-wise. -/// Specifically, computes `1 / (1 + exp(-x))`. -@inlinable -@differentiable(vjp: _vjpSigmoid) -public func sigmoid(_ x: Tensor) -> Tensor { - return Raw.sigmoid(x) +//===------------------------------------------------------------------------------------------===// +// Reduction Functions +//===------------------------------------------------------------------------------------------===// + +public extension Tensor where Scalar == Bool { + /// Returns `true` if all scalars are equal to `true`. Otherwise, returns `false`. + // NOTE: This overload is necessary, otherwise `all()` would refer to the variadic method + // `all(squeezingAxes:)` with zero indices. + @inlinable + func all() -> Bool { + let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1) + return _TFGetScalarOrDie(Raw.all(self, reductionIndices: axes).handle) + } + + /// Returns `true` if any scalars are equal to `true`. Otherwise, returns `false`. + // NOTE: This overload is necessary, otherwise `any()` would refer to the variadic method + // `any(squeezingAxes:)` with zero indices. + @inlinable + func any() -> Bool { + let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1) + return _TFGetScalarOrDie(Raw.any(self, reductionIndices: axes).handle) + } + + /// Performs a logical AND operation along the specified axes. The reduced dimensions are + /// removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. 
Tensor { + let axes = axes.map(Int32.init) + return Raw.all(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Performs a logical AND operation along the specified axes. The reduced dimensions are + /// removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.any(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Performs a logical AND operation along the specified axes. The reduced dimensions are + /// retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.all(self, reductionIndices: Tensor(axes), keepDims: true) + } + + /// Performs a logical OR operation along the specified axes. The reduced + /// dimensions are retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.any(self, reductionIndices: Tensor(axes), keepDims: true) + } } -@inlinable -internal func _vjpSigmoid( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) +public extension Tensor where Scalar : Numeric & Comparable { + // NOTE: This overload is necessary, otherwise `min()` would refer to the variadic method + // `min(squeezingAxes:)` with zero indices. + @inlinable + func min() -> Tensor { + let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1) + return Raw.min(self, reductionIndices: axes) + } + + // NOTE: This overload is necessary, otherwise `max()` would refer to the variadic method + // `max(squeezingAxes:)` with zero indices. + @inlinable + func max() -> Tensor { + let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1) + return Raw.max(self, reductionIndices: axes) + } + + /// Returns the maximum values along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.max(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Returns the maximum values along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return max(squeezingAxes: axes) + } + + /// Returns the minimum values along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.min(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Returns the minimum values along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return min(squeezingAxes: axes) + } + + /// Returns the indices of the maximum values along the specified axes. The reduced dimensions + /// are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. 
Tensor { + return Raw.argMax(self, dimension: Tensor(Int32(axis))) + } + + /// Returns the indices of the minimum values along the specified axes. The reduced dimensions + /// are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return Raw.argMin(self, dimension: Tensor(Int32(axis))) + } + + /// Returns the minimum along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.min(self, reductionIndices: Tensor(axes), keepDims: true) + } + + /// Returns the minimum along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return min(alongAxes: axes) + } + + /// Returns the minimum along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + let axes = axes.map(Int32.init) + return Raw.max(self, reductionIndices: Tensor(axes), keepDims: true) + } + + /// Returns the minimum along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return max(alongAxes: axes) + } + + /// Returns the index of the maximum value of the flattened scalars. + @inlinable + func argmax() -> Tensor { + return flattened().argmax(squeezingAxis: 0) + } + + /// Returns the index of the minimum value of the flattened scalars. + @inlinable + func argmin() -> Tensor { + return flattened().argmin(squeezingAxis: 0) + } } -/// Computes the softmax of the specified tensor along the last axis. -/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. -@inlinable -@differentiable(vjp: _vjpSoftmax(_:) where T : TensorFlowFloatingPoint) -public func softmax(_ x: Tensor) -> Tensor { - return Raw.softmax(logits: x) +// MARK: - Numeric Reductions + +public extension Tensor where Scalar : Numeric { + // MARK: - Sum + + /// Returns the sum along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self, vjp: _vjpSum(squeezingAxes:) where Scalar : TensorFlowFloatingPoint) + func sum(squeezingAxes axes: Tensor) -> Tensor { + return Raw.sum(self, reductionIndices: Tensor(axes), keepDims: false) + } + + /// Returns the sum along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func sum(squeezingAxes axes: [Int]) -> Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return sum(squeezingAxes: Tensor(axes)) + } + + /// Returns the sum along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. 
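+  ///
+  /// For example (illustrative), summing `Tensor<Float>(shape: [2, 2], scalars: [1, 2, 3, 4])`
+  /// with `squeezingAxes: 1` evaluates to `[3, 7]`, a tensor of shape `[2]`.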
+ @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func sum(squeezingAxes axes: Int...) -> Tensor { + return sum(squeezingAxes: axes) + } + + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func sum() -> Tensor { + return flattened().sum(squeezingAxes: 0) + } + + /// Returns the sum along the specified axes. The reduced dimensions are retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + return Raw.sum(self, reductionIndices: axes, keepDims: true) + } + + /// Returns the sum along the specified axes. The reduced dimensions are retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return sum(alongAxes: Tensor(axes)) + } + + /// Returns the sum along the specified axes. The reduced dimensions are retained with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return sum(alongAxes: axes) + } + + // MARK: - Product + + /// Returns the product along the specified axes. The reduced dimensions are removed. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + // TODO: Make this @differentiable. + @inlinable + func product(squeezingAxes axes: Tensor) -> Tensor { + return Raw.prod(self, reductionIndices: axes, keepDims: false) + } + + /// Returns the product along the specified axes. The reduced dimensions are removed. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + func product(squeezingAxes axes: [Int]) -> Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return product(squeezingAxes: Tensor(axes)) + } + + /// Returns the product along the specified axes. The reduced dimensions are removed. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + func product(squeezingAxes axes: Int...) -> Tensor { + return product(squeezingAxes: axes) + } + + @inlinable + func product() -> Tensor { + return flattened().product(squeezingAxes: 0) + } + + /// Returns the product along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + return Raw.prod(self, reductionIndices: axes, keepDims: true) + } + + /// Returns the product along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return product(alongAxes: Tensor(axes)) + } + + /// Returns the product along the specified axes. The reduced dimensions are retained with + /// value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. 
Tensor { + return product(alongAxes: axes) + } + + // MARK: - Mean + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self, vjp: _vjpMean(squeezingAxes:) where Scalar : TensorFlowFloatingPoint) + func mean(squeezingAxes axes: Tensor) -> Tensor { + return Raw.mean(self, reductionIndices: axes, keepDims: false) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func mean(squeezingAxes axes: [Int]) -> Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return mean(squeezingAxes: Tensor(axes)) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are removed. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func mean(squeezingAxes axes: Int...) -> Tensor { + return mean(squeezingAxes: axes) + } + + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func mean() -> Tensor { + return flattened().mean(squeezingAxes: [0]) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are retained + /// with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + return Raw.mean(self, reductionIndices: axes, keepDims: true) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are retained + /// with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return mean(alongAxes: Tensor(axes)) + } + + /// Returns the arithmetic mean along the specified axes. The reduced dimensions are retained + /// with value 1. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return mean(alongAxes: axes) + } + + // MARK: - Variance + + /// Returns the variance along the specified axes. The reduced dimensions are removed. Does not + /// apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + let squaredDiff = (self - mean(alongAxes: axes)).squared() + return squaredDiff.mean(squeezingAxes: axes) + } + + /// Returns the variance along the specified axes. The reduced dimensions are removed. Does not + /// apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return variance(squeezingAxes: Tensor(axes)) + } + + /// Returns the variance along the specified axes. The reduced dimensions are retained with + /// value 1. 
Does not apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return variance(squeezingAxes: axes) + } + + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @inlinable + func variance() -> Tensor { + let mean = self.mean() + let squaredDiff = (self - mean).squared() + return squaredDiff.mean() + } + + /// Returns the variance along the specified axes. The reduced dimensions are retained with + /// value 1. Does not apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + let squaredDiff = (self - mean(alongAxes: axes)).squared() + return squaredDiff.mean(alongAxes: axes) + } + + /// Returns the variance along the specified axes. The reduced dimensions are retained with + /// value 1. Does not apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return variance(alongAxes: Tensor(axes)) + } + + /// Returns the variance along the specified axes. The reduced dimensions are retained with + /// value 1. Does not apply Bessel's correction. + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return variance(alongAxes: axes) + } } -/// Computes the softmax of the specified tensor along the specified axis. -/// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. -@inlinable -// TODO: [AD]. -public func softmax( - _ x: Tensor, - alongAxis axis: Int -) -> Tensor { - let expx = exp(x) - // TODO: [BUG] keepDims = true for the sum. 
- return expx / expx.sum(alongAxes: axis) +internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + func _vjpSum(alongAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { + let value = sum(alongAxes: axes) + return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) }) + } + + @inlinable + func _vjpSum(squeezingAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { + let value = sum(squeezingAxes: axes) + return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) }) + } + + @inlinable + func _vjpMean(alongAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { + let value = mean(alongAxes: axes) + let count = Raw.gather(params: shapeTensor, indices: axes).product() + return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) / Tensor(count) }) + } + + @inlinable + func _vjpMean(squeezingAxes axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { + let value = mean(squeezingAxes: axes) + return (value, { [shape = shapeTensor, count = axes.map { shape[$0] }.reduce(1, *)] in + $0.broadcast(toShape: shape) / Tensor(Scalar(count)) + }) + } + + @inlinable + func _vjpMean( + squeezingAxes axes: Tensor + ) -> (Tensor, (Tensor) -> Tensor) { + let value = mean(squeezingAxes: axes) + let count = Raw.gather(params: shapeTensor, indices: axes).product() + return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) / Tensor(count) }) + } } -@inlinable -func _vjpSoftmax( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - let value = softmax(x) - return (value, { v in - let sumChannels = (v * value).sum(alongAxes: -1) - return (v - sumChannels) * value - }) +// TODO: Consider making the return type be generic over `FloatingPoint` types +// so that `self`'s scalar type can be any `Numeric` type. +public extension Tensor where Scalar : TensorFlowFloatingPoint { + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { + return sqrt(variance(squeezingAxes: axes)) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return sqrt(variance(squeezingAxes: axes)) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return standardDeviation(squeezingAxes: axes) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // Reduce along all dimensions. + return standardDeviation(squeezingAxes: Array(0..) -> Tensor { + return sqrt(variance(alongAxes: axes)) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. 
Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + // TODO(TF-433): Remove workaround for differentiating `map`. + let axes = {axes.map(Int32.init)}() + return standardDeviation(alongAxes: Tensor(axes)) + } + + /// Returns the standard deviation of the elements along the specified axes. The reduced + /// dimensions are retained with value `1`. Does not apply Bessel's correction. + /// + /// - Parameter axes: The dimensions to reduce. + /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { + return sqrt(variance(alongAxes: axes)) + } } -/// Computes the log-softmax of the specified tensor element-wise. +//===------------------------------------------------------------------------------------------===// +// Linear Algebra +//===------------------------------------------------------------------------------------------===// + +/// Performs matrix multiplication with another tensor and produces the result. @inlinable -@differentiable(vjp: _vjpLogSoftmax(_:) where T : TensorFlowFloatingPoint) -public func logSoftmax(_ x: Tensor) -> Tensor { - return Raw.logSoftmax(logits: x) +@differentiable(vjp: _vjpMatmul(_:_:) where Scalar : TensorFlowFloatingPoint) +public func matmul( + _ lhs: Tensor, + _ rhs: Tensor +) -> Tensor { + // Default arguments specified explicitly to avoid "external declarations of SILFunctions with + // shared visibility is not allowed" SILVerifier error in + // "tests/AutoDiff/tensor_autodiff_runtime.swift". + return Raw.matMul(lhs, rhs, transposeA: false, transposeB: false) } @inlinable -func _vjpLogSoftmax( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - let value = logSoftmax(x) - return (value, { v in - v - v.sum(alongAxes: -1) * exp(value) - }) +internal func _vjpMatmul( + _ lhs: Tensor, + _ rhs: Tensor +) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = matmul(lhs, rhs) + return (value, { v in + (matmul(v, rhs.transposed()), matmul(lhs.transposed(), v)) + }) } -/// Computes `relu` of the specified tensor element-wise. -/// Specifically, computes `max(0, x)`. -@inlinable -@differentiable(vjp: _vjpRelu(_:) where T : TensorFlowFloatingPoint) -public func relu(_ x: Tensor) -> Tensor { - return max(0, x) +infix operator • : MultiplicationPrecedence + +public extension Tensor where Scalar : Numeric { + // TODO: We have to define a custom VJP on • because AD can't yet differentiate generic methods. + // After AD can differentiate generic methods, remove the custom VJP. + + /// Performs matrix multiplication between two tensors and produces the result. + @inlinable + @differentiable(vjp: _vjpMatmulOperator(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + static func • (lhs: Tensor, rhs: Tensor) -> Tensor { + return matmul(lhs, rhs) + } } -@inlinable -func _vjpRelu( - _ x: Tensor -) -> (Tensor, (Tensor) -> Tensor) { - return (relu(x), { v in Tensor(x .> 0) * v }) +// TODO: We have to define a custom VJP on • because AD can't yet +// differentiate generic methods. After AD can differentiate generic methods, +// remove the custom VJP. 
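+// For example (illustrative), both spellings compute the same matrix product:
+//
+//     let a = Tensor<Float>(shape: [2, 3], scalars: [1, 2, 3, 4, 5, 6])
+//     let b = Tensor<Float>(shape: [3, 2], scalars: [1, 2, 3, 4, 5, 6])
+//     let c = matmul(a, b)  // shape is [2, 2]
+//     let d = a • b         // same result, using the infix operator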
+internal extension Tensor where Scalar : TensorFlowFloatingPoint { + @inlinable + static func _vjpMatmulOperator( + lhs: Tensor, + rhs: Tensor + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + return _vjpMatmul(lhs, rhs) + } } diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift index e11e1f5d7..2d4e3b32d 100644 --- a/Sources/DeepLearning/Tensors.swift +++ b/Sources/DeepLearning/Tensors.swift @@ -20,6 +20,36 @@ import TensorFlow infix operator .== : ComparisonPrecedence #endif +//===------------------------------------------------------------------------------------------===// +// Tensor Properties +//===------------------------------------------------------------------------------------------===// + +public extension Tensor { + /// The rank of the tensor, represented as a `Tensor`. + @inlinable + var rankTensor: Tensor { + get { + return Raw.rank(self) + } + } + + /// The dimensions of the tensor, represented as a `Tensor`. + @inlinable + var shapeTensor: Tensor { + get { + return Raw.shape(self) + } + } + + /// The number of scalars in the tensor, represented as a `Tensor`. + @inlinable + var scalarCountTensor: Tensor { + get { + return Raw.size(self) + } + } +} + //===------------------------------------------------------------------------------------------===// // Description and Visualization //===------------------------------------------------------------------------------------------===// From 1120692fcde6a35a669da18938de4b52f87e2f89 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 14:12:44 -0400 Subject: [PATCH 41/55] Added a README file to the 'Operators' source directory. --- Sources/DeepLearning/Operators/Basic.swift | 2 ++ Sources/DeepLearning/Operators/README.md | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 Sources/DeepLearning/Operators/README.md diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 42479481f..5e5a6e594 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -117,6 +117,8 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // Other Tensor Transformations //===------------------------------------------------------------------------------------------===// +infix operator ++ : AdditionPrecedence + public extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @inlinable diff --git a/Sources/DeepLearning/Operators/README.md b/Sources/DeepLearning/Operators/README.md new file mode 100644 index 000000000..c2f0d5e22 --- /dev/null +++ b/Sources/DeepLearning/Operators/README.md @@ -0,0 +1,22 @@ +# Ops and Convenience Methods + +The majority of the Tensor API is implemented in terms of 'ops' that are +partitioned out to the TensorFlow graph when the compiler runs. These +ops are intentionally designed to reflect TensorFlow ops, but provide nicer +Swift syntax for accessing them. In addition to the core ops themselves, +we also define some helper function wrappers, e.g. to make things symmetric +and generally feel nice to use. + +The ops themselves are defined by the primitive `#tfop(...)` syntax, here +are some examples: +``` +result = #tfop("Add", lhs, rhs) +result = #tfop("Const", dtype: Float.self, value$tensor: 4.0) +``` + +The first parameter to this syntax is the TensorFlow op name as a string. +After that, the inputs are specified, and then attributes are specified +with their name as the keyword argument. 
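+
+As an illustration of the helper-wrapper layer, a convenience function
+typically just forwards to a raw op with a nicer Swift signature. The
+following is only a sketch (the `squaredDifference` wrapper shown here is
+hypothetical; only the underlying `Raw.squaredDifference` op is assumed):
+```
+/// Hypothetical wrapper, for illustration only.
+public func squaredDifference<Scalar : TensorFlowFloatingPoint>(
+  _ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>
+) -> Tensor<Scalar> {
+  return Raw.squaredDifference(lhs, rhs)
+}
+```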
+ +Inputs and outputs must be of TensorHandle, ResourceHandle, or VariantHandle +type. These are magic types known to the compiler. From e7a04d2ad44ae311c6c41b77a79f59de3529a630 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 14:22:59 -0400 Subject: [PATCH 42/55] Brought the gradient helper functions from the stdlib. --- Sources/DeepLearning/Gradients.swift | 178 +++++++++++++++++++++++ Sources/DeepLearning/Operators/README.md | 19 +++ 2 files changed, 197 insertions(+) create mode 100644 Sources/DeepLearning/Gradients.swift diff --git a/Sources/DeepLearning/Gradients.swift b/Sources/DeepLearning/Gradients.swift new file mode 100644 index 000000000..04a37fe8b --- /dev/null +++ b/Sources/DeepLearning/Gradients.swift @@ -0,0 +1,178 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +import TensorFlow +#endif + +//===------------------------------------------------------------------------------------------===// +// Method-style Differential Operators +//===------------------------------------------------------------------------------------------===// + +public extension Differentiable { + @inlinable + func gradient( + in f: @differentiable (Self) -> Tensor + ) -> CotangentVector { + return self.pullback(in: f)(Tensor(1)) + } + + @inlinable + func valueWithGradient( + in f: @differentiable (Self) -> Tensor + ) -> (value: Tensor, gradient: CotangentVector) { + let (y, pb) = self.valueWithPullback(in: f) + return (y, pb(Tensor(1))) + } + + @inlinable + func gradient( + at x: T, + in f: @differentiable (Self, T) -> Tensor + ) -> (CotangentVector, T.CotangentVector) { + return self.pullback(at: x, in: f)(Tensor(1)) + } + + @inlinable + func valueWithGradient( + at x: T, + in f: @differentiable (Self, T) -> Tensor + ) -> (value: Tensor, gradient: (CotangentVector, T.CotangentVector)) { + let (y, pb) = self.valueWithPullback(at: x, in: f) + return (y, pb(Tensor(1))) + } +} + +//===------------------------------------------------------------------------------------------===// +// Free-Function-Style Differential Operators +//===------------------------------------------------------------------------------------------===// + +// Value with gradient + +@inlinable +public func valueWithGradient( + at x: T, + in f: @differentiable (T) -> Tensor +) -> (value: Tensor, gradient: T.CotangentVector) +where T : Differentiable, R : TensorFlowFloatingPoint { + let (y, pullback) = valueWithPullback(at: x, in: f) + return (y, pullback(Tensor(1))) +} + +@inlinable +public func valueWithGradient( + at x: T, + _ y: U, + in f: @differentiable (T, U) -> Tensor +) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector)) + where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + let (y, pullback) = valueWithPullback(at: x, y, in: f) + return (y, pullback(Tensor(1))) +} + +@inlinable +public func valueWithGradient( + at x: T, + _ y: U, + _ z: V, + in f: 
@differentiable (T, U, V) -> Tensor +) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector, V.CotangentVector)) + where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + let (y, pullback) = valueWithPullback(at: x, y, z, in: f) + return (y, pullback(Tensor(1))) +} + +// Value with gradient (curried) + +@inlinable +public func valueWithGradient( + of f: @escaping @differentiable (T) -> Tensor +) -> (T) -> (value: Tensor, gradient: T.CotangentVector) + where T : Differentiable, R : TensorFlowFloatingPoint { + return { x in valueWithGradient(at: x, in: f) } +} + +@inlinable +public func valueWithGradient( + of f: @escaping @differentiable (T, U) -> Tensor +) -> (T, U) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector)) + where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + return { x, y in valueWithGradient(at: x, y, in: f) } +} + +@inlinable +public func valueWithGradient( + of f: @escaping @differentiable (T, U, V) -> Tensor +) -> (T, U, V) -> ( + value: Tensor, + gradient: (T.CotangentVector, U.CotangentVector, V.CotangentVector)) + where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + return { x, y, z in valueWithGradient(at: x, y, z, in: f) } +} + +// Gradient + +@inlinable +public func gradient( + at x: T, + in f: @differentiable (T) -> Tensor +) -> T.CotangentVector where T : Differentiable, R : TensorFlowFloatingPoint { + return pullback(at: x, in: f)(Tensor(1)) +} + +@inlinable +public func gradient( + at x: T, + _ y: U, + in f: @differentiable (T, U) -> Tensor +) -> (T.CotangentVector, U.CotangentVector) + where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + return pullback(at: x, y, in: f)(Tensor(1)) +} + +@inlinable +public func gradient( + at x: T, + _ y: U, + _ z: V, + in f: @differentiable (T, U, V) -> Tensor +) -> (T.CotangentVector, U.CotangentVector, V.CotangentVector) + where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + return pullback(at: x, y, z, in: f)(Tensor(1)) +} + +// Gradient (curried) + +@inlinable +public func gradient( + of f: @escaping @differentiable (T) -> Tensor +) -> (T) -> T.CotangentVector where T : Differentiable, R : TensorFlowFloatingPoint { + return { x in gradient(at: x, in: f) } +} + +@inlinable +public func gradient( + of f: @escaping @differentiable (T, U) -> Tensor +) -> (T, U) -> (T.CotangentVector, U.CotangentVector) + where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + return { x, y in gradient(at: x, y, in: f) } +} + +@inlinable +public func gradient( + of f: @escaping @differentiable (T, U, V) -> Tensor +) -> (T, U, V) -> (T.CotangentVector, U.CotangentVector, V.CotangentVector) + where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + return { x, y, z in gradient(at: x, y, z, in: f) } +} diff --git a/Sources/DeepLearning/Operators/README.md b/Sources/DeepLearning/Operators/README.md index c2f0d5e22..76e7a7e69 100644 --- a/Sources/DeepLearning/Operators/README.md +++ b/Sources/DeepLearning/Operators/README.md @@ -20,3 +20,22 @@ with their name as the keyword argument. Inputs and outputs must be of TensorHandle, ResourceHandle, or VariantHandle type. These are magic types known to the compiler. + +## Auto-Differentiation Support + +We also provide vector-Jacobian product (VJP) definitions for some of the +convenience methods. 
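+
+As a minimal sketch of how such a definition looks, assuming a hypothetical
+`squared` function (the real definitions live in the `Operators` sources):
+```
+@differentiable(vjp: _vjpSquared(_:))
+public func squared<T : TensorFlowFloatingPoint>(_ x: Tensor<T>) -> Tensor<T> {
+  // Hypothetical function, for illustration only.
+  return x * x
+}
+
+func _vjpSquared<T : TensorFlowFloatingPoint>(
+  _ x: Tensor<T>
+) -> (Tensor<T>, (Tensor<T>) -> Tensor<T>) {
+  // Returns the value along with a pullback that maps an incoming cotangent
+  // `v` to `v * 2 * x`, the vector-Jacobian product of `x * x`.
+  return (x * x, { v in v * 2 * x })
+}
+```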
+ +Terminology: +- originalValue (f): The function being differentiated, or the result of that + function. +- VJP (f'): The function as the result of differentiation, computing + the vector-Jacobian products with respect to all arguments, or the result + of that function. + +For more information, visit: +https://en.wikipedia.org/wiki/Automatic_differentiation + +The attribute '@differentiable(vjp: ...)' is used to register a function's VJP. +The automatic differentiation pass identifies these VJPs and chains them +together to produce arbitrary differentiable programs. From 3ee21ffaaf48dbee0237d796618fc736a4f7812f Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 15:26:57 -0400 Subject: [PATCH 43/55] Bug fixes. --- Sources/DeepLearning/Initializers.swift | 56 +++++++++++----------- Sources/DeepLearning/Operators/Basic.swift | 4 +- Sources/DeepLearning/Operators/Math.swift | 3 +- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index e644fd78a..5e90be2b7 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -143,7 +143,7 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) + // @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -181,40 +181,40 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. @inlinable - @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { - @inlinable - static func _vjpStacking( - stacking tensors: [Tensor], - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(stacking: tensors, alongAxis: axis) - return (result, { v in - return Array.DifferentiableView(v.unstack(alongAxis: axis)) - }) - } +// internal extension Tensor where Scalar : TensorFlowFloatingPoint { +// @inlinable +// static func _vjpStacking( +// stacking tensors: [Tensor], +// alongAxis axis: Int = 0 +// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { +// let result = Tensor(stacking: tensors, alongAxis: axis) +// return (result, { v in +// return Array.DifferentiableView(v.unstack(alongAxis: axis)) +// }) +// } - @inlinable - static func _vjpConcatenating( - concatenating tensors: [Tensor], - alongAxis axis: Int = 0 - ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { - let result = Tensor(concatenating: tensors, alongAxis: axis) - let posAxis = axis < 0 ? 
axis + tensors[0].rank : axis - let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) - return (result, { [count = tensors.count] v in - if count == 1 { return Array.DifferentiableView([v]) } - let splits = v.split(sizes: sizes, alongAxis: posAxis) - return Array.DifferentiableView(splits) - }) - } -} +// @inlinable +// static func _vjpConcatenating( +// concatenating tensors: [Tensor], +// alongAxis axis: Int = 0 +// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { +// let result = Tensor(concatenating: tensors, alongAxis: axis) +// let posAxis = axis < 0 ? axis + tensors[0].rank : axis +// let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) +// return (result, { [count = tensors.count] v in +// if count == 1 { return Array.DifferentiableView([v]) } +// let splits = v.split(sizes: sizes, alongAxis: posAxis) +// return Array.DifferentiableView(splits) +// }) +// } +// } //===------------------------------------------------------------------------------------------===// // Numeric diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 5e5a6e594..428a4ca78 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -20,13 +20,15 @@ import TensorFlow // Shape Transformations //===------------------------------------------------------------------------------------------===// -public extension Tensor { +public extension TensorFlowScalar { /// Convert to a tensor with the specified rank, with all dimensions equal to 1. @inlinable func makeTensor(rank: Int) -> Tensor { return Tensor(repeating: self, shape: TensorShape(rank)) } +} +public extension Tensor { /// Reshape to the shape of the specified `Tensor`. /// - Precondition: The number of scalars matches the new shape. @inlinable diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 9f858a98b..8851080ec 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -18,6 +18,7 @@ import TensorFlow #if COMPILING_TENSORFLOW_MODULE infix operator .> : ComparisonPrecedence +infix operator .== : ComparisonPrecedence #endif // TODO: @@ -657,7 +658,7 @@ public func softmax(_ x: Tensor) -> Tensor { // TODO: [AD]. public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { let xExp = exp(x) - let xExpSum = Raw.sum(xExp, reductionIndices: Tensor(axis), keepDims: true) + let xExpSum = Raw.sum(xExp, reductionIndices: Tensor(Int32(axis)), keepDims: true) return xExp / xExpSum } From ef1c73bc6744cbb7e375d5d9e99463d5b71b7a38 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 16:38:00 -0400 Subject: [PATCH 44/55] Brought the tensor tests from the stdlib. 
--- Sources/DeepLearning/Helpers.swift | 4 + Sources/DeepLearning/Initializers.swift | 4 +- Sources/DeepLearning/Operators/Basic.swift | 19 +- Sources/DeepLearning/Operators/NN.swift | 95 ++-- Sources/DeepLearning/PythonConversion.swift | 174 +++++++ .../DeepLearningTests/InitializerTests.swift | 97 ++++ .../OperatorTests/BasicTests.swift | 452 ++++++++++++++++++ .../OperatorTests/ComparisonTests.swift | 30 ++ .../OperatorTests/MathTests.swift | 199 ++++++++ Tests/DeepLearningTests/TensorTests.swift | 81 ++++ Tests/DeepLearningTests/XCTestManifests.swift | 4 + 11 files changed, 1100 insertions(+), 59 deletions(-) create mode 100644 Sources/DeepLearning/PythonConversion.swift create mode 100644 Tests/DeepLearningTests/InitializerTests.swift create mode 100644 Tests/DeepLearningTests/OperatorTests/BasicTests.swift create mode 100644 Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift create mode 100644 Tests/DeepLearningTests/OperatorTests/MathTests.swift create mode 100644 Tests/DeepLearningTests/TensorTests.swift diff --git a/Sources/DeepLearning/Helpers.swift b/Sources/DeepLearning/Helpers.swift index 4d9c0217b..39bcf1e1d 100644 --- a/Sources/DeepLearning/Helpers.swift +++ b/Sources/DeepLearning/Helpers.swift @@ -30,3 +30,7 @@ public func identity(_ x: Tensor) -> Tensor { func pow(_ x: T, _ y: T) -> T { return T(pow(Double(x), Double(y))) } + +extension Array where Element : Differentiable { + +} diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 5e90be2b7..204ca8a26 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -109,7 +109,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - @differentiable(where Scalar : TensorFlowFloatingPoint) + // @differentiable(where Scalar : TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } @@ -196,7 +196,7 @@ public extension Tensor { // ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { // let result = Tensor(stacking: tensors, alongAxis: axis) // return (result, { v in -// return Array.DifferentiableView(v.unstack(alongAxis: axis)) +// Array.DifferentiableView(v.unstack(alongAxis: axis)) // }) // } diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 428a4ca78..71a8970a0 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -163,7 +163,7 @@ public extension Tensor { /// specified axis. /// - Precondition: The axis must be in the range `-rank.. Tensor { return Tensor(concatenating: [self, other], alongAxis: axis) } @@ -205,6 +205,23 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { func _vjpTransposed() -> (Tensor, (Tensor) -> Tensor) { return (transposed(), { $0.transposed() }) } + + @inlinable + func _vjpConcatenated( + with other: Tensor, + alongAxis axis: Int + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let idx = axis < 0 ? 
axis + rank : axis + let splits = Tensor([shapeTensor[idx], other.shapeTensor[idx]]) + return (concatenated(with: other, alongAxis: axis), { result in + let gradients = Raw.splitV( + value: result, + sizeSplits: splits, + splitDim: Tensor(Int32(axis)), + numSplit: Int64(2)) + return (gradients[0], gradients[1]) + }) + } } //===------------------------------------------------------------------------------------------===// diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index f1401af20..1664b1954 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -20,7 +20,7 @@ import TensorFlow // Normalization //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar: TensorFlowFloatingPoint { +public extension Tensor where Scalar : TensorFlowFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are @@ -32,9 +32,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { /// - scale: The scale, also known as gamma. /// - epsilon: A small value added to the denominator for numerical stability. @inlinable - @differentiable( - wrt: (self, offset, scale), - vjp: _vjpBatchNormalized) + @differentiable(wrt: (self, offset, scale), vjp: _vjpBatchNormalized) func batchNormalized( alongAxis axis: Int, offset: Tensor = Tensor(0), @@ -56,8 +54,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { scale: Tensor, epsilon: Scalar ) -> (Tensor, (Tensor) -> (Tensor, Tensor, Tensor)) { - let value = batchNormalized( - alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) + let value = batchNormalized(alongAxis: axis, offset: offset, scale: scale, epsilon: epsilon) return (value, { v in let mean = self.mean(alongAxes: axis) let squaredDiff: Tensor = Raw.squaredDifference(self, mean) @@ -79,7 +76,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { } } -public extension Tensor where Scalar: BinaryFloatingPoint { +public extension Tensor where Scalar : BinaryFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// /// Specifically, returns `(self - mu)/(var + epsilon) * gamma + beta` where @@ -94,9 +91,8 @@ public extension Tensor where Scalar: BinaryFloatingPoint { /// stability. @inlinable @differentiable( - wrt: (self, offset, scale), vjp: _vjpBatchNormalized - where Scalar : TensorFlowFloatingPoint - ) + wrt: (self, offset, scale), + vjp: _vjpBatchNormalized where Scalar : TensorFlowFloatingPoint) func batchNormalized( alongAxis axis: Int, offset: Tensor = Tensor(0), @@ -142,7 +138,7 @@ public extension Padding { } } -public extension Tensor where Scalar: TensorFlowFloatingPoint { +public extension Tensor where Scalar : TensorFlowFloatingPoint { /// TensorFlow builtin conv2d gradient helper for the input. 
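+  /// That is, given the gradient of a convolution's output (`self`), the original input `shape`,
+  /// and the `filter`, it returns the gradient with respect to the convolution's input.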
@inlinable @differentiable(wrt: (self, filter), vjp: _vjpConv2DBackpropInput) @@ -186,14 +182,13 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { _ strides: (Int, Int, Int, Int), _ padding: Padding ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let value = conv2DBackpropInput(shape: shape, filter: filter, strides: strides, - padding: padding) + let value = conv2DBackpropInput( + shape: shape, filter: filter, strides: strides, padding: padding) return (value, { v in - return ( - self.conv2DBackpropFilter(input: v, filterSizes: shape, strides: strides, - padding: padding), - v.convolved2D(withFilter: filter, strides: strides, padding: padding) - ) + ( + self.conv2DBackpropFilter( + input: v, filterSizes: shape, strides: strides, padding: padding), + v.convolved2D(withFilter: filter, strides: strides, padding: padding)) }) } @@ -204,14 +199,13 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { _ strides: (Int, Int, Int, Int), _ padding: Padding ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let value = conv2DBackpropFilter(input: input, filterSizes: filterSizes, - strides: strides, padding: padding) + let value = conv2DBackpropFilter( + input: input, filterSizes: filterSizes, strides: strides, padding: padding) return (value, { v in - return ( - self.conv2DBackpropInput(shape: filterSizes, filter: v, strides: strides, - padding: padding), - input.convolved2D(withFilter: v, strides: strides, padding: padding) - ) + ( + self.conv2DBackpropInput( + shape: filterSizes, filter: v, strides: strides, padding: padding), + input.convolved2D(withFilter: v, strides: strides, padding: padding)) }) } @@ -221,19 +215,15 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { strides: (Int, Int, Int, Int), padding: Padding ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let value = convolved2D(withFilter: filter, strides: strides, - padding: padding) + let value = convolved2D(withFilter: filter, strides: strides, padding: padding) return (value, { v in - return ( + ( v.conv2DBackpropInput( shape: self.shapeTensor, filter: filter, - strides: strides, padding: padding - ), + strides: strides, padding: padding), v.conv2DBackpropFilter( input: self, filterSizes: filter.shapeTensor, - strides: strides, padding: padding - ) - ) + strides: strides, padding: padding)) }) } @@ -245,10 +235,9 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { ) -> (Tensor, (Tensor) -> Tensor) { // TODO: Currently this is not higher order differentiable. Redefine in // closed form. - let value = maxPooled(kernelSize: kernelSize, strides: strides, - padding: padding) + let value = maxPooled(kernelSize: kernelSize, strides: strides, padding: padding) return (value, { v in - return Raw.maxPoolGradV2( + Raw.maxPoolGradV2( origInput: self, origOutput: value, grad: v, @@ -256,8 +245,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { Int32(kernelSize.2), Int32(kernelSize.3)]), strides: Tensor([Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)]), - padding: padding.raw - ) + padding: padding.raw) }) } @@ -269,22 +257,20 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint { ) -> (Tensor, (Tensor) -> Tensor) { // TODO: Currently this is not higher order differentiable. Redefine in // closed form. 
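+    // Note: `Raw.avgPoolGrad` distributes each incoming gradient value evenly over the
+    // corresponding pooling window (dividing by the window size), which is the derivative of
+    // taking the window average.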
- let value = averagePooled(kernelSize: kernelSize, strides: strides, - padding: padding) + let value = averagePooled(kernelSize: kernelSize, strides: strides, padding: padding) return (value, { v in - return Raw.avgPoolGrad( + Raw.avgPoolGrad( origInputShape: self.shapeTensor, grad: v, ksize: [Int32(kernelSize.0), Int32(kernelSize.1), Int32(kernelSize.2), Int32(kernelSize.3)], strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)], - padding: padding.raw - ) + padding: padding.raw) }) } } -public extension Tensor where Scalar: FloatingPoint { +public extension Tensor where Scalar : FloatingPoint { /// Computes a 2-D convolution using `self` as input, with the specified /// filter, strides, and padding. /// @@ -295,11 +281,10 @@ public extension Tensor where Scalar: FloatingPoint { /// - padding: The padding for the operation. /// - Precondition: `self` must have rank 4. /// - Precondition: `filter` must have rank 4. - @inlinable @inline(__always) + @inlinable @differentiable( - wrt: (self, filter), vjp: _vjpConvolved2D - where Scalar: TensorFlowFloatingPoint - ) + wrt: (self, filter), + vjp: _vjpConvolved2D where Scalar: TensorFlowFloatingPoint) func convolved2D( withFilter filter: Tensor, strides: (Int, Int, Int, Int), @@ -321,11 +306,10 @@ public extension Tensor where Scalar: FloatingPoint { /// - strides: The strides of the sliding filter for each dimension of the /// input. /// - padding: The padding for the operation. - @inlinable @inline(__always) + @inlinable @differentiable( - wrt: self, vjp: _vjpMaxPooled(kernelSize:strides:padding:) - where Scalar : TensorFlowFloatingPoint - ) + wrt: self, + vjp: _vjpMaxPooled(kernelSize:strides:padding:) where Scalar : TensorFlowFloatingPoint) func maxPooled( kernelSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), @@ -348,11 +332,10 @@ public extension Tensor where Scalar: FloatingPoint { /// - strides: The strides of the sliding filter for each dimension of the /// input. /// - padding: The padding for the operation. - @inlinable @inline(__always) + @inlinable @differentiable( - wrt: self, vjp: _vjpAveragePooled(kernelSize:strides:padding:) - where Scalar : TensorFlowFloatingPoint - ) + wrt: self, + vjp: _vjpAveragePooled(kernelSize:strides:padding:) where Scalar : TensorFlowFloatingPoint) func averagePooled( kernelSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), diff --git a/Sources/DeepLearning/PythonConversion.swift b/Sources/DeepLearning/PythonConversion.swift new file mode 100644 index 000000000..a1b10d30a --- /dev/null +++ b/Sources/DeepLearning/PythonConversion.swift @@ -0,0 +1,174 @@ +// Copyright 2018 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if !COMPILING_TENSORFLOW_MODULE +import TensorFlow +#endif + +#if canImport(Python) +import Python + +/// The `numpy` Python module. +/// Note: Global variables are lazy, so the following declaration won't produce +// a Python import error until it is first used. 
+private let np = Python.import("numpy") + +private func debugLogNumpyError(_ message: String) { + debugLog("NumPy conversion error: " + message) +} + +extension ShapedArray : ConvertibleFromNumpyArray + where Scalar : NumpyScalarCompatible { + /// Creates a `ShapedArray` with the same shape and scalars as the specified + /// `numpy.ndarray` instance. + /// + /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. + /// - Precondition: The `numpy` Python package must be installed. + /// - Precondition: `numpyArray` must have a compatible scalar `dtype`. + public init?(numpy numpyArray: PythonObject) { + // Check if input is a `numpy.ndarray` instance. + guard Python.isinstance(numpyArray, np.ndarray) == true else { + debugLogNumpyError(""" + PythonObject input has type '\(Python.type(numpyArray))' and is not \ + an instance of 'numpy.ndarray'. + """) + return nil + } + // Check if the dtype of the `ndarray` is compatible with the `Scalar` + // type. + guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { + debugLogNumpyError(""" + 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ + Swift type '\(Scalar.self)'. + """) + return nil + } + + let pyShape = numpyArray.__array_interface__["shape"] + guard let shape = [Int](pyShape) else { + debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") + return nil + } + + // Make sure that the array is contiguous in memory. This does a copy if + // the array is not already contiguous in memory. + let contiguousNumpyArray = np.ascontiguousarray(numpyArray) + + guard let ptrVal = + UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { + debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") + return nil + } + // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape + // of `(0,)`). + guard let ptr = UnsafePointer(bitPattern: ptrVal) else { + fatalError("'numpy.ndarray' data pointer was nil") + } + // This code avoids calling `init(shape: [Int], scalars: S)`, + // which inefficiently copies scalars one by one. Instead, + // `init(shape: [Int], scalars: [Scalar])` is called, which efficiently + // does a `memcpy` of the entire `scalars` array. + // Unecessary copying is minimized. + let dummyPointer = UnsafeMutablePointer.allocate(capacity: 1) + let scalarCount = shape.reduce(1, *) + var scalars: [Scalar] = Array(repeating: dummyPointer.move(), + count: scalarCount) + dummyPointer.deallocate() + scalars.withUnsafeMutableBufferPointer { buffPtr in + buffPtr.baseAddress!.assign(from: ptr, count: scalarCount) + } + self.init(shape: shape, scalars: scalars) + } +} + +extension Tensor : ConvertibleFromNumpyArray + where Scalar : NumpyScalarCompatible { + /// Creates a tensor with the same shape and scalars as the specified + /// `numpy.ndarray` instance. + /// + /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. + /// - Precondition: The `numpy` Python package must be installed. + /// - Returns: `numpyArray` converted to an `Array`. Returns `nil` if + /// `numpyArray` does not have a compatible scalar `dtype`. + public init?(numpy numpyArray: PythonObject) { + // Check if input is a `numpy.ndarray` instance. + guard Python.isinstance(numpyArray, np.ndarray) == true else { + debugLogNumpyError(""" + PythonObject input has type '\(Python.type(numpyArray))' and is not \ + an instance of 'numpy.ndarray'. + """) + return nil + } + // Check if the dtype of the `ndarray` is compatible with the `Scalar` + // type. 
+ guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { + debugLogNumpyError(""" + 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ + Swift type '\(Scalar.self)'. + """) + return nil + } + + let pyShape = numpyArray.__array_interface__["shape"] + guard let dimensions = [Int](pyShape) else { + debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") + return nil + } + let shape = TensorShape(dimensions) + + // Make sure that the array is contiguous in memory. This does a copy if + // the array is not already contiguous in memory. + let contiguousNumpyArray = np.ascontiguousarray(numpyArray) + + guard let ptrVal = + UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { + debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") + return nil + } + // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape + // of `(0,)`). + guard let ptr = UnsafePointer(bitPattern: ptrVal) else { + fatalError("'numpy.ndarray' data pointer was nil") + } + let buffPtr = UnsafeBufferPointer(start: ptr, + count: Int(shape.contiguousSize)) + self.init(shape: shape, scalars: buffPtr) + } +} + +extension ShapedArray where Scalar : NumpyScalarCompatible { + /// Creates a `numpy.ndarray` instance with the same shape and scalars as + /// this `ShapedArray`. + /// + /// - Precondition: The `numpy` Python package must be installed. + public func makeNumpyArray() -> PythonObject { + return scalars.makeNumpyArray().reshape(shape) + } +} + +extension Tensor where Scalar : NumpyScalarCompatible { + /// Creates a `numpy.ndarray` instance with the same shape and scalars as + /// this tensor. + /// + /// - Precondition: The `numpy` Python package must be installed. + public func makeNumpyArray() -> PythonObject { return array.makeNumpyArray() } +} + +extension TensorShape : PythonConvertible { + public var pythonObject: PythonObject { + return dimensions.pythonObject + } +} + +#endif // canImport(Python) diff --git a/Tests/DeepLearningTests/InitializerTests.swift b/Tests/DeepLearningTests/InitializerTests.swift new file mode 100644 index 000000000..f91109065 --- /dev/null +++ b/Tests/DeepLearningTests/InitializerTests.swift @@ -0,0 +1,97 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import XCTest +@testable import DeepLearning + +final class InitializerTests: XCTestCase { + func testInitializers() { + let scalar = Tensor(1) + let matrix: Tensor = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] + let broadcastScalar = Tensor(broadcasting: 10, rank: 3) + let some4d = Tensor( + shape: [2, 1, 2, 1], + scalars: AnyRandomAccessCollection([2, 3, 4, 5])) + XCTAssertEqual(ShapedArray(shape: [2, 1, 2, 1], scalars: [2, 3, 4, 5]), some4d.array) + XCTAssertEqual(ShapedArray(shape: [], scalars: [1]), scalar.array) + XCTAssertEqual(ShapedArray(shape: [2, 3], scalars: [1, 2, 3, 4, 5, 6]), matrix.array) + XCTAssertEqual(ShapedArray(shape: [1, 1, 1], scalars: [10]), broadcastScalar.array) + } + + func testFactoryInitializers() { + let x = Tensor(ones: [1, 10]) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [1, 10]), x.array) + } + + func testNumericInitializers() { + let x = Tensor(oneHotAtIndices: [0, 2, -1, 1], depth: 3) + XCTAssertEqual(ShapedArray( + shape: [4, 3], + scalars: [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]), x.array) + } + + func testScalarToTensorConversion() { + let tensor = Tensor(broadcasting: 42, rank: 4) + XCTAssertEqual([1, 1, 1, 1], tensor.shape) + XCTAssertEqual([42], tensor.scalars) + } + + func testArrayConversion() { + let array3D = ShapedArray(repeating: 1.0, shape: [2, 3, 4]) + let tensor3D = Tensor(array3D) + XCTAssertEqual(array3D, tensor3D.array) + } + + func testNonTPUDataTypeCast() { + // TPU does not support Int8 or 16 casting. + guard !_RuntimeConfig.executionMode.isTPU else { return } + + let x = Tensor(ones: [5, 5]) + let ints = Tensor(x) + let floats = Tensor(x) + let i8s = Tensor(floats) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), ints.array) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), floats.array) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), i8s.array) + } + + func testTPUDataTypeCast() { + // Non-TPU mode (e.g. eager) does not support Uint32 casting. + guard _RuntimeConfig.executionMode.isTPU else { return } + + let x = Tensor(ones: [5, 5]) + let ints = Tensor(x) + let floats = Tensor(x) + let u32s = Tensor(floats) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), ints.array) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), floats.array) + XCTAssertEqual(ShapedArray(repeating: 1, shape: [5, 5]), u32s.array) + } + + func testNonTPUBoolToNumericCast() { + // TPU does not support Int8 or 16 casting. + // + // When changing to UInt32, got another TPU/XLA compilation error when + // converting from bools to Uint32 (different from missing kernel error). + if _RuntimeConfig.executionMode.isTPU { return } + + let bools = Tensor(shape: [2, 2], scalars: [true, false, true, false]) + let ints = Tensor(bools) + let floats = Tensor(bools) + let i8s = Tensor(bools) + XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), ints.array) + XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), floats.array) + XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), i8s.array) + } +} diff --git a/Tests/DeepLearningTests/OperatorTests/BasicTests.swift b/Tests/DeepLearningTests/OperatorTests/BasicTests.swift new file mode 100644 index 000000000..112430984 --- /dev/null +++ b/Tests/DeepLearningTests/OperatorTests/BasicTests.swift @@ -0,0 +1,452 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest +@testable import DeepLearning + +final class BasicOperatorTests: XCTestCase { + func testElementIndexing() { + // NOTE: cannot test multiple `Tensor.shape` or `Tensor.scalars` directly + // until send and receive are implemented (without writing a bunch of mini + // tests). Instead, `Tensor.array` is called to make a ShapedArray host copy + // and the ShapedArray is tested. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let element2D = tensor3D[2] + let element1D = tensor3D[1][3] + let element0D = tensor3D[2][0][3] + + let array2D = element2D.array + let array1D = element1D.array + let array0D = element0D.array + + /// Test shapes + XCTAssertEqual([4, 5], array2D.shape) + XCTAssertEqual([5], array1D.shape) + XCTAssertEqual([], array0D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 35.0, to: 40, by: 1)), array1D.scalars) + XCTAssertEqual([43], array0D.scalars) + } + + func testElementIndexingAssignment() { + // NOTE: cannot test multiple `Tensor.shape` or `Tensor.scalars` directly + // until send and receive are implemented (without writing a bunch of mini + // tests). Instead, `Tensor.array` is called to make a ShapedArray host copy + // and the ShapedArray is tested. + var tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + tensor3D[2] = Tensor( + shape: [4, 5], scalars: Array(stride(from: 20.0, to: 40, by: 1))) + let element2D = tensor3D[2] + let element1D = tensor3D[1][3] + let element0D = tensor3D[2][0][3] + + let array2D = element2D.array + let array1D = element1D.array + let array0D = element0D.array + + /// Test shapes + XCTAssertEqual([4, 5], array2D.shape) + XCTAssertEqual([5], array1D.shape) + XCTAssertEqual([], array0D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 20.0, to: 40, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 35.0, to: 40, by: 1)), array1D.scalars) + XCTAssertEqual([23], array0D.scalars) + } + + func testNestedElementIndexing() { + // NOTE: This test could use a clearer name, along with other "indexing" + // tests. Note to update corresponding test names in other files + // (shaped_array.test) as well. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let element1D = tensor3D[1, 3] + let element0D = tensor3D[2, 0, 3] + + let array1D = element1D.array + let array0D = element0D.array + + /// Test shapes + XCTAssertEqual([5], array1D.shape) + XCTAssertEqual([], array0D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 35.0, to: 40, by: 1)), array1D.scalars) + XCTAssertEqual([43], array0D.scalars) + } + + func testSliceIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. 
+ let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let slice3D = tensor3D[2...] + let slice2D = tensor3D[1][0..<2] + let slice1D = tensor3D[0][0][3..<5] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testSliceIndexingAssignment() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. + var tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + tensor3D[2, 0..<5, 0..<6] = Tensor( + shape: [4, 5], scalars: Array(stride(from: 20.0, to: 40, by: 1))) + let slice3D = tensor3D[2...] + let slice2D = tensor3D[1][0..<2] + let slice1D = tensor3D[0][0][3..<5] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 20.0, to: 40, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testEllipsisIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. + var tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + tensor3D[2, TensorRange.ellipsis] = Tensor( + shape: [4, 5], scalars: Array(stride(from: 20.0, to: 40, by: 1))) + let slice3D = tensor3D[2..., TensorRange.ellipsis] + let slice2D = tensor3D[1][0..<2] + let slice1D = tensor3D[0][0][3..<5] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 20.0, to: 40, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testNewAxisIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. 
+ let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let newAxis = TensorRange.newAxis + let ellipsis = TensorRange.ellipsis + let slice3D = tensor3D[2..., newAxis, ellipsis] + let slice2D = tensor3D[1, newAxis][0..<1, 0..<2] + let slice1D = tensor3D[0][newAxis, 0][0..<1, 3..<5, newAxis] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 1, 4, 5], array3D.shape) + XCTAssertEqual([1, 2, 5], array2D.shape) + XCTAssertEqual([1, 2, 1], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testSqueezeAxisIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let newAxis = TensorRange.newAxis + let ellipsis = TensorRange.ellipsis + let squeezeAxis = TensorRange.squeezeAxis + let slice3D = tensor3D[2..., newAxis, ellipsis][squeezeAxis, squeezeAxis] + let slice2D = tensor3D[1, newAxis][squeezeAxis, 0..<2] + let slice1D = tensor3D[0..<1, 0, 3..<5, newAxis][ + squeezeAxis, ellipsis, squeezeAxis] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testStridedSliceIndexing() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let slice3D = tensor3D[2...] + let slice2D = tensor3D[1][0..<3..2] + let slice1D = tensor3D[0][0][1..<5..2] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual(Array(stride(from: 40.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual( + Array(stride(from: 20.0, to: 25, by: 1)) + + Array(stride(from: 30.0, to: 35, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 1.0, to: 5, by: 2)), array1D.scalars) + } + + func testStridedSliceIndexingAssignment() { + // NOTE: cannot test `Tensor.shape` or `Tensor.scalars` directly until send + // and receive are implemented (without writing a bunch of mini tests). + // Instead, `Tensor.array` is called to make a ShapedArray host copy and the + // ShapedArray is tested instead. 
+ var tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + tensor3D[2, 0..<5..2, 0..<6] = Tensor( + shape: [2, 5], scalars: Array(stride(from: 20.0, to: 40, by: 2))) + let slice3D = tensor3D[2...] + let slice2D = tensor3D[1][0..<2] + let slice1D = tensor3D[0][0][3..<5] + + let array3D = slice3D.array + let array2D = slice2D.array + let array1D = slice1D.array + + /// Test shapes + XCTAssertEqual([1, 4, 5], array3D.shape) + XCTAssertEqual([2, 5], array2D.shape) + XCTAssertEqual([2], array1D.shape) + + /// Test scalars + XCTAssertEqual( + Array(stride(from: 20.0, to: 30, by: 2)) + + Array(stride(from: 45.0, to: 50, by: 1)) + + Array(stride(from: 30.0, to: 40, by: 2)) + + Array(stride(from: 55.0, to: 60, by: 1)), array3D.scalars) + XCTAssertEqual(Array(stride(from: 20.0, to: 30, by: 1)), array2D.scalars) + XCTAssertEqual(Array(stride(from: 3.0, to: 5, by: 1)), array1D.scalars) + } + + func testWholeTensorSlicing() { + let t: Tensor = [[[1, 1, 1], [2, 2, 2]], + [[3, 3, 3], [4, 4, 4]], + [[5, 5, 5], [6, 6, 6]]] + let slice2 = t.slice(lowerBounds: [1, 0, 0], upperBounds: [2, 1, 3]) + XCTAssertEqual(ShapedArray(shape: [1, 1, 3], scalars: [3, 3, 3]), slice2.array) + } + + func testAdvancedIndexing() { + // NOTE: cannot test multiple `Tensor.shape` or `Tensor.scalars` directly + // until send and receive are implemented (without writing a bunch of mini + // tests). Instead, `Tensor.array` is called to make a ShapedArray host copy + // and the ShapedArray is tested. + let tensor3D = Tensor( + shape: [3, 4, 5], scalars: Array(stride(from: 0.0, to: 60, by: 1))) + let element2D = tensor3D[1..<3, 0, 3...] + let array2D = element2D.array + + // Test shape + XCTAssertEqual([2, 2], array2D.shape) + + // Test scalars + XCTAssertEqual(Array([23.0, 24.0, 43.0, 44.0]), array2D.scalars) + } + + func testConcatenation() { + // 2 x 3 + let t1 = Tensor([[0, 1, 2], [3, 4, 5]]) + // 2 x 3 + let t2 = Tensor([[6, 7, 8], [9, 10, 11]]) + let concatenated = t1 ++ t2 + let concatenated0 = t1.concatenated(with: t2) + let concatenated1 = t1.concatenated(with: t2, alongAxis: 1) + XCTAssertEqual(ShapedArray(shape: [4, 3], scalars: Array(0..<12)), concatenated.array) + XCTAssertEqual(ShapedArray(shape: [4, 3], scalars: Array(0..<12)), concatenated0.array) + XCTAssertEqual( + ShapedArray(shape: [2, 6], scalars: [0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11]), + concatenated1.array) + } + + func testVJPConcatenation() { + let a1 = Tensor([1,2,3,4]) + let b1 = Tensor([5,6,7,8,9,10]) + + let a2 = Tensor([1,1,1,1]) + let b2 = Tensor([1,1,1,1,1,1]) + + let grads = gradient(at: a2, b2) { a, b in + return ((a1 * a) ++ (b1 * b)).sum() + } + + XCTAssertEqual(a1, grads.0) + XCTAssertEqual(b1, grads.1) + } + + func testVJPConcatenationNegativeAxis() { + let a1 = Tensor([1,2,3,4]) + let b1 = Tensor([5,6,7,8,9,10]) + + let a2 = Tensor([1,1,1,1]) + let b2 = Tensor([1,1,1,1,1,1]) + + let grads = gradient(at: a2, b2) { a, b in + return (a1 * a).concatenated(with: b1 * b, alongAxis: -1).sum() + } + + XCTAssertEqual(a1, grads.0) + XCTAssertEqual(b1, grads.1) + } + + func testTranspose() { + // 3 x 2 -> 2 x 3 + let xT = Tensor([[1, 2], [3, 4], [5, 6]]).transposed() + let xTArray = xT.array + XCTAssertEqual(2, xTArray.rank) + XCTAssertEqual([2, 3], xTArray.shape) + XCTAssertEqual([1, 3, 5, 2, 4, 6], xTArray.scalars) + } + + func testReshape() { + // 2 x 3 -> 1 x 3 x 1 x 2 x 1 + let matrix = Tensor([[0, 1, 2], [3, 4, 5]]) + let reshaped = matrix.reshaped(to: [1, 3, 1, 2, 1]) + + XCTAssertEqual([1, 3, 1, 2, 1], 
reshaped.shape) + XCTAssertEqual(Array(0..<6), reshaped.scalars) + } + + func testFlatten() { + // 2 x 3 -> 6 + let matrix = Tensor([[0, 1, 2], [3, 4, 5]]) + let flattened = matrix.flattened() + + XCTAssertEqual([6], flattened.shape) + XCTAssertEqual(Array(0..<6), flattened.scalars) + } + + func testFlatten0D() { + let scalar = Tensor(5) + let flattened = scalar.flattened() + XCTAssertEqual([1], flattened.shape) + XCTAssertEqual([5], flattened.scalars) + } + + func testReshapeToScalar() { + // 1 x 1 -> scalar + let z = Tensor([[10]]).reshaped(to: []) + XCTAssertEqual([], z.shape) + } + + func testReshapeTensor() { + // 2 x 3 -> 1 x 3 x 1 x 2 x 1 + let x = Tensor(repeating: 0.0, shape: [2, 3]) + let y = Tensor(repeating: 0.0, shape: [1, 3, 1, 2, 1]) + let result = x.reshaped(like: y) + XCTAssertEqual([1, 3, 1, 2, 1], result.shape) + } + + func testUnbroadcast1() { + let x = Tensor(repeating: 1, shape: [2, 3, 4, 5]) + let y = Tensor(repeating: 1, shape: [4, 5]) + let z = x.unbroadcast(like: y) + XCTAssertEqual(ShapedArray(repeating: 6, shape: [4, 5]), z.array) + } + + func testUnbroadcast2() { + let x = Tensor(repeating: 1, shape: [2, 3, 4, 5]) + let y = Tensor(repeating: 1, shape: [3, 1, 5]) + let z = x.unbroadcast(like: y) + XCTAssertEqual(ShapedArray(repeating: 8, shape: [3, 1, 5]), z.array) + } + + func testSliceUpdate() { + guard !_RuntimeConfig.executionMode.isTPU else { return } + var t1 = Tensor([[1, 2, 3], [4, 5, 6]]) + t1[0] = Tensor(zeros: [3]) + XCTAssertEqual(ShapedArray(shape:[2, 3], scalars: [0, 0, 0, 4, 5, 6]), t1.array) + var t2 = t1 + t2[0][2] = Tensor(3) + XCTAssertEqual(ShapedArray(shape:[2, 3], scalars: [0, 0, 3, 4, 5, 6]), t2.array) + var t3 = Tensor([[true, true, true], [false, false, false]]) + t3[0][1] = Tensor(false) + XCTAssertEqual(ShapedArray( + shape:[2, 3], scalars: [true, false, true, false, false, false]), t3.array) + var t4 = Tensor([[true, true, true], [false, false, false]]) + t4[0] = Tensor(repeating: false, shape: [3]) + XCTAssertEqual(ShapedArray(repeating: false, shape: [2, 3]), t4.array) + } + + func testBroadcastTensor() { + // 1 -> 2 x 3 x 4 + let one = Tensor(1) + var target = Tensor(repeating: 0.0, shape: [2, 3, 4]) + let broadcasted = one.broadcast(like: target) + XCTAssertEqual(Tensor(repeating: 1, shape: [2, 3, 4]), broadcasted) + target .= Tensor(repeating: 1, shape: [1, 3, 1]) + XCTAssertEqual(Tensor(repeating: 1, shape: [2, 3, 4]), target) + } +} diff --git a/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift b/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift new file mode 100644 index 000000000..f667dbbcc --- /dev/null +++ b/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift @@ -0,0 +1,30 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import XCTest +@testable import DeepLearning + +final class ComparisonOperatorTests: XCTestCase { + func testElementwiseComparison() { + let x = Tensor([0, 1, 2]) + let y = Tensor([2, 1, 3]) + XCTAssertEqual((x .< y).scalars, [true, false, true]) + } + + func testLexicographicalComparison() { + let x = Tensor([0, 1, 2, 3, 4]) + let y = Tensor([2, 3, 4, 5, 6]) + XCTAssertTrue(x < y) + } +} diff --git a/Tests/DeepLearningTests/OperatorTests/MathTests.swift b/Tests/DeepLearningTests/OperatorTests/MathTests.swift new file mode 100644 index 000000000..8c1898fbb --- /dev/null +++ b/Tests/DeepLearningTests/OperatorTests/MathTests.swift @@ -0,0 +1,199 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest +@testable import DeepLearning + +final class MathOperatorTests: XCTestCase { + func testReduction() { + // 2 x 5 + let x = Tensor([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]) + XCTAssertEqual(Tensor(30), x.sum().toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [2, 4, 6, 8, 10]), + x.sum(squeezingAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [1, 5], scalars: [2, 4, 6, 8, 10]), + x.sum(alongAxes: 0).toHost(shape: [])) + + XCTAssertEqual(Tensor(14400), x.product().toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [1, 4, 9, 16, 25]), + x.product(squeezingAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [1, 5], scalars: [1, 4, 9, 16, 25]), + x.product(alongAxes: 0).toHost(shape: [])) + + XCTAssertEqual(Tensor(3), x.mean().toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [1, 2, 3, 4, 5]), + x.mean(squeezingAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [1, 2, 3, 4, 5]), + x.mean(alongAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [2], scalars: [3, 3]), + x.mean(squeezingAxes: 1).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [1, 2], scalars: [3, 3]), + x.mean(alongAxes: 1).toHost(shape: [])) + + XCTAssertEqual(Tensor(2), x.variance().toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [0, 0, 0, 0, 0]), + x.variance(squeezingAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [5], scalars: [0, 0, 0, 0, 0]), + x.variance(alongAxes: 0).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [2], scalars: [2, 2]), + x.variance(squeezingAxes: 1).toHost(shape: [])) + XCTAssertEqual( + Tensor(shape: [1, 2], scalars: [2, 2]), + x.variance(alongAxes: 1).toHost(shape: [])) + } + + func testArgmax() { + // 2 x 3 + let x = Tensor([[0, 1, 2], [3, 4, 5]]) + let argmax0 = x.argmax(squeezingAxis: 0) + let argmax1 = x.argmax(squeezingAxis: 1) + let scalarsArgmax = x.argmax() + XCTAssertEqual(ShapedArray(shape: [3], scalars: [1, 1, 1]), argmax0.array) + XCTAssertEqual(ShapedArray(shape: [2], scalars: [2, 2]), argmax1.array) + XCTAssertEqual(ShapedArray(shape: [], scalars: [5]), scalarsArgmax.array) + } + + func testCeilAndFloor() { + let x = Tensor([-1.3, -0.4, 0.5, 1.6]) + let xFloor = 
floor(x) + let xCeil = ceil(x) + XCTAssertEqual(ShapedArray(shape: [4], scalars: [-2, -1, 0, 1]), xFloor.array) + XCTAssertEqual(ShapedArray(shape: [4], scalars: [-1, 0, 1, 2]), xCeil.array) + } + + func testSimpleMath() { + let x = Tensor([1.2, 1.2]) + let y = tanh(x) + let array = y.array + XCTAssertEqual([2], array.shape) + XCTAssertEqual([0.833655, 0.833655], array.scalars, accuracy: 0.0001) + } + + func testStandardDeviation() { + XCTAssertEqual(Tensor(0), Tensor([1]).standardDeviation()) + XCTAssertEqual(Tensor(0.5), Tensor([0, 1]).standardDeviation(alongAxes: 0)) + XCTAssertEqual(Tensor(0.5), Tensor([0, 1]).standardDeviation()) + XCTAssertEqual( + 2.87228132, + Tensor(rangeFrom: 0, to: 10, stride: 1).standardDeviation().scalarized(), + accuracy: 0.001) + let matrix = Tensor(rangeFrom: 0, to: 10, stride: 1).reshaped(to: [2, 5]) + XCTAssertEqual(2.87228132, matrix.standardDeviation().scalarized(), accuracy: 0.001) + XCTAssertEqual( + [1.4142, 1.4142], + matrix.standardDeviation(alongAxes: 1).array.scalars, + accuracy: 0.001) + } + + func test3Adds() { + let a = Tensor([1]) + let b = Tensor([2]) + let c = Tensor([3]) + + let o = a + b + c + XCTAssertEqual([6], o.scalars) + } + + func testMultiOpMath() { + let x = Tensor([1.2, 1.2]) + let y = Tensor([2.4, 2.4]) + let t1 = x + y + let t2 = t1 * t1 + let t3 = sqrt(t2) + + let array1 = t1.array + let array2 = t2.array + let array3 = t3.array + XCTAssertEqual([2], array1.shape) + XCTAssertEqual([2], array2.shape) + XCTAssertEqual([2], array3.shape) + XCTAssertEqual([3.6, 3.6], array1.scalars, accuracy: 0.001) + XCTAssertEqual([12.96, 12.96], array2.scalars, accuracy: 0.001) + XCTAssertEqual([3.6, 3.6], array3.scalars, accuracy: 0.001) + } + + func testXWPlusB() { + // Shape: 1 x 4 + let x = Tensor([[1.0, 2.0, 2.0, 1.0]]) + // Shape: 4 x 2 + let w = Tensor([[1.0, 0.0], [3.0, 0.0], [2.0, 3.0], [1.0, 0.0]]) + // Shape: 2 + let b = Tensor([0.5, 0.5]) + // Shape: 1 x 2 (broadcasted) + let result = matmul(x, w) + b + XCTAssertEqual([1, 2], result.shape) + XCTAssertEqual([12.5, 6.5], result.scalars) + } + + @inline(never) + func testXORInference() { + func xor(_ x: Float, _ y: Float) -> Float { + let x = Tensor([x, y]).reshaped(to: [1, 2]) + + // FIXME: If params are declared outside of `xor`, it would crash. 
+ // 2 x 4 + let w1 = Tensor( + [[-1.83586664, -0.20809225, 0.47667537, 1.90780607], + [-1.83523219, -0.51167348, 0.15490439, 1.91018065]]) + // 1 x 4 + let b1 = Tensor([[2.54353216, 0.25132703, -0.16503136, -0.85754058]]) + // 4 x 1 + let w2 = Tensor([[3.04350065], [0.35590511], [-0.3252157], [3.49349223]]) + // 1 x 1 + let b2 = Tensor([[-0.74635993]]) + + let o1 = tanh(matmul(x, w1) + b1) + let y = tanh(matmul(o1, w2) + b2) + return y.array.scalars[0] // TODO: use better scalar getter + } + + XCTAssertEqual(0.0, xor(0.0, 0.0), accuracy: 0.1) + XCTAssertEqual(1.0, xor(0.0, 1.0), accuracy: 0.1) + XCTAssertEqual(1.0, xor(1.0, 0.0), accuracy: 0.1) + XCTAssertEqual(0.0, xor(1.0, 1.0), accuracy: 0.1) + } + + func testMLPClassifierStruct() { + struct MLPClassifier { + // 2 x 4 + var w1 = Tensor([[1.0, 0.8, 0.4, 0.4], + [0.4, 0.3, 0.2, 0.1]]) + // 4 x 1 + var w2 = Tensor([[0.4], [0.4], [0.3], [0.9]]) + var b1 = Tensor(zeros: [1, 4]) + var b2 = Tensor(zeros: [1, 1]) + + func prediction(for x: Tensor) -> Tensor { + let o1 = tanh(matmul(x, w1) + b1) + return tanh(matmul(o1, w2) + b2) + } + } + + let input = Tensor([[1, 0.5]]) + let classifier = MLPClassifier() + let prediction = classifier.prediction(for: input) + XCTAssertEqual([0.816997], prediction.scalars, accuracy: 0.001) + } +} diff --git a/Tests/DeepLearningTests/TensorTests.swift b/Tests/DeepLearningTests/TensorTests.swift new file mode 100644 index 000000000..01e18fda8 --- /dev/null +++ b/Tests/DeepLearningTests/TensorTests.swift @@ -0,0 +1,81 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest +@testable import DeepLearning + +final class TensorTests: XCTestCase { + func testSimpleCond() { + func selectValue(_ pred: Bool) -> Tensor { + let a = Tensor(0) + let b = Tensor(1) + if pred { + return a + } + return b + } + + XCTAssertEqual(0, selectValue(true).scalar) + } + + @inline(never) + func testRankGetter() { + let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) + XCTAssertEqual(3, tensor.rank) + } + + // TODO: Merge all rank/shape getter tests into one when we support code motion to avoid sends. 
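  // A possible merged form once that is supported (an illustrative sketch only; the separate
  // getter tests below remain authoritative):
  //
  //   @inline(never)
  //   func testRankAndShapeGetters() {
  //     let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60))
  //     XCTAssertEqual(3, tensor.rank)
  //     XCTAssertEqual([3, 4, 5], tensor.shape)
  //   }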
+ + @inline(never) + func testRankGetter2() { + let vector = Tensor([1]) + XCTAssertEqual(1, vector.rank) + } + + @inline(never) + func testRankGetter3() { + let matrix = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + XCTAssertEqual(2, matrix.rank) + } + + @inline(never) + func testRankGetter4() { + let ones = Tensor(ones: [1, 2, 2, 2, 2, 2, 1]) + XCTAssertEqual(7, ones.rank) + } + + @inline(never) + func testShapeGetter() { + let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) + XCTAssertEqual([3, 4, 5], tensor.shape) + } + + @inline(never) + func testShapeGetter2() { + let vector = Tensor([1]) + XCTAssertEqual([1], vector.shape) + } + + @inline(never) + func testShapeGetter3() { + let matrix = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + XCTAssertEqual([2, 3], matrix.shape) + } + + @inline(never) + func testShapeGetter4() { + let ones = Tensor(ones: [1, 2, 2, 2, 2, 2, 1]) + XCTAssertEqual([1, 2, 2, 2, 2, 2, 1], ones.shape) + } +} diff --git a/Tests/DeepLearningTests/XCTestManifests.swift b/Tests/DeepLearningTests/XCTestManifests.swift index 96a9048a5..e75c25298 100644 --- a/Tests/DeepLearningTests/XCTestManifests.swift +++ b/Tests/DeepLearningTests/XCTestManifests.swift @@ -22,6 +22,10 @@ public func allTests() -> [XCTestCaseEntry] { testCase(TrivialModelTests.allTests), testCase(SequentialTests.allTests), testCase(LayerTests.allTests), + testCase(TensorTests.allTests), + testCase(BasicOperatorTests.allTests), + testCase(ComparisonOperatorTests.allTests), + testCase(MathOperatorTests.allTests), ] } #endif From 0e06843857b629f9643febf818fdf39032b5daf5 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 18:27:41 -0400 Subject: [PATCH 45/55] Minor bug fix. --- Sources/DeepLearning/Operators/Math.swift | 31 ++++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 8851080ec..7ab4fb6ba 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -658,8 +658,7 @@ public func softmax(_ x: Tensor) -> Tensor { // TODO: [AD]. 
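// Note: the implementation below exponentiates `x` directly, which can overflow for large
// inputs. A numerically stable sketch (assuming a `max(alongAxes:)` reduction is available for
// these scalars) would shift by the per-axis maximum before exponentiating:
//
//     let shifted = x - x.max(alongAxes: axis)
//     return exp(shifted) / exp(shifted).sum(alongAxes: axis)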
public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { let xExp = exp(x) - let xExpSum = Raw.sum(xExp, reductionIndices: Tensor(Int32(axis)), keepDims: true) - return xExp / xExpSum + return xExp / xExp.sum(alongAxes: Tensor(Int32(axis))) } @inlinable @@ -1340,7 +1339,13 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { @inlinable func _vjpSum(squeezingAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = sum(squeezingAxes: axes) - return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) }) + return (value, { [shape = shapeTensor] in + var result = $0 + for i in axes.array.scalars { + result = result.expandingShape(at: Int(i)) + } + return result.broadcast(toShape: shape) + }) } @inlinable @@ -1351,20 +1356,16 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { } @inlinable - func _vjpMean(squeezingAxes axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { - let value = mean(squeezingAxes: axes) - return (value, { [shape = shapeTensor, count = axes.map { shape[$0] }.reduce(1, *)] in - $0.broadcast(toShape: shape) / Tensor(Scalar(count)) - }) - } - - @inlinable - func _vjpMean( - squeezingAxes axes: Tensor - ) -> (Tensor, (Tensor) -> Tensor) { + func _vjpMean(squeezingAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = mean(squeezingAxes: axes) let count = Raw.gather(params: shapeTensor, indices: axes).product() - return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) / Tensor(count) }) + return (value, { [shape = shapeTensor] in + var result = $0 + for i in axes.array.scalars { + result = result.expandingShape(at: Int(i)) + } + return result.broadcast(toShape: shape) / Tensor(count) + }) } } From eb407cf8d13655f5d10a0eaadaf6340a687995dd Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 18:59:47 -0400 Subject: [PATCH 46/55] Addressed Richard's comments. 
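Much of this change normalizes whitespace around colons in generic constraints, protocol
conformances, and operator declarations, dropping the space before the colon. A representative
before/after, shown here only for illustration:

    // Before
    extension Tensor : AdditiveArithmetic where Scalar : Numeric { ... }
    // After
    extension Tensor: AdditiveArithmetic where Scalar: Numeric { ... }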
--- ...ents.swift => DifferentialOperators.swift} | 32 +- Sources/DeepLearning/Helpers.swift | 4 - Sources/DeepLearning/Initializers.swift | 32 +- Sources/DeepLearning/Layer.swift | 2 +- Sources/DeepLearning/Operators/Basic.swift | 87 ++++-- .../DeepLearning/Operators/Comparison.swift | 28 +- Sources/DeepLearning/Operators/Math.swift | 274 +++++++++--------- Sources/DeepLearning/Operators/NN.swift | 39 ++- Sources/DeepLearning/Operators/README.md | 41 --- Sources/DeepLearning/Optimizer.swift | 27 +- Sources/DeepLearning/PythonConversion.swift | 16 +- Sources/DeepLearning/Random.swift | 12 +- Sources/DeepLearning/Tensors.swift | 22 +- 13 files changed, 295 insertions(+), 321 deletions(-) rename Sources/DeepLearning/{Gradients.swift => DifferentialOperators.swift} (79%) delete mode 100644 Sources/DeepLearning/Operators/README.md diff --git a/Sources/DeepLearning/Gradients.swift b/Sources/DeepLearning/DifferentialOperators.swift similarity index 79% rename from Sources/DeepLearning/Gradients.swift rename to Sources/DeepLearning/DifferentialOperators.swift index 04a37fe8b..bfb53db77 100644 --- a/Sources/DeepLearning/Gradients.swift +++ b/Sources/DeepLearning/DifferentialOperators.swift @@ -22,14 +22,14 @@ import TensorFlow public extension Differentiable { @inlinable - func gradient( + func gradient( in f: @differentiable (Self) -> Tensor ) -> CotangentVector { return self.pullback(in: f)(Tensor(1)) } @inlinable - func valueWithGradient( + func valueWithGradient( in f: @differentiable (Self) -> Tensor ) -> (value: Tensor, gradient: CotangentVector) { let (y, pb) = self.valueWithPullback(in: f) @@ -37,7 +37,7 @@ public extension Differentiable { } @inlinable - func gradient( + func gradient( at x: T, in f: @differentiable (Self, T) -> Tensor ) -> (CotangentVector, T.CotangentVector) { @@ -45,7 +45,7 @@ public extension Differentiable { } @inlinable - func valueWithGradient( + func valueWithGradient( at x: T, in f: @differentiable (Self, T) -> Tensor ) -> (value: Tensor, gradient: (CotangentVector, T.CotangentVector)) { @@ -65,7 +65,7 @@ public func valueWithGradient( at x: T, in f: @differentiable (T) -> Tensor ) -> (value: Tensor, gradient: T.CotangentVector) -where T : Differentiable, R : TensorFlowFloatingPoint { +where T: Differentiable, R: TensorFlowFloatingPoint { let (y, pullback) = valueWithPullback(at: x, in: f) return (y, pullback(Tensor(1))) } @@ -76,7 +76,7 @@ public func valueWithGradient( _ y: U, in f: @differentiable (T, U) -> Tensor ) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector)) - where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { let (y, pullback) = valueWithPullback(at: x, y, in: f) return (y, pullback(Tensor(1))) } @@ -88,7 +88,7 @@ public func valueWithGradient( _ z: V, in f: @differentiable (T, U, V) -> Tensor ) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector, V.CotangentVector)) - where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { let (y, pullback) = valueWithPullback(at: x, y, z, in: f) return (y, pullback(Tensor(1))) } @@ -99,7 +99,7 @@ public func valueWithGradient( public func valueWithGradient( of f: @escaping @differentiable (T) -> Tensor ) -> (T) -> (value: Tensor, gradient: T.CotangentVector) - where T : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, R: 
TensorFlowFloatingPoint { return { x in valueWithGradient(at: x, in: f) } } @@ -107,7 +107,7 @@ public func valueWithGradient( public func valueWithGradient( of f: @escaping @differentiable (T, U) -> Tensor ) -> (T, U) -> (value: Tensor, gradient: (T.CotangentVector, U.CotangentVector)) - where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return { x, y in valueWithGradient(at: x, y, in: f) } } @@ -117,7 +117,7 @@ public func valueWithGradient( ) -> (T, U, V) -> ( value: Tensor, gradient: (T.CotangentVector, U.CotangentVector, V.CotangentVector)) - where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { return { x, y, z in valueWithGradient(at: x, y, z, in: f) } } @@ -127,7 +127,7 @@ public func valueWithGradient( public func gradient( at x: T, in f: @differentiable (T) -> Tensor -) -> T.CotangentVector where T : Differentiable, R : TensorFlowFloatingPoint { +) -> T.CotangentVector where T: Differentiable, R: TensorFlowFloatingPoint { return pullback(at: x, in: f)(Tensor(1)) } @@ -137,7 +137,7 @@ public func gradient( _ y: U, in f: @differentiable (T, U) -> Tensor ) -> (T.CotangentVector, U.CotangentVector) - where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return pullback(at: x, y, in: f)(Tensor(1)) } @@ -148,7 +148,7 @@ public func gradient( _ z: V, in f: @differentiable (T, U, V) -> Tensor ) -> (T.CotangentVector, U.CotangentVector, V.CotangentVector) - where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { return pullback(at: x, y, z, in: f)(Tensor(1)) } @@ -157,7 +157,7 @@ public func gradient( @inlinable public func gradient( of f: @escaping @differentiable (T) -> Tensor -) -> (T) -> T.CotangentVector where T : Differentiable, R : TensorFlowFloatingPoint { +) -> (T) -> T.CotangentVector where T: Differentiable, R: TensorFlowFloatingPoint { return { x in gradient(at: x, in: f) } } @@ -165,7 +165,7 @@ public func gradient( public func gradient( of f: @escaping @differentiable (T, U) -> Tensor ) -> (T, U) -> (T.CotangentVector, U.CotangentVector) - where T : Differentiable, U : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return { x, y in gradient(at: x, y, in: f) } } @@ -173,6 +173,6 @@ public func gradient( public func gradient( of f: @escaping @differentiable (T, U, V) -> Tensor ) -> (T, U, V) -> (T.CotangentVector, U.CotangentVector, V.CotangentVector) - where T : Differentiable, U : Differentiable, V : Differentiable, R : TensorFlowFloatingPoint { + where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { return { x, y, z in gradient(at: x, y, z, in: f) } } diff --git a/Sources/DeepLearning/Helpers.swift b/Sources/DeepLearning/Helpers.swift index 39bcf1e1d..4d9c0217b 100644 --- a/Sources/DeepLearning/Helpers.swift +++ b/Sources/DeepLearning/Helpers.swift @@ -30,7 +30,3 @@ public func identity(_ x: Tensor) -> Tensor { func pow(_ x: T, _ y: T) -> T { return T(pow(Double(x), Double(y))) } - -extension Array where Element : Differentiable { - -} diff --git a/Sources/DeepLearning/Initializers.swift 
b/Sources/DeepLearning/Initializers.swift index 204ca8a26..17884c3ca 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -36,7 +36,7 @@ public extension Tensor { /// - shape: The dimensions of the tensor. @inlinable @differentiable( - vjp: _vjpInit(repeating:shape:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpInit(repeating:shape:) where Scalar: TensorFlowFloatingPoint) init(repeating repeatedValue: Scalar, shape: TensorShape) { self = Raw.fill( dims: Tensor(shape.dimensions.map(Int32.init)), @@ -46,7 +46,7 @@ public extension Tensor { /// Creates a tensor by broadcasting the given scalar to a given rank with /// all dimensions being 1. @inlinable - // @differentiable(where Scalar : TensorFlowFloatingPoint) + // @differentiable(where Scalar: TensorFlowFloatingPoint) init(broadcasting scalar: Scalar, rank: Int) { self = Tensor(scalar).reshaped(to: TensorShape(repeating: 1, count: rank)) } @@ -60,7 +60,7 @@ public extension Tensor { } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpInit( repeating repeatedValue: Scalar, @@ -76,7 +76,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // Casting //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { /// Perform an element-wise type conversion from a `Bool` tensor. @inlinable init(_ other: Tensor) { @@ -86,16 +86,16 @@ public extension Tensor where Scalar : Numeric { /// Perform an element-wise conversion from another `Tensor`. @inlinable @differentiable( - vjp: _vjpCast where Scalar : TensorFlowFloatingPoint, + vjp: _vjpCast where Scalar: TensorFlowFloatingPoint, OtherScalar: TensorFlowFloatingPoint) - init(_ other: Tensor) { + init(_ other: Tensor) { self = Raw.cast(other) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable - static func _vjpCast( + static func _vjpCast( _ other: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (Tensor(other), { v in Tensor(v) }) @@ -109,7 +109,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - // @differentiable(where Scalar : TensorFlowFloatingPoint) + // @differentiable(where Scalar: TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } @@ -131,7 +131,7 @@ public extension Tensor { /// Tensor(stacking: [x, y, z], alongAxis: 1) // is [[1, 2, 3], [4, 5, 6]] /// ``` /// - /// This is the opposite of `Tensor.unstacked`. + /// This is the opposite of `Tensor.unstack(alongAxis:)`. /// /// - Parameters: /// - tensors: Tensors to stack. @@ -143,7 +143,7 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - // @differentiable(vjp: _vjpStacking where Scalar : TensorFlowFloatingPoint) + // @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -181,14 +181,14 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. 
@inlinable - // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar : TensorFlowFloatingPoint) + // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar: TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } } -// internal extension Tensor where Scalar : TensorFlowFloatingPoint { +// internal extension Tensor where Scalar: TensorFlowFloatingPoint { // @inlinable // static func _vjpStacking( // stacking tensors: [Tensor], @@ -206,7 +206,7 @@ public extension Tensor { // alongAxis axis: Int = 0 // ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { // let result = Tensor(concatenating: tensors, alongAxis: axis) -// let posAxis = axis < 0 ? axis + tensors[0].rank : axis +// let posAxis = axis < 0 ? axis + tensors[0].rank: axis // let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) // return (result, { [count = tensors.count] v in // if count == 1 { return Array.DifferentiableView([v]) } @@ -220,7 +220,7 @@ public extension Tensor { // Numeric //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { /// Creates a tensor with all scalars set to zero. /// /// - Parameter shape: Shape of the tensor. @@ -418,7 +418,7 @@ public extension Tensor where Scalar: BinaryFloatingPoint, } } -fileprivate extension Tensor where Scalar : BinaryFloatingPoint { +fileprivate extension Tensor where Scalar: BinaryFloatingPoint { private static func glorot( fromStandardUniform randomUniform: __shared Tensor, shape: __shared TensorShape diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 1ab238fe2..29052a3ef 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -558,7 +558,7 @@ public struct TransposedConv2D: Layer { self.activation = activation self.strides = strides self.padding = padding - self.paddingIndex = padding == .same ? 0 : 1 + self.paddingIndex = padding == .same ? 0: 1 } /// Returns the output obtained from applying the layer to the given input. diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 71a8970a0..56cdde2fc 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -29,10 +29,37 @@ public extension TensorFlowScalar { } public extension Tensor { + /// Unpacks the given dimension of a rank-`R` tensor into multiple rank-`(R-1)` tensors. Unpacks + /// `N` tensors from this tensor by chipping it along the `axis` dimension, where `N` is + /// inferred from this tensor's shape. For example, given a tensor with shape `[A, B, C, D]`: + /// + /// - If `axis == 0` then the `i`th tensor in the returned array is the slice + /// `self[i, :, :, :]` and each tensor in that array will have shape `[B, C, D]`. + /// (Note that the dimension unpacked along is gone, unlike + /// `Tensor.split(numSplits:alongAxis)`, or `Tensor.split(sizes:alongAxis)`). + /// - If `axis == 1` then the `i`th tensor in the returned array is the slice + /// `value[:, i, :, :]` and each tensor in that array will have shape `[A, C, D]`. + /// - Etc. + /// + /// This is the opposite of `Tensor.init(stacking:alongAxis:)`. + /// + /// - Parameters: + /// - axis: Dimension along which to unstack. Negative values wrap around. 
+ /// + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: Array containing the unstacked tensors. + @inlinable + // @differentiable(vjp: _vjpUnstack(alongAxis:) wrt: self where Scalar : TensorFlowFloatingPoint) + func unstack(alongAxis axis: Int = 0) -> [Tensor] { + return Raw.unpack(value: self, num: shape[axis], axis: Int64(axis)) + } + /// Reshape to the shape of the specified `Tensor`. /// - Precondition: The number of scalars matches the new shape. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func reshaped(like other: Tensor) -> Tensor { return reshaped(toShape: other.shapeTensor) } @@ -40,7 +67,7 @@ public extension Tensor { /// Reshape to the specified shape. /// - Precondition: The number of scalars matches the new shape. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func reshaped(to newShape: TensorShape) -> Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. return reshaped(toShape: Tensor({newShape.dimensions.map(Int32.init)}())) @@ -51,14 +78,14 @@ public extension Tensor { @inlinable @differentiable( wrt: self, - vjp: _vjpReshaped(toShape:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpReshaped(toShape:) where Scalar: TensorFlowFloatingPoint) func reshaped(toShape newShape: Tensor) -> Tensor { return Raw.reshape(self, shape: newShape) } /// Return a copy of the tensor collapsed into a 1-D `Tensor`, in row-major order. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func flattened() -> Tensor { return reshaped(to: [-1]) } @@ -66,14 +93,14 @@ public extension Tensor { /// Returns a shape-expanded `Tensor`, with a dimension of 1 inserted at the /// specified shape index. @inlinable - @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar: TensorFlowFloatingPoint) func expandingShape(at shapeIndex: Int) -> Tensor { return Raw.expandDims(self, dim: Tensor(Int32(shapeIndex))) } /// Returns a rank-lifted `Tensor` with a leading dimension of 1. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func rankLifted() -> Tensor { return expandingShape(at: 0) } @@ -81,7 +108,7 @@ public extension Tensor { /// Remove the specified dimensions of size 1 from the shape of a tensor. If no dimensions are /// specified, then all dimensions of size 1 will be removed. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func squeezingShape(at axes: Int...) -> Tensor { return squeezingShape(at: axes) } @@ -89,13 +116,13 @@ public extension Tensor { /// Remove the specified dimensions of size 1 from the shape of a tensor. If no dimensions are /// specified, then all dimensions of size 1 will be removed. 
@inlinable - @differentiable(wrt: self, vjp: _vjpSqueezingShape(at:) where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpSqueezingShape(at:) where Scalar: TensorFlowFloatingPoint) func squeezingShape(at axes: [Int]) -> Tensor { return Raw.squeeze(self, squeezeDims: axes.map(Int32.init)) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpReshaped(toShape newShape: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = reshaped(toShape: newShape) @@ -119,14 +146,14 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // Other Tensor Transformations //===------------------------------------------------------------------------------------------===// -infix operator ++ : AdditionPrecedence +infix operator ++: AdditionPrecedence public extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @inlinable @differentiable( wrt: self, - vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpTransposed(withPermutations:) where Scalar: TensorFlowFloatingPoint) func transposed(withPermutations permutations: Tensor) -> Tensor { return Raw.transpose(self, perm: permutations) } @@ -135,7 +162,7 @@ public extension Tensor { @inlinable @differentiable( wrt: self, - vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpTransposed(withPermutations:) where Scalar: TensorFlowFloatingPoint) func transposed(withPermutations permutations: [Int]) -> Tensor { let permutations = permutations.map(Int32.init) return transposed(withPermutations: Tensor(permutations)) @@ -144,14 +171,14 @@ public extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @inlinable @differentiable( - wrt: self, vjp: _vjpTransposed(withPermutations:) where Scalar : TensorFlowFloatingPoint) + wrt: self, vjp: _vjpTransposed(withPermutations:) where Scalar: TensorFlowFloatingPoint) func transposed(withPermutations permutations: Int...) -> Tensor { return transposed(withPermutations: permutations) } /// Returns a transposed tensor, with dimensions permuted in reverse order. @inlinable - @differentiable(wrt: self, vjp: _vjpTransposed() where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpTransposed() where Scalar: TensorFlowFloatingPoint) func transposed() -> Tensor { let defaultPermutations = rankTensor - 1 - Tensor( rangeFrom: 0, to: Int32(rank), stride: 1) @@ -163,7 +190,7 @@ public extension Tensor { /// specified axis. /// - Precondition: The axis must be in the range `-rank.. Tensor { return Tensor(concatenating: [self, other], alongAxis: axis) } @@ -174,13 +201,13 @@ public extension Tensor { /// and may be controversial. The existence/naming of `++` will be discussed /// during a later API design phase. @inlinable - @differentiable(where Scalar : TensorFlowFloatingPoint) + @differentiable(where Scalar: TensorFlowFloatingPoint) static func ++ (lhs: Tensor, rhs: Tensor) -> Tensor { return lhs.concatenated(with: rhs) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpTransposed( withPermutations permutations: Tensor @@ -211,7 +238,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { with other: Tensor, alongAxis axis: Int ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { - let idx = axis < 0 ? 
axis + rank : axis + let idx = axis < 0 ? axis + rank: axis let splits = Tensor([shapeTensor[idx], other.shapeTensor[idx]]) return (concatenated(with: other, alongAxis: axis), { result in let gradients = Raw.splitV( @@ -256,7 +283,7 @@ public extension Tensor { } // TODO: Why is this limited only to numeric data types whereas `broadcast` is not? -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { @inlinable func unbroadcast(toShape otherShape: Tensor) -> Tensor { let rankDiff = (rankTensor - otherShape.scalarCountTensor).rankLifted() @@ -284,7 +311,7 @@ public extension Tensor where Scalar : Numeric { // Padding //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { /// Returns a padded tensor according to the specified padding sizes. @inlinable func padded(forSizes sizes: [(before: Int, after: Int)], with value: Scalar = 0) -> Tensor { @@ -341,7 +368,7 @@ public extension Tensor { } } -public enum TensorRange : TensorRangeExpression { +public enum TensorRange: TensorRangeExpression { case ellipsis case newAxis case squeezeAxis @@ -355,7 +382,7 @@ public enum TensorRange : TensorRangeExpression { public var tensorRange: TensorRange { return self } } -extension TensorRange : Equatable { +extension TensorRange: Equatable { public static func == (lhs: TensorRange, rhs: TensorRange) -> Bool { switch (lhs, rhs) { case (.ellipsis, .ellipsis), @@ -382,45 +409,45 @@ public protocol TensorRangeExpression { } // TODO: Cannot extend non-nominal type 'UnboundedRange'. -// extension UnboundedRange : TensorRangeExpression { +// extension UnboundedRange: TensorRangeExpression { // public var tensorRange: TensorRange { return .ellipsis } // } -extension Int : TensorRangeExpression { +extension Int: TensorRangeExpression { public var tensorRange: TensorRange { return .index(self) } } -extension Range : TensorRangeExpression where Bound == Int { +extension Range: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .range(self, stride: 1) } } -extension ClosedRange : TensorRangeExpression where Bound == Int { +extension ClosedRange: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .closedRange(self, stride: 1) } } -extension PartialRangeFrom : TensorRangeExpression where Bound == Int { +extension PartialRangeFrom: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .partialRangeFrom(self, stride: 1) } } -extension PartialRangeUpTo : TensorRangeExpression where Bound == Int { +extension PartialRangeUpTo: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .partialRangeUpTo(self, stride: 1) } } -extension PartialRangeThrough : TensorRangeExpression where Bound == Int { +extension PartialRangeThrough: TensorRangeExpression where Bound == Int { public var tensorRange: TensorRange { return .partialRangeThrough(self, stride: 1) } } -infix operator .. 
: StridedRangeFormationPrecedence +infix operator ..: StridedRangeFormationPrecedence precedencegroup StridedRangeFormationPrecedence { associativity: left higherThan: CastingPrecedence diff --git a/Sources/DeepLearning/Operators/Comparison.swift b/Sources/DeepLearning/Operators/Comparison.swift index 2bc7329be..02bf5fadf 100644 --- a/Sources/DeepLearning/Operators/Comparison.swift +++ b/Sources/DeepLearning/Operators/Comparison.swift @@ -16,14 +16,14 @@ import TensorFlow #endif -infix operator .< : ComparisonPrecedence -infix operator .<= : ComparisonPrecedence -infix operator .>= : ComparisonPrecedence -infix operator .> : ComparisonPrecedence -infix operator .== : ComparisonPrecedence -infix operator .!= : ComparisonPrecedence - -public extension Tensor where Scalar : Numeric & Comparable { +infix operator .<: ComparisonPrecedence +infix operator .<=: ComparisonPrecedence +infix operator .>=: ComparisonPrecedence +infix operator .>: ComparisonPrecedence +infix operator .==: ComparisonPrecedence +infix operator .!=: ComparisonPrecedence + +public extension Tensor where Scalar: Numeric & Comparable { /// Computes `lhs < rhs` element-wise and returns a `Tensor` of Boolean /// scalars. @inlinable static func .< (lhs: Tensor, rhs: Tensor) -> Tensor { @@ -105,7 +105,7 @@ public extension Tensor where Scalar : Numeric & Comparable { } } -extension Tensor : Equatable where Scalar : Equatable { +extension Tensor: Equatable where Scalar: Equatable { @inlinable public static func == (lhs: Tensor, rhs: Tensor) -> Bool { return (lhs .== rhs).all() @@ -117,7 +117,7 @@ extension Tensor : Equatable where Scalar : Equatable { } } -extension Tensor : Comparable where Scalar : Numeric & Comparable { +extension Tensor: Comparable where Scalar: Numeric & Comparable { /// Returns a Boolean value indicating whether the value of the first argument is /// lexicographically less than that of the second argument. @inlinable @@ -147,7 +147,7 @@ extension Tensor : Comparable where Scalar : Numeric & Comparable { } } -public extension Tensor where Scalar : Numeric & Comparable { +public extension Tensor where Scalar: Numeric & Comparable { /// Returns a Boolean value indicating whether the value of the first argument is /// lexicographically less than that of the second argument. @inlinable @@ -177,7 +177,7 @@ public extension Tensor where Scalar : Numeric & Comparable { } } -public extension Tensor where Scalar : Equatable { +public extension Tensor where Scalar: Equatable { /// Computes `lhs != rhs` element-wise and returns a `Tensor` of Boolean scalars. /// - Note: `.==` supports broadcasting. @inlinable @@ -222,9 +222,9 @@ public extension Tensor where Scalar : Equatable { } } -// TODO: infix operator ≈ : ComparisonPrecedence +// TODO: infix operator ≈: ComparisonPrecedence -public extension Tensor where Scalar : FloatingPoint & Equatable { +public extension Tensor where Scalar: FloatingPoint & Equatable { /// Returns a `Tensor` of Boolean values indicating whether the elements of `self` are /// approximately equal to those of `other`. 
@inlinable diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 7ab4fb6ba..66c25b50c 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -17,8 +17,8 @@ import TensorFlow #endif #if COMPILING_TENSORFLOW_MODULE -infix operator .> : ComparisonPrecedence -infix operator .== : ComparisonPrecedence +infix operator .>: ComparisonPrecedence +infix operator .==: ComparisonPrecedence #endif // TODO: @@ -29,19 +29,17 @@ infix operator .== : ComparisonPrecedence // Additive Group //===------------------------------------------------------------------------------------------===// -extension Tensor : AdditiveArithmetic where Scalar : Numeric { +extension Tensor: AdditiveArithmetic where Scalar: Numeric { /// A scalar zero tensor. @inlinable public static var zero: Tensor { - get { return Tensor(zeros: []) - } } /// Adds two tensors and produces their sum. /// - Note: `+` supports broadcasting. @inlinable - @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) public static func + (lhs: Tensor, rhs: Tensor) -> Tensor { return Raw.add(lhs, rhs) } @@ -49,13 +47,13 @@ extension Tensor : AdditiveArithmetic where Scalar : Numeric { /// Subtracts one tensor from another and produces their difference. /// - Note: `-` supports broadcasting. @inlinable - @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) public static func - (lhs: Tensor, rhs: Tensor) -> Tensor { return Raw.sub(lhs, rhs) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpAdd(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { return (lhs + rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in @@ -75,16 +73,16 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // Vector Space //===------------------------------------------------------------------------------------------===// -extension Tensor : VectorNumeric where Scalar : Numeric { +extension Tensor: VectorNumeric where Scalar: Numeric { /// Multiplies the scalar with every scalar of the tensor and produces the product. 
@inlinable - @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) public static func * (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs) * rhs } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpMultiply(lhs: Tensor, rhs: Tensor) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { return (lhs * rhs, { [lhsShape = lhs.shapeTensor, rhsShape = rhs.shapeTensor] v in @@ -93,9 +91,9 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { } } -extension Tensor : ShapedVectorNumeric where Scalar : Numeric {} +extension Tensor: ShapedVectorNumeric where Scalar: Numeric {} -extension Tensor : Differentiable where Scalar : TensorFlowFloatingPoint { +extension Tensor: Differentiable where Scalar: TensorFlowFloatingPoint { public typealias TangentVector = Tensor public typealias CotangentVector = Tensor public typealias AllDifferentiableVariables = Tensor @@ -110,31 +108,31 @@ extension Tensor : Differentiable where Scalar : TensorFlowFloatingPoint { // Additional Element-wise Operators //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { /// Adds the scalar to every scalar of the tensor and produces the sum. @inlinable - @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func + (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs) + rhs } /// Adds the scalar to every scalar of the tensor and produces the sum. @inlinable - @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpAdd(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func + (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs + Tensor(rhs) } /// Subtracts the scalar from every scalar of the tensor and produces the difference. @inlinable - @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func - (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs) - rhs } /// Subtracts the scalar from every scalar of the tensor and produces the difference @inlinable - @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpSubtract(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func - (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs - Tensor(rhs) } @@ -171,14 +169,14 @@ public extension Tensor where Scalar : Numeric { /// Multiplies two tensors and produces their product. /// - Note: `*` supports broadcasting. @inlinable - @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func * (lhs: Tensor, rhs: Tensor) -> Tensor { return Raw.mul(lhs, rhs) } /// Multiplies the scalar with every scalar of the tensor and produces the product. 
@inlinable - @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpMultiply(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func * (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs * Tensor(rhs) } @@ -190,6 +188,8 @@ public extension Tensor where Scalar : Numeric { lhs = lhs * rhs } + /// Multiplies the tensor with the scalar, broadcasting the scalar, and stores the result in the + /// left-hand-side variable. @inlinable static func *= (lhs: inout Tensor, rhs: Scalar) { lhs = lhs * rhs @@ -198,21 +198,21 @@ public extension Tensor where Scalar : Numeric { /// Returns the quotient of dividing the first tensor by the second. /// - Note: `/` supports broadcasting. @inlinable - @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func / (lhs: Tensor, rhs: Tensor) -> Tensor { return Raw.div(lhs, rhs) } /// Returns the quotient of dividing the scalar by the tensor, broadcasting the scalar. @inlinable - @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func / (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs) / rhs } /// Returns the quotient of dividing the tensor by the scalar, broadcasting the scalar. @inlinable - @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpDivide(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func / (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs / Tensor(rhs) } @@ -264,7 +264,7 @@ public extension Tensor where Scalar : Numeric { } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpAdd(lhs: Tensor, rhs: Scalar) -> (Tensor, (Tensor) -> (Tensor, Scalar)) { return (lhs + rhs, { v in (v, v.sum().scalarized()) }) @@ -409,16 +409,16 @@ public extension Tensor where Scalar == Bool { // @_exported import func Glibc.powf // #endif -public extension Tensor where Scalar : SignedNumeric { +public extension Tensor where Scalar: SignedNumeric { /// Computes the negation of the specified tensor element-wise. @inlinable - @differentiable(vjp: _vjpNegate(_:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpNegate(_:) where Scalar: TensorFlowFloatingPoint) static prefix func - (rhs: Tensor) -> Tensor { return Raw.neg(rhs) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpNegate(_ x: Tensor) -> (Tensor, (Tensor) -> Tensor) { return (-x, { v in -v }) @@ -427,13 +427,13 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { /// Computes the absolute value of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpAbs(_:) where T : TensorFlowFloatingPoint) -public func abs(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpAbs(_:) where T: TensorFlowFloatingPoint) +public func abs(_ x: Tensor) -> Tensor { return Raw.abs(x) } @inlinable -internal func _vjpAbs( +internal func _vjpAbs( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let sign = Raw.sign(x) @@ -442,13 +442,13 @@ internal func _vjpAbs( /// Computes the natural logarithm of the specified tensor element-wise. 
@inlinable -@differentiable(vjp: _vjpLog(_:) where T : TensorFlowFloatingPoint) -public func log(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpLog(_:) where T: TensorFlowFloatingPoint) +public func log(_ x: Tensor) -> Tensor { return Raw.log(x) } @inlinable -internal func _vjpLog( +internal func _vjpLog( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (log(x), { v in v / x }) @@ -456,13 +456,13 @@ internal func _vjpLog( /// Computes `sin` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpSin(_:) where T : TensorFlowFloatingPoint) -public func sin(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpSin(_:) where T: TensorFlowFloatingPoint) +public func sin(_ x: Tensor) -> Tensor { return Raw.sin(x) } @inlinable -internal func _vjpSin( +internal func _vjpSin( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (sin(x), { v in v * cos(x) }) @@ -470,13 +470,13 @@ internal func _vjpSin( /// Computes `cos` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpCos(_:) where T : TensorFlowFloatingPoint) -public func cos(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpCos(_:) where T: TensorFlowFloatingPoint) +public func cos(_ x: Tensor) -> Tensor { return Raw.cos(x) } @inlinable -internal func _vjpCos( +internal func _vjpCos( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (cos(x), { v in -v * sin(x) }) @@ -484,13 +484,13 @@ internal func _vjpCos( /// Computes `tan` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpTan(_:) where T : TensorFlowFloatingPoint) -public func tan(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpTan(_:) where T: TensorFlowFloatingPoint) +public func tan(_ x: Tensor) -> Tensor { return Raw.tan(x) } @inlinable -internal func _vjpTan( +internal func _vjpTan( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = tan(x) @@ -499,13 +499,13 @@ internal func _vjpTan( /// Computes `sinh` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpSinh(_:) where T : TensorFlowFloatingPoint) -public func sinh(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpSinh(_:) where T: TensorFlowFloatingPoint) +public func sinh(_ x: Tensor) -> Tensor { return Raw.sinh(x) } @inlinable -internal func _vjpSinh( +internal func _vjpSinh( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (sinh(x), { v in v * cosh(x) }) @@ -513,13 +513,13 @@ internal func _vjpSinh( /// Computes `cosh` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpCosh(_:) where T : TensorFlowFloatingPoint) -public func cosh(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpCosh(_:) where T: TensorFlowFloatingPoint) +public func cosh(_ x: Tensor) -> Tensor { return Raw.cosh(x) } @inlinable -internal func _vjpCosh( +internal func _vjpCosh( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (cosh(x), { v in v * sinh(x) }) @@ -527,13 +527,13 @@ internal func _vjpCosh( /// Computes `tanh` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpTanh(_:) where T : TensorFlowFloatingPoint) -public func tanh(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpTanh(_:) where T: TensorFlowFloatingPoint) +public func tanh(_ x: Tensor) -> Tensor { return Raw.tanh(x) } @inlinable -internal func _vjpTanh( +internal func _vjpTanh( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = tanh(x) @@ -541,15 +541,15 @@ internal func _vjpTanh( } /// Computes the square of the tensor. 
-public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { @inlinable - @differentiable(wrt: self, vjp: _vjpSquared() where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpSquared() where Scalar: TensorFlowFloatingPoint) func squared() -> Tensor { return Raw.square(self) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpSquared() -> (Tensor, (Tensor) -> Tensor) { return (squared(), { 2 * self * $0 }) @@ -558,13 +558,13 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { /// Computes the square root of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpSqrt(_:) where T : TensorFlowFloatingPoint) -public func sqrt(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpSqrt(_:) where T: TensorFlowFloatingPoint) +public func sqrt(_ x: Tensor) -> Tensor { return Raw.sqrt(x) } @inlinable -internal func _vjpSqrt( +internal func _vjpSqrt( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = sqrt(x) @@ -573,13 +573,13 @@ internal func _vjpSqrt( /// Computes the inverse square root of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpRsqrt(_:) where T : TensorFlowFloatingPoint) -public func rsqrt(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpRsqrt(_:) where T: TensorFlowFloatingPoint) +public func rsqrt(_ x: Tensor) -> Tensor { return Raw.rsqrt(x) } @inlinable -internal func _vjpRsqrt( +internal func _vjpRsqrt( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = rsqrt(x) @@ -588,13 +588,13 @@ internal func _vjpRsqrt( /// Computes `exp` of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpExp(_:) where T : TensorFlowFloatingPoint) -public func exp(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpExp(_:) where T: TensorFlowFloatingPoint) +public func exp(_ x: Tensor) -> Tensor { return Raw.exp(x) } @inlinable -internal func _vjpExp( +internal func _vjpExp( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = exp(x) @@ -603,13 +603,13 @@ internal func _vjpExp( /// Computes the ceiling of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpCeil(_:) where T : TensorFlowFloatingPoint) -public func ceil(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpCeil(_:) where T: TensorFlowFloatingPoint) +public func ceil(_ x: Tensor) -> Tensor { return Raw.ceil(x) } @inlinable -internal func _vjpCeil( +internal func _vjpCeil( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (ceil(x), { _ in Tensor(0).broadcast(like: x) }) @@ -617,13 +617,13 @@ internal func _vjpCeil( /// Computes the floor of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpFloor(_:) where T : TensorFlowFloatingPoint) -public func floor(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpFloor(_:) where T: TensorFlowFloatingPoint) +public func floor(_ x: Tensor) -> Tensor { return Raw.floor(x) } @inlinable -internal func _vjpFloor( +internal func _vjpFloor( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (floor(x), { _ in Tensor(0).broadcast(like: x) }) @@ -633,12 +633,12 @@ internal func _vjpFloor( /// Specifically, computes `1 / (1 + exp(-x))`. 
@inlinable @differentiable(vjp: _vjpSigmoid) -public func sigmoid(_ x: Tensor) -> Tensor { +public func sigmoid(_ x: Tensor) -> Tensor { return Raw.sigmoid(x) } @inlinable -internal func _vjpSigmoid( +internal func _vjpSigmoid( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (sigmoid(x), { v in Raw.sigmoidGrad(x, dy: v) }) @@ -647,8 +647,8 @@ internal func _vjpSigmoid( /// Computes the softmax of the specified tensor along the last axis. /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. @inlinable -@differentiable(vjp: _vjpSoftmax(_:) where T : TensorFlowFloatingPoint) -public func softmax(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpSoftmax(_:) where T: TensorFlowFloatingPoint) +public func softmax(_ x: Tensor) -> Tensor { return Raw.softmax(logits: x) } @@ -656,13 +656,13 @@ public func softmax(_ x: Tensor) -> Tensor { /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. @inlinable // TODO: [AD]. -public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { +public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { let xExp = exp(x) return xExp / xExp.sum(alongAxes: Tensor(Int32(axis))) } @inlinable -func _vjpSoftmax( +func _vjpSoftmax( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = softmax(x) @@ -674,13 +674,13 @@ func _vjpSoftmax( /// Computes the log-softmax of the specified tensor element-wise. @inlinable -@differentiable(vjp: _vjpLogSoftmax(_:) where T : TensorFlowFloatingPoint) -public func logSoftmax(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpLogSoftmax(_:) where T: TensorFlowFloatingPoint) +public func logSoftmax(_ x: Tensor) -> Tensor { return Raw.logSoftmax(logits: x) } @inlinable -func _vjpLogSoftmax( +func _vjpLogSoftmax( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = logSoftmax(x) @@ -690,13 +690,13 @@ func _vjpLogSoftmax( /// Computes `relu` of the specified tensor element-wise. /// Specifically, computes `max(0, x)`. @inlinable -@differentiable(vjp: _vjpRelu(_:) where T : TensorFlowFloatingPoint) -public func relu(_ x: Tensor) -> Tensor { +@differentiable(vjp: _vjpRelu(_:) where T: TensorFlowFloatingPoint) +public func relu(_ x: Tensor) -> Tensor { return max(0, x) } @inlinable -func _vjpRelu( +func _vjpRelu( _ x: Tensor ) -> (Tensor, (Tensor) -> Tensor) { return (relu(x), { v in Tensor(x .> 0) * v }) @@ -708,13 +708,13 @@ func _vjpRelu( /// Computes the power of the first tensor to the second tensor. @inlinable -@differentiable(vjp: _vjpPow(_:_:) where T : TensorFlowFloatingPoint) -public func pow(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : FloatingPoint { +@differentiable(vjp: _vjpPow(_:_:) where T: TensorFlowFloatingPoint) +public func pow(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T: FloatingPoint { return Raw.pow(lhs, rhs) } @inlinable -internal func _vjpPow( +internal func _vjpPow( _ x: Tensor, _ y: Tensor ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { let value = pow(x, y) @@ -726,28 +726,28 @@ internal func _vjpPow( /// Computes the power of the scalar to the tensor, broadcasting the scalar. @inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func pow(_ lhs: T, _ rhs: Tensor) -> Tensor where T : FloatingPoint { +// @differentiable(where T: TensorFlowFloatingPoint) +public func pow(_ lhs: T, _ rhs: Tensor) -> Tensor where T: FloatingPoint { return pow(Tensor(lhs), rhs) } /// Computes the power of the tensor to the scalar, broadcasting the scalar. 
@inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func pow(_ lhs: Tensor, _ rhs: T) -> Tensor where T : FloatingPoint { +// @differentiable(where T: TensorFlowFloatingPoint) +public func pow(_ lhs: Tensor, _ rhs: T) -> Tensor where T: FloatingPoint { return pow(lhs, Tensor(rhs)) } /// Computes the element-wise maximum of two tensors. /// - Note: `max` supports broadcasting. @inlinable -@differentiable(vjp: _vjpMax(_:_:) where T : TensorFlowFloatingPoint) -public func max(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { +@differentiable(vjp: _vjpMax(_:_:) where T: TensorFlowFloatingPoint) +public func max(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { return Raw.maximum(lhs, rhs) } @inlinable -internal func _vjpMax( +internal func _vjpMax( _ x: Tensor, _ y: Tensor ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { let value = max(x, y) @@ -756,28 +756,28 @@ internal func _vjpMax( /// Computes the element-wise maximum of the scalar and the tensor, broadcasting the scalar. @inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func max(_ lhs: T, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { +// @differentiable(where T: TensorFlowFloatingPoint) +public func max(_ lhs: T, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { return max(Tensor(lhs), rhs) } /// Computes the element-wise maximum of the scalar and the tensor, broadcasting the scalar. @inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func max(_ lhs: Tensor, _ rhs: T) -> Tensor where T : Numeric & Comparable { +// @differentiable(where T: TensorFlowFloatingPoint) +public func max(_ lhs: Tensor, _ rhs: T) -> Tensor where T: Numeric & Comparable { return max(lhs, Tensor(rhs)) } /// Computes the element-wise minimum of two tensors. /// - Note: `min` supports broadcasting. @inlinable -@differentiable(vjp: _vjpMin(_:_:) where T : TensorFlowFloatingPoint) -public func min(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { +@differentiable(vjp: _vjpMin(_:_:) where T: TensorFlowFloatingPoint) +public func min(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { return Raw.minimum(lhs, rhs) } @inlinable -internal func _vjpMin( +internal func _vjpMin( _ x: Tensor, _ y: Tensor ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { let value = min(x, y) @@ -786,20 +786,20 @@ internal func _vjpMin( /// Computes the element-wise minimum of the scalar and the tensor, broadcasting the scalar. @inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func min(_ lhs: T, _ rhs: Tensor) -> Tensor where T : Numeric & Comparable { +// @differentiable(where T: TensorFlowFloatingPoint) +public func min(_ lhs: T, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { return min(Tensor(lhs), rhs) } /// Computes the element-wise minimum of the scalar and the tensor, broadcasting the scalar. 
@inlinable -// @differentiable(where T : TensorFlowFloatingPoint) -public func min(_ lhs: Tensor, _ rhs: T) -> Tensor where T : Numeric & Comparable { +// @differentiable(where T: TensorFlowFloatingPoint) +public func min(_ lhs: Tensor, _ rhs: T) -> Tensor where T: Numeric & Comparable { return min(lhs, Tensor(rhs)) } @inlinable -internal func _vjpMinMaxHelper( +internal func _vjpMinMaxHelper( _ x: Tensor, _ y: Tensor, originalValue: Tensor, @@ -845,13 +845,13 @@ public extension Tensor { /// must be either have the same shape as `self` or be a 1-D `Tensor` such /// that `mask.scalarCount == self.shape[0]`. @inlinable - @differentiable(wrt: (self, other), vjp: _vjpReplacing where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: (self, other), vjp: _vjpReplacing where Scalar: TensorFlowFloatingPoint) func replacing(with other: Tensor, where mask: Tensor) -> Tensor { return Raw.select(condition: mask, t: self, e: other) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpReplacing( with other: Tensor, @@ -928,7 +928,7 @@ public extension Tensor where Scalar == Bool { } } -public extension Tensor where Scalar : Numeric & Comparable { +public extension Tensor where Scalar: Numeric & Comparable { // NOTE: This overload is necessary, otherwise `min()` would refer to the variadic method // `min(squeezingAxes:)` with zero indices. @inlinable @@ -1050,14 +1050,14 @@ public extension Tensor where Scalar : Numeric & Comparable { // MARK: - Numeric Reductions -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { // MARK: - Sum /// Returns the sum along the specified axes. The reduced dimensions are removed. /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self, vjp: _vjpSum(squeezingAxes:) where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpSum(squeezingAxes:) where Scalar: TensorFlowFloatingPoint) func sum(squeezingAxes axes: Tensor) -> Tensor { return Raw.sum(self, reductionIndices: Tensor(axes), keepDims: false) } @@ -1066,7 +1066,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func sum(squeezingAxes axes: [Int]) -> Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1077,13 +1077,13 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func sum(squeezingAxes axes: Int...) -> Tensor { return sum(squeezingAxes: axes) } @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func sum() -> Tensor { return flattened().sum(squeezingAxes: 0) } @@ -1092,7 +1092,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. 
/// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { return Raw.sum(self, reductionIndices: axes, keepDims: true) } @@ -1101,7 +1101,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1112,7 +1112,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { return sum(alongAxes: axes) } @@ -1189,7 +1189,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self, vjp: _vjpMean(squeezingAxes:) where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self, vjp: _vjpMean(squeezingAxes:) where Scalar: TensorFlowFloatingPoint) func mean(squeezingAxes axes: Tensor) -> Tensor { return Raw.mean(self, reductionIndices: axes, keepDims: false) } @@ -1198,7 +1198,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func mean(squeezingAxes axes: [Int]) -> Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1209,13 +1209,13 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func mean(squeezingAxes axes: Int...) -> Tensor { return mean(squeezingAxes: axes) } @inlinable - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) func mean() -> Tensor { return flattened().mean(squeezingAxes: [0]) } @@ -1225,7 +1225,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { return Raw.mean(self, reductionIndices: axes, keepDims: true) } @@ -1235,7 +1235,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1247,7 +1247,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { return mean(alongAxes: axes) } @@ -1259,7 +1259,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { let squaredDiff = (self - mean(alongAxes: axes)).squared() return squaredDiff.mean(squeezingAxes: axes) @@ -1270,7 +1270,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. 
/// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1282,12 +1282,12 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { return variance(squeezingAxes: axes) } - @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) @inlinable func variance() -> Tensor { let mean = self.mean() @@ -1300,7 +1300,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { let squaredDiff = (self - mean(alongAxes: axes)).squared() return squaredDiff.mean(alongAxes: axes) @@ -1311,7 +1311,7 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // TODO(TF-433): Remove workaround for differentiating `map`. let axes = {axes.map(Int32.init)}() @@ -1323,13 +1323,13 @@ public extension Tensor where Scalar : Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { return variance(alongAxes: axes) } } -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpSum(alongAxes axes: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = sum(alongAxes: axes) @@ -1371,7 +1371,7 @@ internal extension Tensor where Scalar : TensorFlowFloatingPoint { // TODO: Consider making the return type be generic over `FloatingPoint` types // so that `self`'s scalar type can be any `Numeric` type. -public extension Tensor where Scalar : TensorFlowFloatingPoint { +public extension Tensor where Scalar: TensorFlowFloatingPoint { /// Returns the standard deviation of the elements along the specified axes. The reduced /// dimensions are retained with value `1`. Does not apply Bessel's correction. /// @@ -1459,8 +1459,8 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { /// Performs matrix multiplication with another tensor and produces the result. @inlinable -@differentiable(vjp: _vjpMatmul(_:_:) where Scalar : TensorFlowFloatingPoint) -public func matmul( +@differentiable(vjp: _vjpMatmul(_:_:) where Scalar: TensorFlowFloatingPoint) +public func matmul( _ lhs: Tensor, _ rhs: Tensor ) -> Tensor { @@ -1471,7 +1471,7 @@ public func matmul( } @inlinable -internal func _vjpMatmul( +internal func _vjpMatmul( _ lhs: Tensor, _ rhs: Tensor ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { @@ -1481,15 +1481,15 @@ internal func _vjpMatmul( }) } -infix operator • : MultiplicationPrecedence +infix operator •: MultiplicationPrecedence -public extension Tensor where Scalar : Numeric { +public extension Tensor where Scalar: Numeric { // TODO: We have to define a custom VJP on • because AD can't yet differentiate generic methods. // After AD can differentiate generic methods, remove the custom VJP. /// Performs matrix multiplication between two tensors and produces the result. 
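As a usage sketch only (not part of the diff): the `•` operator declared just below is a thin wrapper over `matmul`, so the two spellings are interchangeable.

```
let a = Tensor<Float>([[1, 2], [3, 4]])  // shape [2, 2]
let b = Tensor<Float>([[0, 1], [1, 0]])  // shape [2, 2]
let c = a • b                            // identical to matmul(a, b)
print(c)                                 // [[2.0, 1.0], [4.0, 3.0]]
```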
@inlinable - @differentiable(vjp: _vjpMatmulOperator(lhs:rhs:) where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpMatmulOperator(lhs:rhs:) where Scalar: TensorFlowFloatingPoint) static func • (lhs: Tensor, rhs: Tensor) -> Tensor { return matmul(lhs, rhs) } @@ -1498,7 +1498,7 @@ public extension Tensor where Scalar : Numeric { // TODO: We have to define a custom VJP on • because AD can't yet // differentiate generic methods. After AD can differentiate generic methods, // remove the custom VJP. -internal extension Tensor where Scalar : TensorFlowFloatingPoint { +internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable static func _vjpMatmulOperator( lhs: Tensor, diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index 1664b1954..bd160e111 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -20,7 +20,7 @@ import TensorFlow // Normalization //===------------------------------------------------------------------------------------------===// -public extension Tensor where Scalar : TensorFlowFloatingPoint { +public extension Tensor where Scalar: TensorFlowFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are @@ -76,7 +76,7 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { } } -public extension Tensor where Scalar : BinaryFloatingPoint { +public extension Tensor where Scalar: BinaryFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// /// Specifically, returns `(self - mu)/(var + epsilon) * gamma + beta` where @@ -92,7 +92,7 @@ public extension Tensor where Scalar : BinaryFloatingPoint { @inlinable @differentiable( wrt: (self, offset, scale), - vjp: _vjpBatchNormalized where Scalar : TensorFlowFloatingPoint) + vjp: _vjpBatchNormalized where Scalar: TensorFlowFloatingPoint) func batchNormalized( alongAxis axis: Int, offset: Tensor = Tensor(0), @@ -138,7 +138,7 @@ public extension Padding { } } -public extension Tensor where Scalar : TensorFlowFloatingPoint { +public extension Tensor where Scalar: TensorFlowFloatingPoint { /// TensorFlow builtin conv2d gradient helper for the input. 
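For orientation (illustrative only, assuming the `(Int, Int, Int, Int)` strides spelling used elsewhere in this patch): the backprop helpers declared below are the pullbacks of `convolved2D(withFilter:strides:padding:)`, whose forward use looks like this.

```
let image = Tensor<Float>(ones: [1, 8, 8, 3])    // NHWC: batch, height, width, channels
let filter = Tensor<Float>(ones: [3, 3, 3, 16])  // HWIO: height, width, in channels, out channels
let output = image.convolved2D(withFilter: filter, strides: (1, 1, 1, 1), padding: .same)
print(output.shape)                              // [1, 8, 8, 16]
```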
@inlinable @differentiable(wrt: (self, filter), vjp: _vjpConv2DBackpropInput) @@ -185,10 +185,9 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { let value = conv2DBackpropInput( shape: shape, filter: filter, strides: strides, padding: padding) return (value, { v in - ( - self.conv2DBackpropFilter( - input: v, filterSizes: shape, strides: strides, padding: padding), - v.convolved2D(withFilter: filter, strides: strides, padding: padding)) + (self.conv2DBackpropFilter( + input: v, filterSizes: shape, strides: strides, padding: padding), + v.convolved2D(withFilter: filter, strides: strides, padding: padding)) }) } @@ -202,10 +201,9 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { let value = conv2DBackpropFilter( input: input, filterSizes: filterSizes, strides: strides, padding: padding) return (value, { v in - ( - self.conv2DBackpropInput( - shape: filterSizes, filter: v, strides: strides, padding: padding), - input.convolved2D(withFilter: v, strides: strides, padding: padding)) + (self.conv2DBackpropInput( + shape: filterSizes, filter: v, strides: strides, padding: padding), + input.convolved2D(withFilter: v, strides: strides, padding: padding)) }) } @@ -217,13 +215,10 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { let value = convolved2D(withFilter: filter, strides: strides, padding: padding) return (value, { v in - ( - v.conv2DBackpropInput( - shape: self.shapeTensor, filter: filter, - strides: strides, padding: padding), - v.conv2DBackpropFilter( - input: self, filterSizes: filter.shapeTensor, - strides: strides, padding: padding)) + (v.conv2DBackpropInput( + shape: self.shapeTensor, filter: filter, strides: strides, padding: padding), + v.conv2DBackpropFilter( + input: self, filterSizes: filter.shapeTensor, strides: strides, padding: padding)) }) } @@ -270,7 +265,7 @@ public extension Tensor where Scalar : TensorFlowFloatingPoint { } } -public extension Tensor where Scalar : FloatingPoint { +public extension Tensor where Scalar: FloatingPoint { /// Computes a 2-D convolution using `self` as input, with the specified /// filter, strides, and padding. /// @@ -309,7 +304,7 @@ public extension Tensor where Scalar : FloatingPoint { @inlinable @differentiable( wrt: self, - vjp: _vjpMaxPooled(kernelSize:strides:padding:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpMaxPooled(kernelSize:strides:padding:) where Scalar: TensorFlowFloatingPoint) func maxPooled( kernelSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), @@ -335,7 +330,7 @@ public extension Tensor where Scalar : FloatingPoint { @inlinable @differentiable( wrt: self, - vjp: _vjpAveragePooled(kernelSize:strides:padding:) where Scalar : TensorFlowFloatingPoint) + vjp: _vjpAveragePooled(kernelSize:strides:padding:) where Scalar: TensorFlowFloatingPoint) func averagePooled( kernelSize: (Int, Int, Int, Int), strides: (Int, Int, Int, Int), diff --git a/Sources/DeepLearning/Operators/README.md b/Sources/DeepLearning/Operators/README.md deleted file mode 100644 index 76e7a7e69..000000000 --- a/Sources/DeepLearning/Operators/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# Ops and Convenience Methods - -The majority of the Tensor API is implemented in terms of 'ops' that are -partitioned out to the TensorFlow graph when the compiler runs. These -ops are intentionally designed to reflect TensorFlow ops, but provide nicer -Swift syntax for accessing them. 
In addition to the core ops themselves, -we also define some helper function wrappers, e.g. to make things symmetric -and generally feel nice to use. - -The ops themselves are defined by the primitive `#tfop(...)` syntax, here -are some examples: -``` -result = #tfop("Add", lhs, rhs) -result = #tfop("Const", dtype: Float.self, value$tensor: 4.0) -``` - -The first parameter to this syntax is the TensorFlow op name as a string. -After that, the inputs are specified, and then attributes are specified -with their name as the keyword argument. - -Inputs and outputs must be of TensorHandle, ResourceHandle, or VariantHandle -type. These are magic types known to the compiler. - -## Auto-Differentiation Support - -We also provide vector-Jacobian product (VJP) definitions for some of the -convenience methods. - -Terminology: -- originalValue (f): The function being differentiated, or the result of that - function. -- VJP (f'): The function as the result of differentiation, computing - the vector-Jacobian products with respect to all arguments, or the result - of that function. - -For more information, visit: -https://en.wikipedia.org/wiki/Automatic_differentiation - -The attribute '@differentiable(vjp: ...)' is used to register a function's VJP. -The automatic differentiation pass identifies these VJPs and chains them -together to produce arbitrary differentiable programs. diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 81e03bcbd..35a58ad56 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -16,16 +16,14 @@ import TensorFlow #endif -public protocol Optimizable: Differentiable & KeyPathIterable - where AllDifferentiableVariables: KeyPathIterable { } - /// A machine learning optimizer. /// /// Optimizers apply an optimization algorithm to update the differentiable variables of a machine /// learning model. public protocol Optimizer { /// The type of the model whose parameters are optimized. - associatedtype Model: Optimizable + associatedtype Model: Differentiable & KeyPathIterable + where AllDifferentiableVariables: KeyPathIterable /// The scalar parameter type. associatedtype Scalar: FloatingPoint /// The learning rate. @@ -48,8 +46,9 @@ fileprivate extension Tensor where Scalar: Numeric { /// /// Reference: ["Adam - A Method for Stochastic Optimization"]( /// https://arxiv.org/abs/1412.6980v8) -public class Adam: Optimizer - where Model.AllDifferentiableVariables == Model.CotangentVector { +public class Adam: Optimizer + where AllDifferentiableVariables: KeyPathIterable, + Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float /// A coefficient used to calculate the first and second moments of @@ -142,8 +141,9 @@ public class Adam: Optimizer /// /// Reference: ["rmsprop: Divide the gradient by a running average of its recent magnitude"]( /// http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) -public class RMSProp: Optimizer - where Model.AllDifferentiableVariables == Model.CotangentVector { +public class RMSProp: Optimizer + where AllDifferentiableVariables: KeyPathIterable, + Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float // TODO: Document `rho`. Keras doesn't document `rho`. 
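On the `rho` TODO above: `rho` is the decay rate of the running average of squared gradients. A free-standing sketch of that update rule on plain tensors (illustrative only, not the class's actual implementation):

```
// One RMSProp step for a single tensor-valued parameter.
// `alpha` is the running average of squared gradients; `rho` is its decay rate.
func rmspropStep(
    parameter: inout Tensor<Float>,
    gradient: Tensor<Float>,
    alpha: inout Tensor<Float>,
    learningRate: Float = 0.001,
    rho: Float = 0.9,
    epsilon: Float = 1e-8
) {
    alpha = rho * alpha + (1 - rho) * gradient.squared()
    parameter -= learningRate * gradient / (sqrt(alpha) + epsilon)
}
```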
@@ -206,8 +206,9 @@ public class RMSProp: Optimizer /// /// An optimizer that implements stochastic gradient descent, with support for momentum, learning /// rate decay, and Nesterov momentum. -public class SGD: Optimizer - where Model.AllDifferentiableVariables == Model.CotangentVector { +public class SGD: Optimizer + where AllDifferentiableVariables: KeyPathIterable, + Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float /// The momentum factor. It accelerates stochastic gradient descent in the relevant direction @@ -278,8 +279,10 @@ public class SGD: Optimizer // MARK: - Manifold optimizers /// A Riemann manifold stochastic gradient descent (SGD) optimizer. -public class RiemannSGD: Optimizer - where Model.TangentVector: VectorNumeric, Model.TangentVector.Scalar == Scalar { +public class RiemannSGD: Optimizer + where AllDifferentiableVariables: KeyPathIterable, + Model.TangentVector: VectorNumeric, + Model.TangentVector.Scalar == Scalar { /// The learning rate. public var learningRate: Scalar diff --git a/Sources/DeepLearning/PythonConversion.swift b/Sources/DeepLearning/PythonConversion.swift index a1b10d30a..4ff73cbaa 100644 --- a/Sources/DeepLearning/PythonConversion.swift +++ b/Sources/DeepLearning/PythonConversion.swift @@ -28,8 +28,8 @@ private func debugLogNumpyError(_ message: String) { debugLog("NumPy conversion error: " + message) } -extension ShapedArray : ConvertibleFromNumpyArray - where Scalar : NumpyScalarCompatible { +extension ShapedArray: ConvertibleFromNumpyArray + where Scalar: NumpyScalarCompatible { /// Creates a `ShapedArray` with the same shape and scalars as the specified /// `numpy.ndarray` instance. /// @@ -75,7 +75,7 @@ extension ShapedArray : ConvertibleFromNumpyArray guard let ptr = UnsafePointer(bitPattern: ptrVal) else { fatalError("'numpy.ndarray' data pointer was nil") } - // This code avoids calling `init(shape: [Int], scalars: S)`, + // This code avoids calling `init(shape: [Int], scalars: S)`, // which inefficiently copies scalars one by one. Instead, // `init(shape: [Int], scalars: [Scalar])` is called, which efficiently // does a `memcpy` of the entire `scalars` array. @@ -92,8 +92,8 @@ extension ShapedArray : ConvertibleFromNumpyArray } } -extension Tensor : ConvertibleFromNumpyArray - where Scalar : NumpyScalarCompatible { +extension Tensor: ConvertibleFromNumpyArray + where Scalar: NumpyScalarCompatible { /// Creates a tensor with the same shape and scalars as the specified /// `numpy.ndarray` instance. /// @@ -147,7 +147,7 @@ extension Tensor : ConvertibleFromNumpyArray } } -extension ShapedArray where Scalar : NumpyScalarCompatible { +extension ShapedArray where Scalar: NumpyScalarCompatible { /// Creates a `numpy.ndarray` instance with the same shape and scalars as /// this `ShapedArray`. /// @@ -157,7 +157,7 @@ extension ShapedArray where Scalar : NumpyScalarCompatible { } } -extension Tensor where Scalar : NumpyScalarCompatible { +extension Tensor where Scalar: NumpyScalarCompatible { /// Creates a `numpy.ndarray` instance with the same shape and scalars as /// this tensor. 
/// @@ -165,7 +165,7 @@ extension Tensor where Scalar : NumpyScalarCompatible { public func makeNumpyArray() -> PythonObject { return array.makeNumpyArray() } } -extension TensorShape : PythonConvertible { +extension TensorShape: PythonConvertible { public var pythonObject: PythonObject { return dimensions.pythonObject } diff --git a/Sources/DeepLearning/Random.swift b/Sources/DeepLearning/Random.swift index 7a6752193..8c90ccdf4 100644 --- a/Sources/DeepLearning/Random.swift +++ b/Sources/DeepLearning/Random.swift @@ -429,8 +429,8 @@ public struct UniformIntegerDistribution: RandomDistributi } @_fixed_layout -public struct UniformFloatingPointDistribution: RandomDistribution - where T.RawSignificand : FixedWidthInteger { +public struct UniformFloatingPointDistribution: RandomDistribution + where T.RawSignificand: FixedWidthInteger { public let lowerBound: T public let upperBound: T @@ -445,8 +445,8 @@ public struct UniformFloatingPointDistribution: RandomD } @_fixed_layout -public struct NormalDistribution: RandomDistribution - where T.RawSignificand : FixedWidthInteger { +public struct NormalDistribution: RandomDistribution + where T.RawSignificand: FixedWidthInteger { public let mean: T public let standardDeviation: T private let uniformDist = UniformFloatingPointDistribution() @@ -536,7 +536,7 @@ public struct BetaDistribution: RandomDistribution { } while r + alpha * (log(alpha) - log(b + w)) < t w = min(w, Float.greatestFiniteMagnitude) - return a == alpha0 ? w / (b + w) : b / (b + w) + return a == alpha0 ? w / (b + w): b / (b + w) } /// Returns one sample from a Beta(alpha, beta) distribution using Cheng's BC @@ -592,6 +592,6 @@ public struct BetaDistribution: RandomDistribution { } w = min(w, Float.greatestFiniteMagnitude) - return a == alpha0 ? w / (b + w) : b / (b + w) + return a == alpha0 ? w / (b + w): b / (b + w) } } diff --git a/Sources/DeepLearning/Tensors.swift b/Sources/DeepLearning/Tensors.swift index 2d4e3b32d..1c1700649 100644 --- a/Sources/DeepLearning/Tensors.swift +++ b/Sources/DeepLearning/Tensors.swift @@ -17,7 +17,7 @@ import TensorFlow #endif #if COMPILING_TENSORFLOW_MODULE -infix operator .== : ComparisonPrecedence +infix operator .==: ComparisonPrecedence #endif //===------------------------------------------------------------------------------------------===// @@ -28,25 +28,19 @@ public extension Tensor { /// The rank of the tensor, represented as a `Tensor`. @inlinable var rankTensor: Tensor { - get { - return Raw.rank(self) - } + return Raw.rank(self) } /// The dimensions of the tensor, represented as a `Tensor`. @inlinable var shapeTensor: Tensor { - get { - return Raw.shape(self) - } + return Raw.shape(self) } /// The number of scalars in the tensor, represented as a `Tensor`. @inlinable var scalarCountTensor: Tensor { - get { - return Raw.size(self) - } + return Raw.size(self) } } @@ -55,7 +49,7 @@ public extension Tensor { //===------------------------------------------------------------------------------------------===// // String conversion. -extension Tensor : CustomStringConvertible { +extension Tensor: CustomStringConvertible { /// A textual representation of the tensor. /// /// - Note: use `fullDescription` for a non-pretty-printed description showing all scalars. @@ -94,14 +88,14 @@ public extension Tensor { } // Xcode Playground display conversion. 
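A small round-trip sketch for the NumPy conversion APIs above, `makeNumpyArray()` and `init?(numpy:)` (illustrative only; it assumes Python and NumPy are importable at runtime):

```
import Python
import TensorFlow

let x = Tensor<Float>(shape: [2, 3], scalars: [0, 1, 2, 3, 4, 5])
let ndarray = x.makeNumpyArray()                  // numpy.ndarray, shape (2, 3), dtype float32
let roundTripped = Tensor<Float>(numpy: ndarray)  // nil only for an incompatible dtype
print(ndarray.shape)                              // (2, 3)
print(roundTripped!)                              // same shape and scalars as `x`
```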
-extension Tensor : CustomPlaygroundDisplayConvertible { +extension Tensor: CustomPlaygroundDisplayConvertible { public var playgroundDescription: Any { return description } } // Mirror representation, used by debugger/REPL. -extension Tensor : CustomReflectable { +extension Tensor: CustomReflectable { public var customMirror: Mirror { return Mirror(self, children: [], displayStyle: .struct) } @@ -111,7 +105,7 @@ extension Tensor : CustomReflectable { // Codable Conformance //===------------------------------------------------------------------------------------------===// -extension Tensor : Codable where Scalar : Codable { +extension Tensor: Codable where Scalar: Codable { @inlinable public func encode(to encoder: Encoder) throws { var container = encoder.singleValueContainer() From f4b7e01715ed4ad678b0a1ce3db3b8a09814148c Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 19:05:02 -0400 Subject: [PATCH 47/55] Minor edit. --- Sources/DeepLearning/Optimizer.swift | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 35a58ad56..ac4cf0b90 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -22,8 +22,7 @@ import TensorFlow /// learning model. public protocol Optimizer { /// The type of the model whose parameters are optimized. - associatedtype Model: Differentiable & KeyPathIterable - where AllDifferentiableVariables: KeyPathIterable + associatedtype Model: Differentiable /// The scalar parameter type. associatedtype Scalar: FloatingPoint /// The learning rate. @@ -279,10 +278,8 @@ public class SGD: Optimizer // MARK: - Manifold optimizers /// A Riemann manifold stochastic gradient descent (SGD) optimizer. -public class RiemannSGD: Optimizer - where AllDifferentiableVariables: KeyPathIterable, - Model.TangentVector: VectorNumeric, - Model.TangentVector.Scalar == Scalar { +public class RiemannSGD: Optimizer + where Model.TangentVector: VectorNumeric, Model.TangentVector.Scalar == Scalar { /// The learning rate. public var learningRate: Scalar From b207e42685b64d3901a66b6381b69b8afbb55209 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 19:14:59 -0400 Subject: [PATCH 48/55] Reverted the change in the existing optimizer implementations. --- Sources/DeepLearning/Optimizer.swift | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index ac4cf0b90..440a691e0 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -45,9 +45,8 @@ fileprivate extension Tensor where Scalar: Numeric { /// /// Reference: ["Adam - A Method for Stochastic Optimization"]( /// https://arxiv.org/abs/1412.6980v8) -public class Adam: Optimizer - where AllDifferentiableVariables: KeyPathIterable, - Model.AllDifferentiableVariables == Model.CotangentVector { +public class Adam: Optimizer + where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. 
public var learningRate: Float /// A coefficient used to calculate the first and second moments of @@ -140,9 +139,8 @@ public class Adam: Optimizer /// /// Reference: ["rmsprop: Divide the gradient by a running average of its recent magnitude"]( /// http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) -public class RMSProp: Optimizer - where AllDifferentiableVariables: KeyPathIterable, - Model.AllDifferentiableVariables == Model.CotangentVector { +public class RMSProp: Optimizer + where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float // TODO: Document `rho`. Keras doesn't document `rho`. @@ -205,9 +203,8 @@ public class RMSProp: Optimizer /// /// An optimizer that implements stochastic gradient descent, with support for momentum, learning /// rate decay, and Nesterov momentum. -public class SGD: Optimizer - where AllDifferentiableVariables: KeyPathIterable, - Model.AllDifferentiableVariables == Model.CotangentVector { +public class SGD: Optimizer + where Model.AllDifferentiableVariables == Model.CotangentVector { /// The learning rate. public var learningRate: Float /// The momentum factor. It accelerates stochastic gradient descent in the relevant direction @@ -278,7 +275,7 @@ public class SGD: Optimizer // MARK: - Manifold optimizers /// A Riemann manifold stochastic gradient descent (SGD) optimizer. -public class RiemannSGD: Optimizer +public class RiemannSGD: Optimizer where Model.TangentVector: VectorNumeric, Model.TangentVector.Scalar == Scalar { /// The learning rate. public var learningRate: Scalar From 3dcd46d7dbf72a38053cae7bc1072ad09e2d667e Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 19:54:27 -0400 Subject: [PATCH 49/55] Added VJPs for some operations. --- Sources/DeepLearning/Initializers.swift | 61 ++++++------ Sources/DeepLearning/Layer.swift | 3 +- Sources/DeepLearning/Operators/Basic.swift | 104 +++++++++++++++++++-- 3 files changed, 127 insertions(+), 41 deletions(-) diff --git a/Sources/DeepLearning/Initializers.swift b/Sources/DeepLearning/Initializers.swift index 17884c3ca..3ab3f5654 100644 --- a/Sources/DeepLearning/Initializers.swift +++ b/Sources/DeepLearning/Initializers.swift @@ -86,8 +86,7 @@ public extension Tensor where Scalar: Numeric { /// Perform an element-wise conversion from another `Tensor`. @inlinable @differentiable( - vjp: _vjpCast where Scalar: TensorFlowFloatingPoint, - OtherScalar: TensorFlowFloatingPoint) + vjp: _vjpCast where Scalar: TensorFlowFloatingPoint, OtherScalar: TensorFlowFloatingPoint) init(_ other: Tensor) { self = Raw.cast(other) } @@ -109,7 +108,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { public extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - // @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(where Scalar: TensorFlowFloatingPoint) init(_ elements: [Tensor]) { self = Tensor(stacking: elements) } @@ -143,7 +142,7 @@ public extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - // @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) + @differentiable(vjp: _vjpStacking where Scalar: TensorFlowFloatingPoint) init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = Raw.pack(tensors, axis: Int64(axis)) } @@ -181,40 +180,40 @@ public extension Tensor { /// /// - Returns: The concatenated tensor. 
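Illustrative only: `Tensor(stacking:)` and `unstack(alongAxis:)` invert each other, which is why `_vjpStacking` further down in this patch can hand the incoming cotangent straight to `unstack`.

```
let rows = [Tensor<Float>([1, 2]), Tensor<Float>([3, 4])]
let stacked = Tensor(stacking: rows)   // shape [2, 2]
let unstacked = stacked.unstack()      // two tensors of shape [2] again
print(stacked.shape, unstacked.count)  // [2, 2] 2
```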
@inlinable - // @differentiable(wrt: tensors, vjp: _vjpConcatenating where Scalar: TensorFlowFloatingPoint) + @differentiable(vjp: _vjpConcatenating where Scalar: TensorFlowFloatingPoint) init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = Raw.concatV2(tensors, axis: Tensor(Int32(axis))) } } -// internal extension Tensor where Scalar: TensorFlowFloatingPoint { -// @inlinable -// static func _vjpStacking( -// stacking tensors: [Tensor], -// alongAxis axis: Int = 0 -// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { -// let result = Tensor(stacking: tensors, alongAxis: axis) -// return (result, { v in -// Array.DifferentiableView(v.unstack(alongAxis: axis)) -// }) -// } +internal extension Tensor where Scalar: TensorFlowFloatingPoint { + @inlinable + static func _vjpStacking( + stacking tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(stacking: tensors, alongAxis: axis) + return (result, { v in + Array.DifferentiableView(v.unstack(alongAxis: axis)) + }) + } -// @inlinable -// static func _vjpConcatenating( -// concatenating tensors: [Tensor], -// alongAxis axis: Int = 0 -// ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { -// let result = Tensor(concatenating: tensors, alongAxis: axis) -// let posAxis = axis < 0 ? axis + tensors[0].rank: axis -// let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) -// return (result, { [count = tensors.count] v in -// if count == 1 { return Array.DifferentiableView([v]) } -// let splits = v.split(sizes: sizes, alongAxis: posAxis) -// return Array.DifferentiableView(splits) -// }) -// } -// } + @inlinable + static func _vjpConcatenating( + concatenating tensors: [Tensor], + alongAxis axis: Int = 0 + ) -> (Tensor, (Tensor) -> Array.DifferentiableView) { + let result = Tensor(concatenating: tensors, alongAxis: axis) + let posAxis = axis < 0 ? axis + tensors[0].rank: axis + let sizes = Tensor(stacking: tensors.map { $0.shapeTensor[posAxis] }) + return (result, { [count = tensors.count] v in + if count == 1 { return Array.DifferentiableView([v]) } + let splits = v.split(sizes: sizes, alongAxis: posAxis) + return Array.DifferentiableView(splits) + }) + } +} //===------------------------------------------------------------------------------------------===// // Numeric diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 29052a3ef..7f045d8d2 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -23,7 +23,8 @@ import TensorFlow /// /// `Layer` instances define a differentiable `applied(to:)` method for mapping inputs to /// outputs. -public protocol Layer: Optimizable { +public protocol Layer: Differentiable & KeyPathIterable + where AllDifferentiableVariables: KeyPathIterable { /// The input type of the layer. associatedtype Input: Differentiable /// The output type of the layer. diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 56cdde2fc..51b8390b7 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -33,11 +33,11 @@ public extension Tensor { /// `N` tensors from this tensor by chipping it along the `axis` dimension, where `N` is /// inferred from this tensor's shape. 
For example, given a tensor with shape `[A, B, C, D]`: /// - /// - If `axis == 0` then the `i`th tensor in the returned array is the slice + /// - If `axis == 0` then the `i`-th tensor in the returned array is the slice /// `self[i, :, :, :]` and each tensor in that array will have shape `[B, C, D]`. /// (Note that the dimension unpacked along is gone, unlike /// `Tensor.split(numSplits:alongAxis)`, or `Tensor.split(sizes:alongAxis)`). - /// - If `axis == 1` then the `i`th tensor in the returned array is the slice + /// - If `axis == 1` then the `i`-th tensor in the returned array is the slice /// `value[:, i, :, :]` and each tensor in that array will have shape `[A, C, D]`. /// - Etc. /// @@ -51,9 +51,73 @@ public extension Tensor { /// /// - Returns: Array containing the unstacked tensors. @inlinable - // @differentiable(vjp: _vjpUnstack(alongAxis:) wrt: self where Scalar : TensorFlowFloatingPoint) + @differentiable(vjp: _vjpUnstack(alongAxis:) where Scalar: TensorFlowFloatingPoint) func unstack(alongAxis axis: Int = 0) -> [Tensor] { - return Raw.unpack(value: self, num: shape[axis], axis: Int64(axis)) + return Raw.unpack(value: self, num: Int64(shape[axis]), axis: Int64(axis)) + } + + /// Splits a tensor into multiple tensors. The tensor is split along dimension `axis` into + /// `numSplits` smaller tensors. This requires that `numSplits` evenly divides `shape[axis]`. + /// + /// For example: + /// ``` + /// // 'value' is a tensor with shape [5, 30] + /// // Split 'value' into 3 tensors along dimension 1: + /// let parts = value.split(numSplits: 3, alongAxis: 1) + /// parts[0] // has shape [5, 10] + /// parts[1] // has shape [5, 10] + /// parts[2] // has shape [5, 10] + /// ``` + /// + /// - Parameters: + /// - numSplits: Number of splits to create. + /// - axis: Dimension along which to split this tensor. Negative values wrap around. + /// + /// - Precondition: `numSplits` must divide the size of dimension `axis` evenly. + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: Array containing the tensors parts. + @inlinable + @differentiable(vjp: _vjpSplit(numSplits:alongAxis:) where Scalar: TensorFlowFloatingPoint) + func split(numSplits: Int, alongAxis axis: Int = 0) -> [Tensor] { + return Raw.split( + splitDim: Tensor(Int32(axis)), value: self, numSplit: Int64(numSplits)) + } + + /// Splits a tensor into multiple tensors. The tensor is split into `sizes.shape[0]` pieces. + /// The shape of the `i`-th piece has the same shape as this tensor except along dimension + /// `axis` where the size is `sizes[i]`. + /// + /// For example: + /// ``` + /// // 'value' is a tensor with shape [5, 30] + /// // Split 'value' into 3 tensors with sizes [4, 15, 11] along dimension 1: + /// let parts = value.split(sizes: Tensor([4, 15, 11]), alongAxis: 1) + /// parts[0] // has shape [5, 4] + /// parts[1] // has shape [5, 15] + /// parts[2] // has shape [5, 11] + /// ``` + /// + /// - Parameters: + /// - sizes: 1-D tensor containing the size of each split. + /// - axis: Dimension along which to split this tensor. Negative values wrap around. + /// + /// - Precondition: The values in `sizes` must add up to the size of dimension `axis`. + /// - Precondition: `axis` must be in the range `[-rank, rank)`, where `rank` is the rank of the + /// provided tensors. + /// + /// - Returns: Array containing the tensors parts. 
+ @inlinable + @differentiable( + wrt: self, + vjp: _vjpSplit(sizes:alongAxis:) where Scalar: TensorFlowFloatingPoint) + func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { + return Raw.splitV( + value: self, + sizeSplits: sizes, + splitDim: Tensor(Int32(axis)), + numSplit: Int64(sizes.shape[0])) } /// Reshape to the shape of the specified `Tensor`. @@ -123,6 +187,32 @@ public extension Tensor { } internal extension Tensor where Scalar: TensorFlowFloatingPoint { + @inlinable + func _vjpUnstack( + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + let result = unstack(alongAxis: axis) + return (result, { v in Tensor(stacking: v.base, alongAxis: axis) }) + } + + @inlinable + func _vjpSplit( + numSplits: Int, + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + let result = split(numSplits: numSplits, alongAxis: axis) + return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + } + + @inlinable + func _vjpSplit( + sizes: Tensor, + alongAxis axis: Int = 0 + ) -> ([Tensor], (Array.CotangentVector) -> Tensor) { + let result = split(sizes: sizes, alongAxis: axis) + return (result, { v in Tensor(concatenating: v.base, alongAxis: axis) }) + } + @inlinable func _vjpReshaped(toShape newShape: Tensor) -> (Tensor, (Tensor) -> Tensor) { let value = reshaped(toShape: newShape) @@ -241,11 +331,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let idx = axis < 0 ? axis + rank: axis let splits = Tensor([shapeTensor[idx], other.shapeTensor[idx]]) return (concatenated(with: other, alongAxis: axis), { result in - let gradients = Raw.splitV( - value: result, - sizeSplits: splits, - splitDim: Tensor(Int32(axis)), - numSplit: Int64(2)) + let gradients = result.split(sizes: splits, alongAxis: axis) return (gradients[0], gradients[1]) }) } From 5548c56b18e188c136b834b7314310a7c49722c8 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:18:09 -0400 Subject: [PATCH 50/55] Incorporated fix from stdlib. --- Sources/DeepLearning/Operators/Basic.swift | 22 ++++++++++++++++------ Sources/DeepLearning/Operators/Math.swift | 8 ++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 51b8390b7..55d2f8fd3 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -154,12 +154,22 @@ public extension Tensor { return reshaped(to: [-1]) } + /// Returns a shape-expanded `Tensor`, with a dimension of 1 inserted at the specified shape + /// indices. + @inlinable + @differentiable(wrt: self where Scalar : TensorFlowFloatingPoint) + func expandingShape(at axes: Int...) -> Tensor { + return expandingShape(at: axes) + } + /// Returns a shape-expanded `Tensor`, with a dimension of 1 inserted at the - /// specified shape index. + /// specified shape indices. @inlinable @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar: TensorFlowFloatingPoint) - func expandingShape(at shapeIndex: Int) -> Tensor { - return Raw.expandDims(self, dim: Tensor(Int32(shapeIndex))) + func expandingShape(at axes: [Int]) -> Tensor { + var result = self + for i in axes { result = Raw.expandDims(result, dim: Tensor(Int32(i))) } + return result } /// Returns a rank-lifted `Tensor` with a leading dimension of 1. 
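Illustrative only: `expandingShape(at:)` and `squeezingShape(at:)` invert each other over the inserted axes, which is what the reworked `_vjpExpandingShape` below relies on.

```
let x = Tensor<Float>([1, 2, 3])                    // shape [3]
let expanded = x.expandingShape(at: 0, 2)           // shape [1, 3, 1]
let restored = expanded.squeezingShape(at: [0, 2])  // shape [3] again
print(expanded.shape, restored.shape)               // [1, 3, 1] [3]
```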
@@ -220,9 +230,9 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { } @inlinable - func _vjpExpandingShape(at shapeIndex: Int) -> (Tensor, (Tensor) -> Tensor) { - let value = expandingShape(at: shapeIndex) - return (value, { v in v.squeezingShape(at: shapeIndex) }) + func _vjpExpandingShape(at axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { + let value = self.expandingShape(at: axes) + return (value, { v in v.squeezingShape(at: axes) }) } @inlinable diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 66c25b50c..90d3d7bcf 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -1341,9 +1341,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let value = sum(squeezingAxes: axes) return (value, { [shape = shapeTensor] in var result = $0 - for i in axes.array.scalars { - result = result.expandingShape(at: Int(i)) - } + for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } return result.broadcast(toShape: shape) }) } @@ -1361,9 +1359,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let count = Raw.gather(params: shapeTensor, indices: axes).product() return (value, { [shape = shapeTensor] in var result = $0 - for i in axes.array.scalars { - result = result.expandingShape(at: Int(i)) - } + for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } return result.broadcast(toShape: shape) / Tensor(count) }) } From 89fb4e4d3f7f4049b1d30d026398b7ab6b86e18f Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:36:48 -0400 Subject: [PATCH 51/55] Addressed Richard's feedback. --- .../DeepLearningTests/InitializerTests.swift | 11 ++++ .../OperatorTests/BasicTests.swift | 27 ++++++++++ .../OperatorTests/ComparisonTests.swift | 5 ++ .../OperatorTests/MathTests.swift | 14 +++++- Tests/DeepLearningTests/TensorTests.swift | 50 ++++++------------- 5 files changed, 70 insertions(+), 37 deletions(-) diff --git a/Tests/DeepLearningTests/InitializerTests.swift b/Tests/DeepLearningTests/InitializerTests.swift index f91109065..3407e5816 100644 --- a/Tests/DeepLearningTests/InitializerTests.swift +++ b/Tests/DeepLearningTests/InitializerTests.swift @@ -94,4 +94,15 @@ final class InitializerTests: XCTestCase { XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), floats.array) XCTAssertEqual(ShapedArray(shape: [2, 2], scalars: [1, 0, 1, 0]), i8s.array) } + + static var allTests = [ + ("testInitializers", testInitializers), + ("testFactoryInitializers", testFactoryInitializers), + ("testNumericInitializers", testNumericInitializers), + ("testScalarToTensorConversion", testScalarToTensorConversion), + ("testArrayConversion", testArrayConversion), + ("testNonTPUDataTypeCast", testNonTPUDataTypeCast), + ("testTPUDataTypeCast", testTPUDataTypeCast), + ("testNonTPUBoolToNumericCast", testNonTPUBoolToNumericCast) + ] } diff --git a/Tests/DeepLearningTests/OperatorTests/BasicTests.swift b/Tests/DeepLearningTests/OperatorTests/BasicTests.swift index 112430984..ae25efbc5 100644 --- a/Tests/DeepLearningTests/OperatorTests/BasicTests.swift +++ b/Tests/DeepLearningTests/OperatorTests/BasicTests.swift @@ -449,4 +449,31 @@ final class BasicOperatorTests: XCTestCase { target .= Tensor(repeating: 1, shape: [1, 3, 1]) XCTAssertEqual(Tensor(repeating: 1, shape: [2, 3, 4]), target) } + + static var allTests = [ + ("testElementIndexing", testElementIndexing), + ("testElementIndexingAssignment", 
testElementIndexingAssignment), + ("testNestedElementIndexing", testNestedElementIndexing), + ("testSliceIndexing", testSliceIndexing), + ("testSliceIndexingAssignment", testSliceIndexingAssignment), + ("testEllipsisIndexing", testEllipsisIndexing), + ("testNewAxisIndexing", testNewAxisIndexing), + ("testSqueezeAxisIndexing", testSqueezeAxisIndexing), + ("testStridedSliceIndexing", testStridedSliceIndexing), + ("testStridedSliceIndexingAssignment", testStridedSliceIndexingAssignment), + ("testWholeTensorSlicing", testWholeTensorSlicing), + ("testAdvancedIndexing", testAdvancedIndexing), + ("testConcatenation", testConcatenation), + ("testVJPConcatenation", testVJPConcatenation), + ("testTranspose", testTranspose), + ("testReshape", testReshape), + ("testFlatten", testFlatten), + ("testFlatten0D", testFlatten0D), + ("testReshapeToScalar", testReshapeToScalar), + ("testReshapeTensor", testReshapeTensor), + ("testUnbroadcast1", testUnbroadcast1), + ("testUnbroadcast2", testUnbroadcast2), + ("testSliceUpdate", testSliceUpdate), + ("testBroadcastTensor", testBroadcastTensor) + ] } diff --git a/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift b/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift index f667dbbcc..e20a9cdc9 100644 --- a/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift +++ b/Tests/DeepLearningTests/OperatorTests/ComparisonTests.swift @@ -27,4 +27,9 @@ final class ComparisonOperatorTests: XCTestCase { let y = Tensor([2, 3, 4, 5, 6]) XCTAssertTrue(x < y) } + + static var allTests = [ + ("testElementwiseComparison", testElementwiseComparison), + ("testLexicographicalComparison", testLexicographicalComparison) + ] } diff --git a/Tests/DeepLearningTests/OperatorTests/MathTests.swift b/Tests/DeepLearningTests/OperatorTests/MathTests.swift index 8c1898fbb..3f769be07 100644 --- a/Tests/DeepLearningTests/OperatorTests/MathTests.swift +++ b/Tests/DeepLearningTests/OperatorTests/MathTests.swift @@ -147,7 +147,6 @@ final class MathOperatorTests: XCTestCase { XCTAssertEqual([12.5, 6.5], result.scalars) } - @inline(never) func testXORInference() { func xor(_ x: Float, _ y: Float) -> Float { let x = Tensor([x, y]).reshaped(to: [1, 2]) @@ -196,4 +195,17 @@ final class MathOperatorTests: XCTestCase { let prediction = classifier.prediction(for: input) XCTAssertEqual([0.816997], prediction.scalars, accuracy: 0.001) } + + static var allTests = [ + ("testReduction", testReduction), + ("testArgmax", testArgmax), + ("testCeilAndFloor", testCeilAndFloor), + ("testSimpleMath", testSimpleMath), + ("testStandardDeviation", testStandardDeviation), + ("test3Adds", test3Adds), + ("testMultiOpMath", testMultiOpMath), + ("testXWPlusB", testXWPlusB), + ("testXORInference", testXORInference), + ("testMLPClassifierStruct", testMLPClassifierStruct) + ] } diff --git a/Tests/DeepLearningTests/TensorTests.swift b/Tests/DeepLearningTests/TensorTests.swift index 01e18fda8..ec7d1f6e3 100644 --- a/Tests/DeepLearningTests/TensorTests.swift +++ b/Tests/DeepLearningTests/TensorTests.swift @@ -29,53 +29,31 @@ final class TensorTests: XCTestCase { XCTAssertEqual(0, selectValue(true).scalar) } - @inline(never) func testRankGetter() { - let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) - XCTAssertEqual(3, tensor.rank) - } - - // TODO: Merge all rank/shape getter tests into one when we support code motion to avoid sends. 
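For context (file and module names assumed, not part of this patch): these `allTests` manifests are what Linux test discovery consumes, typically from a `LinuxMain.swift` along these lines.

```
import XCTest
import DeepLearningTests

XCTMain([
    testCase(InitializerTests.allTests),
    testCase(BasicOperatorTests.allTests),
    testCase(ComparisonOperatorTests.allTests),
    testCase(MathOperatorTests.allTests),
    testCase(TensorTests.allTests)
])
```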
- - @inline(never) - func testRankGetter2() { let vector = Tensor([1]) - XCTAssertEqual(1, vector.rank) - } - - @inline(never) - func testRankGetter3() { let matrix = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) - XCTAssertEqual(2, matrix.rank) - } - - @inline(never) - func testRankGetter4() { let ones = Tensor(ones: [1, 2, 2, 2, 2, 2, 1]) + let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) + XCTAssertEqual(1, vector.rank) + XCTAssertEqual(2, matrix.rank) XCTAssertEqual(7, ones.rank) + XCTAssertEqual(3, tensor.rank) } - @inline(never) func testShapeGetter() { - let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) - XCTAssertEqual([3, 4, 5], tensor.shape) - } - - @inline(never) - func testShapeGetter2() { let vector = Tensor([1]) - XCTAssertEqual([1], vector.shape) - } - - @inline(never) - func testShapeGetter3() { let matrix = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) - XCTAssertEqual([2, 3], matrix.shape) - } - - @inline(never) - func testShapeGetter4() { let ones = Tensor(ones: [1, 2, 2, 2, 2, 2, 1]) + let tensor = Tensor(shape: [3, 4, 5], scalars: Array(0..<60)) + XCTAssertEqual([1], vector.shape) + XCTAssertEqual([2, 3], matrix.shape) XCTAssertEqual([1, 2, 2, 2, 2, 2, 1], ones.shape) + XCTAssertEqual([3, 4, 5], tensor.shape) } + + static var allTests = [ + ("testSimpleCond", testSimpleCond), + ("testRankGetter", testRankGetter), + ("testShapeGetter", testShapeGetter) + ] } From 61eae263cc5d2499aa55eae8da9255479a236262 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:40:07 -0400 Subject: [PATCH 52/55] Changed the indentation in the 'PythonConversion.swift' file. --- Sources/DeepLearning/PythonConversion.swift | 251 ++++++++++---------- 1 file changed, 124 insertions(+), 127 deletions(-) diff --git a/Sources/DeepLearning/PythonConversion.swift b/Sources/DeepLearning/PythonConversion.swift index 4ff73cbaa..5e52548c4 100644 --- a/Sources/DeepLearning/PythonConversion.swift +++ b/Sources/DeepLearning/PythonConversion.swift @@ -25,150 +25,147 @@ import Python private let np = Python.import("numpy") private func debugLogNumpyError(_ message: String) { - debugLog("NumPy conversion error: " + message) + debugLog("NumPy conversion error: " + message) } extension ShapedArray: ConvertibleFromNumpyArray - where Scalar: NumpyScalarCompatible { - /// Creates a `ShapedArray` with the same shape and scalars as the specified - /// `numpy.ndarray` instance. - /// - /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. - /// - Precondition: The `numpy` Python package must be installed. - /// - Precondition: `numpyArray` must have a compatible scalar `dtype`. - public init?(numpy numpyArray: PythonObject) { - // Check if input is a `numpy.ndarray` instance. - guard Python.isinstance(numpyArray, np.ndarray) == true else { - debugLogNumpyError(""" - PythonObject input has type '\(Python.type(numpyArray))' and is not \ - an instance of 'numpy.ndarray'. - """) - return nil + where Scalar: NumpyScalarCompatible { + /// Creates a `ShapedArray` with the same shape and scalars as the specified + /// `numpy.ndarray` instance. + /// + /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. + /// - Precondition: The `numpy` Python package must be installed. + /// - Precondition: `numpyArray` must have a compatible scalar `dtype`. + public init?(numpy numpyArray: PythonObject) { + // Check if input is a `numpy.ndarray` instance. 
+ guard Python.isinstance(numpyArray, np.ndarray) == true else { + debugLogNumpyError(""" + PythonObject input has type '\(Python.type(numpyArray))' and is not \ + an instance of 'numpy.ndarray'. + """) + return nil + } + // Check if the dtype of the `ndarray` is compatible with the `Scalar` + // type. + guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { + debugLogNumpyError(""" + 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ + Swift type '\(Scalar.self)'. + """) + return nil + } + + let pyShape = numpyArray.__array_interface__["shape"] + guard let shape = [Int](pyShape) else { + debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") + return nil + } + + // Make sure that the array is contiguous in memory. This does a copy if + // the array is not already contiguous in memory. + let contiguousNumpyArray = np.ascontiguousarray(numpyArray) + + guard let ptrVal = + UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { + debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") + return nil + } + // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape + // of `(0,)`). + guard let ptr = UnsafePointer(bitPattern: ptrVal) else { + fatalError("'numpy.ndarray' data pointer was nil") + } + // This code avoids calling `init(shape: [Int], scalars: S)`, + // which inefficiently copies scalars one by one. Instead, + // `init(shape: [Int], scalars: [Scalar])` is called, which efficiently + // does a `memcpy` of the entire `scalars` array. + // Unecessary copying is minimized. + let dummyPointer = UnsafeMutablePointer.allocate(capacity: 1) + let scalarCount = shape.reduce(1, *) + var scalars: [Scalar] = Array(repeating: dummyPointer.move(), count: scalarCount) + dummyPointer.deallocate() + scalars.withUnsafeMutableBufferPointer { buffPtr in + buffPtr.baseAddress!.assign(from: ptr, count: scalarCount) + } + self.init(shape: shape, scalars: scalars) } - // Check if the dtype of the `ndarray` is compatible with the `Scalar` - // type. - guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { - debugLogNumpyError(""" - 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ - Swift type '\(Scalar.self)'. - """) - return nil - } - - let pyShape = numpyArray.__array_interface__["shape"] - guard let shape = [Int](pyShape) else { - debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") - return nil - } - - // Make sure that the array is contiguous in memory. This does a copy if - // the array is not already contiguous in memory. - let contiguousNumpyArray = np.ascontiguousarray(numpyArray) - - guard let ptrVal = - UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { - debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") - return nil - } - // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape - // of `(0,)`). - guard let ptr = UnsafePointer(bitPattern: ptrVal) else { - fatalError("'numpy.ndarray' data pointer was nil") - } - // This code avoids calling `init(shape: [Int], scalars: S)`, - // which inefficiently copies scalars one by one. Instead, - // `init(shape: [Int], scalars: [Scalar])` is called, which efficiently - // does a `memcpy` of the entire `scalars` array. - // Unecessary copying is minimized. 
- let dummyPointer = UnsafeMutablePointer.allocate(capacity: 1) - let scalarCount = shape.reduce(1, *) - var scalars: [Scalar] = Array(repeating: dummyPointer.move(), - count: scalarCount) - dummyPointer.deallocate() - scalars.withUnsafeMutableBufferPointer { buffPtr in - buffPtr.baseAddress!.assign(from: ptr, count: scalarCount) - } - self.init(shape: shape, scalars: scalars) - } } extension Tensor: ConvertibleFromNumpyArray - where Scalar: NumpyScalarCompatible { - /// Creates a tensor with the same shape and scalars as the specified - /// `numpy.ndarray` instance. - /// - /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. - /// - Precondition: The `numpy` Python package must be installed. - /// - Returns: `numpyArray` converted to an `Array`. Returns `nil` if - /// `numpyArray` does not have a compatible scalar `dtype`. - public init?(numpy numpyArray: PythonObject) { - // Check if input is a `numpy.ndarray` instance. - guard Python.isinstance(numpyArray, np.ndarray) == true else { - debugLogNumpyError(""" - PythonObject input has type '\(Python.type(numpyArray))' and is not \ - an instance of 'numpy.ndarray'. - """) - return nil - } - // Check if the dtype of the `ndarray` is compatible with the `Scalar` - // type. - guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { - debugLogNumpyError(""" - 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ - Swift type '\(Scalar.self)'. - """) - return nil + where Scalar: NumpyScalarCompatible { + /// Creates a tensor with the same shape and scalars as the specified + /// `numpy.ndarray` instance. + /// + /// - Parameter numpyArray: The `numpy.ndarray` instance to convert. + /// - Precondition: The `numpy` Python package must be installed. + /// - Returns: `numpyArray` converted to an `Array`. Returns `nil` if + /// `numpyArray` does not have a compatible scalar `dtype`. + public init?(numpy numpyArray: PythonObject) { + // Check if input is a `numpy.ndarray` instance. + guard Python.isinstance(numpyArray, np.ndarray) == true else { + debugLogNumpyError(""" + PythonObject input has type '\(Python.type(numpyArray))' and is not \ + an instance of 'numpy.ndarray'. + """) + return nil + } + // Check if the dtype of the `ndarray` is compatible with the `Scalar` + // type. + guard Scalar.numpyScalarTypes.contains(numpyArray.dtype) else { + debugLogNumpyError(""" + 'numpy.ndarray' dtype '\(numpyArray.dtype)' is incompatible with \ + Swift type '\(Scalar.self)'. + """) + return nil + } + + let pyShape = numpyArray.__array_interface__["shape"] + guard let dimensions = [Int](pyShape) else { + debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") + return nil + } + let shape = TensorShape(dimensions) + + // Make sure that the array is contiguous in memory. This does a copy if + // the array is not already contiguous in memory. + let contiguousNumpyArray = np.ascontiguousarray(numpyArray) + + guard let ptrVal = UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { + debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") + return nil + } + // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape + // of `(0,)`). 
+ guard let ptr = UnsafePointer(bitPattern: ptrVal) else { + fatalError("'numpy.ndarray' data pointer was nil") + } + let buffPtr = UnsafeBufferPointer(start: ptr, count: Int(shape.contiguousSize)) + self.init(shape: shape, scalars: buffPtr) } - - let pyShape = numpyArray.__array_interface__["shape"] - guard let dimensions = [Int](pyShape) else { - debugLogNumpyError("cannot access shape of 'numpy.ndarray' instance.") - return nil - } - let shape = TensorShape(dimensions) - - // Make sure that the array is contiguous in memory. This does a copy if - // the array is not already contiguous in memory. - let contiguousNumpyArray = np.ascontiguousarray(numpyArray) - - guard let ptrVal = - UInt(contiguousNumpyArray.__array_interface__["data"].tuple2.0) else { - debugLogNumpyError("cannot access data of 'numpy.ndarray' instance.") - return nil - } - // Note: `ptr` is not nil even if the `ndarray` is empty (i.e. has a shape - // of `(0,)`). - guard let ptr = UnsafePointer(bitPattern: ptrVal) else { - fatalError("'numpy.ndarray' data pointer was nil") - } - let buffPtr = UnsafeBufferPointer(start: ptr, - count: Int(shape.contiguousSize)) - self.init(shape: shape, scalars: buffPtr) - } } extension ShapedArray where Scalar: NumpyScalarCompatible { - /// Creates a `numpy.ndarray` instance with the same shape and scalars as - /// this `ShapedArray`. - /// - /// - Precondition: The `numpy` Python package must be installed. - public func makeNumpyArray() -> PythonObject { - return scalars.makeNumpyArray().reshape(shape) - } + /// Creates a `numpy.ndarray` instance with the same shape and scalars as + /// this `ShapedArray`. + /// + /// - Precondition: The `numpy` Python package must be installed. + public func makeNumpyArray() -> PythonObject { + return scalars.makeNumpyArray().reshape(shape) + } } extension Tensor where Scalar: NumpyScalarCompatible { - /// Creates a `numpy.ndarray` instance with the same shape and scalars as - /// this tensor. - /// - /// - Precondition: The `numpy` Python package must be installed. - public func makeNumpyArray() -> PythonObject { return array.makeNumpyArray() } + /// Creates a `numpy.ndarray` instance with the same shape and scalars as + /// this tensor. + /// + /// - Precondition: The `numpy` Python package must be installed. + public func makeNumpyArray() -> PythonObject { return array.makeNumpyArray() } } extension TensorShape: PythonConvertible { - public var pythonObject: PythonObject { - return dimensions.pythonObject - } + public var pythonObject: PythonObject { + return dimensions.pythonObject + } } #endif // canImport(Python) From 3cdd8083a87c59f393a1e58c08bf91d0eeffae31 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:41:13 -0400 Subject: [PATCH 53/55] Changed the indentation in the 'Random.swift' file. 
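Before the indentation-only changes below, a minimal round-trip sketch for the numpy conversion APIs reworked above, assuming the `numpy` Python package is installed and that the `TensorFlow` and `Python` modules are available; `Tensor<Float>` stands in for any `NumpyScalarCompatible` scalar type.

import TensorFlow
import Python

let np = Python.import("numpy")

// numpy -> Tensor: the failable initializer returns nil when the input is not
// an ndarray or when its dtype is incompatible with the requested scalar type.
let ndarray = np.ones([2, 3], dtype: np.float32)
guard let tensor = Tensor<Float>(numpy: ndarray) else {
    fatalError("'ndarray' has a dtype that is incompatible with Float")
}

// Tensor -> numpy: produces an ndarray with the same shape and scalars.
let roundTripped = tensor.makeNumpyArray()
print(Python.type(roundTripped))  // <class 'numpy.ndarray'>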
--- Sources/DeepLearning/Random.swift | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Sources/DeepLearning/Random.swift b/Sources/DeepLearning/Random.swift index 8c90ccdf4..ade75a826 100644 --- a/Sources/DeepLearning/Random.swift +++ b/Sources/DeepLearning/Random.swift @@ -409,8 +409,8 @@ private func makeUInt64Pair(_ vector: UInt32x4) -> (UInt64, UInt64) { //===------------------------------------------------------------------------------------------===// public protocol RandomDistribution { - associatedtype Sample - func next(using generator: inout G) -> Sample + associatedtype Sample + func next(using generator: inout G) -> Sample } @_fixed_layout @@ -446,7 +446,7 @@ public struct UniformFloatingPointDistribution: RandomDi @_fixed_layout public struct NormalDistribution: RandomDistribution - where T.RawSignificand: FixedWidthInteger { + where T.RawSignificand: FixedWidthInteger { public let mean: T public let standardDeviation: T private let uniformDist = UniformFloatingPointDistribution() @@ -503,10 +503,10 @@ public struct BetaDistribution: RandomDistribution { /// /// - Returns: Sample obtained using Cheng's BB algorithm. private static func chengsAlgorithmBB( - _ alpha0: Float, - _ a: Float, - _ b: Float, - using rng: inout G + _ alpha0: Float, + _ a: Float, + _ b: Float, + using rng: inout G ) -> Float { let alpha = a + b let beta = sqrt((alpha - 2) / (2 * a * b - alpha)) @@ -550,10 +550,10 @@ public struct BetaDistribution: RandomDistribution { /// /// - Returns: Sample obtained using Cheng's BB algorithm. private static func chengsAlgorithmBC( - _ alpha0: Float, - _ a: Float, - _ b: Float, - using rng: inout G + _ alpha0: Float, + _ a: Float, + _ b: Float, + using rng: inout G ) -> Float { let alpha = a + b let beta = 1 / b From 4b87827efe6957319707816ba6b4f3666c65a197 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:41:55 -0400 Subject: [PATCH 54/55] Minor edit. --- Sources/DeepLearning/Operators/NN.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/DeepLearning/Operators/NN.swift b/Sources/DeepLearning/Operators/NN.swift index bd160e111..c461a910b 100644 --- a/Sources/DeepLearning/Operators/NN.swift +++ b/Sources/DeepLearning/Operators/NN.swift @@ -23,8 +23,8 @@ import TensorFlow public extension Tensor where Scalar: TensorFlowFloatingPoint { /// Computes the batch normalized tensor along the specified axis. /// - /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` are - /// respectively the mean and variance of `self` along `axis`. + /// Specifically, returns `(self - mu) / (var + epsilon) * gamma + beta` where `mu` and `var` + /// are respectively the mean and variance of `self` along `axis`. /// /// - Parameters: /// - axis: The batch dimension. From a5edd32155a86140f453088905b6a823e49d1748 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sat, 20 Apr 2019 20:45:53 -0400 Subject: [PATCH 55/55] Tabs to spaces. 
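Before the tabs-to-spaces cleanup below, a short usage sketch for the `batchNormalized(alongAxis:)` operator whose doc comment was re-wrapped in the preceding patch. The `offset`, `scale`, and `epsilon` labels and the `Int32` axis are assumptions drawn from the documented parameters, and they are spelled out explicitly here rather than relying on defaults.

import TensorFlow

// Two samples with three features each; normalizing along the batch
// dimension (axis 0) gives every feature column zero mean and roughly unit
// variance, since gamma = 1 and beta = 0.
let batch = Tensor<Float>(shape: [2, 3], scalars: [1, 2, 3, 4, 5, 6])
let normalized = batch.batchNormalized(
    alongAxis: 0,
    offset: Tensor(0),   // beta
    scale: Tensor(1),    // gamma
    epsilon: 0.001)
print(normalized)  // Each column is approximately [-1, 1].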
--- Sources/DeepLearning/Operators/Basic.swift | 8 ++++---- Sources/DeepLearning/Operators/Math.swift | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Sources/DeepLearning/Operators/Basic.swift b/Sources/DeepLearning/Operators/Basic.swift index 55d2f8fd3..54f60ec3d 100644 --- a/Sources/DeepLearning/Operators/Basic.swift +++ b/Sources/DeepLearning/Operators/Basic.swift @@ -167,9 +167,9 @@ public extension Tensor { @inlinable @differentiable(wrt: self, vjp: _vjpExpandingShape(at:) where Scalar: TensorFlowFloatingPoint) func expandingShape(at axes: [Int]) -> Tensor { - var result = self - for i in axes { result = Raw.expandDims(result, dim: Tensor(Int32(i))) } - return result + var result = self + for i in axes { result = Raw.expandDims(result, dim: Tensor(Int32(i))) } + return result } /// Returns a rank-lifted `Tensor` with a leading dimension of 1. @@ -231,7 +231,7 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { @inlinable func _vjpExpandingShape(at axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { - let value = self.expandingShape(at: axes) + let value = self.expandingShape(at: axes) return (value, { v in v.squeezingShape(at: axes) }) } diff --git a/Sources/DeepLearning/Operators/Math.swift b/Sources/DeepLearning/Operators/Math.swift index 90d3d7bcf..3255aea10 100644 --- a/Sources/DeepLearning/Operators/Math.swift +++ b/Sources/DeepLearning/Operators/Math.swift @@ -1341,8 +1341,8 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let value = sum(squeezingAxes: axes) return (value, { [shape = shapeTensor] in var result = $0 - for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } - return result.broadcast(toShape: shape) + for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } + return result.broadcast(toShape: shape) }) } @@ -1359,8 +1359,8 @@ internal extension Tensor where Scalar: TensorFlowFloatingPoint { let count = Raw.gather(params: shapeTensor, indices: axes).product() return (value, { [shape = shapeTensor] in var result = $0 - for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } - return result.broadcast(toShape: shape) / Tensor(count) + for i in axes.array.scalars { result = result.expandingShape(at: Int(i)) } + return result.broadcast(toShape: shape) / Tensor(count) }) } }
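The pullbacks reflowed in this last patch re-expand the reduced axes and then broadcast the incoming gradient back to the input's shape. A small end-to-end sketch of that broadcast-back behaviour, using the argument-less reductions for brevity and assuming the standard `gradient(at:in:)` differential operator is available:

import TensorFlow

let x = Tensor<Float>(shape: [2, 3], scalars: [1, 2, 3, 4, 5, 6])

// d/dx sum(x): the scalar seed is broadcast back to x's shape, so the
// gradient is a [2, 3] tensor of ones.
let sumGradient = gradient(at: x) { x in x.sum() }
print(sumGradient)

// d/dx mean(x): the broadcast seed is additionally divided by the element
// count, giving a [2, 3] tensor filled with 1/6.
let meanGradient = gradient(at: x) { x in x.mean() }
print(meanGradient)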