From 0d96d5e89b270b0f110744bfce873ab42f1d35ae Mon Sep 17 00:00:00 2001
From: Tanmay Bakshi <tajymany@gmail.com>
Date: Thu, 18 Apr 2019 15:40:53 -0400
Subject: [PATCH 1/6] Add RNN wrapper for Cells

---
 Sources/DeepLearning/Layer.swift | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index ee68ee9ca..bd5cbee7f 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -1397,3 +1397,23 @@ public struct LSTMCell<Scalar: TensorFlowFloatingPoint>: RNNCell {
         return Output(output: newState, state: newState)
     }
 }
+
+public struct RNN<Cell: RNNCell>: Layer {
+    public var cell: Cell
+    
+    init(_ cell: () -> Cell) {
+        self.cell = cell()
+    }
+
+    @differentiable
+    public func applied(to input: [Cell.TimeStepInput]) -> [Cell.Output] {
+        var currentHiddenState = cell.zeroState
+        var outputs: [Cell.Output] = []
+        for timestep in input {
+            let timestepOutput = cell.applied(to: .init(input: timestep, state: currentHiddenState))
+            currentHiddenState = timestepOutput.state
+            outputs.append(timestepOutput)
+        }
+        return outputs
+    }
+}

From 29c1226388cd68eeec8845b550adb87e710bc6f3 Mon Sep 17 00:00:00 2001
From: Richard Wei <rxwei@google.com>
Date: Thu, 18 Apr 2019 16:10:08 -0400
Subject: [PATCH 2/6] Apply suggestions from code review

Co-Authored-By: tanmayb123 <tajymany@gmail.com>
---
 Sources/DeepLearning/Layer.swift | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index bd5cbee7f..dd2f23027 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -1401,16 +1401,16 @@ public struct LSTMCell<Scalar: TensorFlowFloatingPoint>: RNNCell {
 public struct RNN<Cell: RNNCell>: Layer {
     public var cell: Cell
     
-    init(_ cell: () -> Cell) {
+    public init(_ cell: @autoclosure () -> Cell) {
         self.cell = cell()
     }
 
     @differentiable
-    public func applied(to input: [Cell.TimeStepInput]) -> [Cell.Output] {
+    public func call(_ input: [Cell.TimeStepInput]) -> [Cell.Output] {
         var currentHiddenState = cell.zeroState
         var outputs: [Cell.Output] = []
         for timestep in input {
-            let timestepOutput = cell.applied(to: .init(input: timestep, state: currentHiddenState))
+            let timestepOutput = cell(input: timestep, state: currentHiddenState)
             currentHiddenState = timestepOutput.state
             outputs.append(timestepOutput)
         }

From 224463ade179747a400dbb53d09e7f5b2509044d Mon Sep 17 00:00:00 2001
From: Richard Wei <rxwei@google.com>
Date: Fri, 19 Apr 2019 18:20:16 -0700
Subject: [PATCH 3/6] Implement RNN pullback and add tests.

---
 Sources/DeepLearning/Layer.swift         | 109 +++++++++++++++++++----
 Tests/DeepLearningTests/LayerTests.swift |  29 +++++-
 2 files changed, 120 insertions(+), 18 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index dd2f23027..ee058c68a 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -1281,7 +1281,7 @@ public extension RNNCell {
 }
 
 /// A Simple RNN Cell.
-public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell {
+public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell, VectorNumeric {
     public var weight: Tensor<Scalar>
     public var bias: Tensor<Scalar>
 
@@ -1304,9 +1304,13 @@ public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell {
     /// - Parameters:
     ///   - inputSize: The number of features in 2-D input tensors.
     ///   - hiddenSize: The number of features in 2-D hidden states.
-    public init(inputSize: Int, hiddenSize: Int) {
+    ///   - seed: The random seed for initialization. The default value is random.
+    public init(inputSize: Int, hiddenSize: Int,
+                seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
+                                        Int64.random(in: Int64.min..<Int64.max))) {
         let concatenatedInputSize = inputSize + hiddenSize
-        self.weight = Tensor(glorotUniform: [concatenatedInputSize, hiddenSize])
+        self.weight = Tensor(glorotUniform: [concatenatedInputSize, hiddenSize],
+                             seed: seed)
         self.bias = Tensor(zeros: [hiddenSize])
     }
 
@@ -1326,7 +1330,7 @@ public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell {
 }
 
 /// An LSTM Cell.
-public struct LSTMCell<Scalar: TensorFlowFloatingPoint>: RNNCell {
+public struct LSTMCell<Scalar: TensorFlowFloatingPoint>: RNNCell, VectorNumeric {
     public var inputWeight, updateWeight, forgetWeight, outputWeight: Tensor<Scalar>
     public var inputBias, updateBias, forgetBias, outputBias: Tensor<Scalar>
 
@@ -1348,17 +1352,19 @@ public struct LSTMCell<Scalar: TensorFlowFloatingPoint>: RNNCell {
     /// - Parameters:
     ///   - inputSize: The number of features in 2-D input tensors.
     ///   - hiddenSize: The number of features in 2-D hidden states.
-    public init(inputSize: Int, hiddenSize: Int) {
+    public init(inputSize: Int, hiddenSize: Int,
+                seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
+                                        Int64.random(in: Int64.min..<Int64.max))) {
         let concatenatedInputSize = inputSize + hiddenSize
         let gateWeightShape = TensorShape([concatenatedInputSize, hiddenSize])
         let gateBiasShape = TensorShape([hiddenSize])
-        self.inputWeight = Tensor(glorotUniform: gateWeightShape)
+        self.inputWeight = Tensor(glorotUniform: gateWeightShape, seed: seed)
         self.inputBias = Tensor(zeros: gateBiasShape)
-        self.updateWeight = Tensor(glorotUniform: gateWeightShape)
+        self.updateWeight = Tensor(glorotUniform: gateWeightShape, seed: (seed.0, seed.1 &+ 1))
         self.updateBias = Tensor(zeros: gateBiasShape)
-        self.forgetWeight = Tensor(glorotUniform: gateWeightShape)
+        self.forgetWeight = Tensor(glorotUniform: gateWeightShape, seed: (seed.0, seed.1 &+ 2))
         self.forgetBias = Tensor(ones: gateBiasShape)
-        self.outputWeight = Tensor(glorotUniform: gateWeightShape)
+        self.outputWeight = Tensor(glorotUniform: gateWeightShape, seed: (seed.0, seed.1 &+ 3))
         self.outputBias = Tensor(zeros: gateBiasShape)
     }
 
@@ -1399,21 +1405,90 @@ public struct LSTMCell<Scalar: TensorFlowFloatingPoint>: RNNCell {
 }
 
 public struct RNN<Cell: RNNCell>: Layer {
+    public typealias Input = [Cell.TimeStepInput]
+    public typealias Output = [Cell.TimeStepOutput]
+
     public var cell: Cell
     
     public init(_ cell: @autoclosure () -> Cell) {
         self.cell = cell()
     }
 
-    @differentiable
-    public func call(_ input: [Cell.TimeStepInput]) -> [Cell.Output] {
-        var currentHiddenState = cell.zeroState
-        var outputs: [Cell.Output] = []
+    @differentiable(wrt: (self, input), vjp: _vjpCall(_:initialState:))
+    public func call(_ input: [Cell.TimeStepInput],
+                     initialState: Cell.State) -> [Cell.TimeStepOutput] {
+        var currentHiddenState = initialState
+        var timeStepOutputs: [Cell.TimeStepOutput] = []
         for timestep in input {
-            let timestepOutput = cell(input: timestep, state: currentHiddenState)
-            currentHiddenState = timestepOutput.state
-            outputs.append(timestepOutput)
+            let output = cell(input: timestep, state: currentHiddenState)
+            currentHiddenState = output.state
+            timeStepOutputs.append(output.output)
         }
-        return outputs
+        return timeStepOutputs
     }
+
+    @usableFromInline
+    internal func _vjpCall(
+        _ inputs: [Cell.TimeStepInput], initialState: Cell.State
+    ) -> ([Cell.TimeStepOutput],
+          (Array<Cell.TimeStepOutput>.CotangentVector)
+              -> (RNN<Cell>.CotangentVector, Array<Cell.TimeStepInput>.CotangentVector)) {
+        let timeStepCount = inputs.count
+        var currentHiddenState = cell.zeroState
+        var timeStepOutputs: [Cell.TimeStepOutput] = []
+        var backpropagators: [Cell.Backpropagator] = []
+        for timestep in inputs {
+            let (output, backpropagator) =
+                cell.appliedForBackpropagation(to: .init(input: timestep,
+                                                         state: currentHiddenState))
+            currentHiddenState = output.state
+            timeStepOutputs.append(output.output)
+            backpropagators.append(backpropagator)
+        }
+        func pullback(𝛁outputs: Array<Cell.TimeStepOutput>.CotangentVector)
+            -> (RNN<Cell>.CotangentVector, Array<Cell.TimeStepInput>.CotangentVector) {
+            assert(𝛁outputs.base.count == timeStepCount,
+                   "The number of output gradients must equal the number of input gradients")
+            var 𝛁cell = Cell.CotangentVector.zero
+            var 𝛁state = Cell.State.CotangentVector.zero
+            var reversed𝛁inputs: [Cell.TimeStepInput.CotangentVector] = []
+            reversed𝛁inputs.reserveCapacity(timeStepCount)
+            for (𝛁output, backpropagator) in zip(𝛁outputs.base, backpropagators).reversed() {
+                let (new𝛁cell, 𝛁input) = backpropagator(.init(output: 𝛁output, state: 𝛁state))
+                𝛁cell = new𝛁cell
+                𝛁state = 𝛁input.state
+                reversed𝛁inputs.append(𝛁input.input)
+            }
+            return (RNN<Cell>.CotangentVector(cell: 𝛁cell),
+                    Array<Cell.TimeStepInput>.CotangentVector(Array(reversed𝛁inputs.reversed())))
+        }
+        return (timeStepOutputs, pullback)
+    }
+
+    @differentiable(wrt: (self, inputs))
+    public func call(_ inputs: [Cell.TimeStepInput]) -> [Cell.TimeStepOutput] {
+        return self(inputs, initialState: cell.zeroState.withoutDerivative())
+    }
+
+    /* TODO: Uncomment once control flow and differentiation through force unwrapping are supported.
+    @differentiable(wrt: (self, inputs))
+    public func lastOutput(from inputs: [Cell.TimeStepInput],
+                           initialState: Cell.State) -> Cell.TimeStepOutput {
+        precondition(!inputs.isEmpty, "inputs cannot be empty")
+        return self(inputs, initialState: initialState).last!
+    }
+
+    @differentiable(wrt: (self, inputs))
+    public func lastOutput(from inputs: [Cell.TimeStepInput]) -> Cell.TimeStepOutput {
+        precondition(!inputs.isEmpty, "inputs cannot be empty")
+        return self(inputs, initialState: cell.zeroState).last!
+    }
+    */
 }
+
+extension RNN: Equatable where Cell: Equatable {}
+extension RNN: AdditiveArithmetic where Cell: AdditiveArithmetic {}
+extension RNN: VectorNumeric where Cell: VectorNumeric {}
+
+public typealias SimpleRNN<Scalar: TensorFlowFloatingPoint> = RNN<SimpleRNNCell<Scalar>>
+public typealias LSTM<Scalar: TensorFlowFloatingPoint> = RNN<LSTMCell<Scalar>>
diff --git a/Tests/DeepLearningTests/LayerTests.swift b/Tests/DeepLearningTests/LayerTests.swift
index 5bd8ba179..8676eb257 100644
--- a/Tests/DeepLearningTests/LayerTests.swift
+++ b/Tests/DeepLearningTests/LayerTests.swift
@@ -95,6 +95,32 @@ final class LayerTests: XCTestCase {
         XCTAssertEqual(output, expected)
     }
 
+    func testRNN() {
+        let x = Tensor<Float>(rangeFrom: 0.0, to: 0.4, stride: 0.1).rankLifted()
+        let inputs: [Tensor<Float>] = Array(repeating: x, count: 4)
+        let rnn = RNN(SimpleRNNCell<Float>(inputSize: 4, hiddenSize: 4,
+                                           seed: (0xFeedBeef, 0xDeadBeef)))
+        let (outputs, pullback) = rnn.valueWithPullback(at: inputs) { rnn, inputs in
+            return rnn(inputs)
+        }
+        XCTAssertEqual(outputs, [[[-0.0026294366, -0.0058668107,  0.04495003,  0.20311214]],
+                                 [[ 0.06788494,    0.050665878,   0.02415526,  0.09249911]],
+                                 [[ 0.06621192,    0.009049267,   0.065047316, 0.11534518]],
+                                 [[ 0.05612204,    0.00022032857, 0.05407162,  0.09784105]]])
+        let (𝛁rnn, 𝛁inputs) = pullback(.init(inputs))
+        print(𝛁rnn, 𝛁inputs)
+        XCTAssertEqual(𝛁rnn.cell.weight,
+                       [[          0.0,           0.0,           0.0,           0.0],
+                        [-0.0051278225,  0.0013102926,    0.00740262,   0.018119661],
+                        [ -0.010255645,  0.0026205853,    0.01480524,   0.036239322],
+                        [ -0.015383467,   0.003930878,    0.02220786,   0.054358985],
+                        [          0.0,           0.0,           0.0,           0.0],
+                        [          0.0,           0.0,           0.0,           0.0],
+                        [          0.0,           0.0,           0.0,           0.0],
+                        [          0.0,           0.0,           0.0,           0.0]])
+        XCTAssertEqual(𝛁rnn.cell.bias, [-0.051278222,  0.013102926,    0.0740262,   0.18119662])
+    }
+
     static var allTests = [
         ("testConv1D", testConv1D),
         ("testMaxPool1D", testMaxPool1D),
@@ -104,6 +130,7 @@ final class LayerTests: XCTestCase {
         ("testGlobalAvgPool3D", testGlobalAvgPool3D),
         ("testReshape", testReshape),
         ("testFlatten", testFlatten),
-        ("testSimpleRNNCell", testSimpleRNNCell)
+        ("testSimpleRNNCell", testSimpleRNNCell),
+        ("testRNN", testRNN)
     ]
 }

From 1f7190fba7266ba06aee58f799a67cddf2dd1635 Mon Sep 17 00:00:00 2001
From: Richard Wei <rxrwei@gmail.com>
Date: Fri, 19 Apr 2019 22:22:30 -0700
Subject: [PATCH 4/6] Minor improvements.

---
 Sources/DeepLearning/Layer.swift         | 15 +++++++--------
 Tests/DeepLearningTests/LayerTests.swift |  1 -
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index ee058c68a..984317b73 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -1432,11 +1432,13 @@ public struct RNN<Cell: RNNCell>: Layer {
         _ inputs: [Cell.TimeStepInput], initialState: Cell.State
     ) -> ([Cell.TimeStepOutput],
           (Array<Cell.TimeStepOutput>.CotangentVector)
-              -> (RNN<Cell>.CotangentVector, Array<Cell.TimeStepInput>.CotangentVector)) {
+              -> (CotangentVector, Array<Cell.TimeStepInput>.CotangentVector)) {
         let timeStepCount = inputs.count
-        var currentHiddenState = cell.zeroState
+        var currentHiddenState = initialState
         var timeStepOutputs: [Cell.TimeStepOutput] = []
+        timeStepOutputs.reserveCapacity(timeStepCount)
         var backpropagators: [Cell.Backpropagator] = []
+        backpropagators.reserveCapacity(timeStepCount)
         for timestep in inputs {
             let (output, backpropagator) =
                 cell.appliedForBackpropagation(to: .init(input: timestep,
@@ -1445,8 +1447,7 @@ public struct RNN<Cell: RNNCell>: Layer {
             timeStepOutputs.append(output.output)
             backpropagators.append(backpropagator)
         }
-        func pullback(𝛁outputs: Array<Cell.TimeStepOutput>.CotangentVector)
-            -> (RNN<Cell>.CotangentVector, Array<Cell.TimeStepInput>.CotangentVector) {
+        return (timeStepOutputs, { 𝛁outputs in
             assert(𝛁outputs.base.count == timeStepCount,
                    "The number of output gradients must equal the number of input gradients")
             var 𝛁cell = Cell.CotangentVector.zero
@@ -1459,10 +1460,8 @@ public struct RNN<Cell: RNNCell>: Layer {
                 𝛁state = 𝛁input.state
                 reversed𝛁inputs.append(𝛁input.input)
             }
-            return (RNN<Cell>.CotangentVector(cell: 𝛁cell),
-                    Array<Cell.TimeStepInput>.CotangentVector(Array(reversed𝛁inputs.reversed())))
-        }
-        return (timeStepOutputs, pullback)
+            return (.init(cell: 𝛁cell), .init(Array(reversed𝛁inputs.reversed())))
+        })
     }
 
     @differentiable(wrt: (self, inputs))
diff --git a/Tests/DeepLearningTests/LayerTests.swift b/Tests/DeepLearningTests/LayerTests.swift
index 8676eb257..a347c4d7e 100644
--- a/Tests/DeepLearningTests/LayerTests.swift
+++ b/Tests/DeepLearningTests/LayerTests.swift
@@ -108,7 +108,6 @@ final class LayerTests: XCTestCase {
                                  [[ 0.06621192,    0.009049267,   0.065047316, 0.11534518]],
                                  [[ 0.05612204,    0.00022032857, 0.05407162,  0.09784105]]])
         let (𝛁rnn, 𝛁inputs) = pullback(.init(inputs))
-        print(𝛁rnn, 𝛁inputs)
         XCTAssertEqual(𝛁rnn.cell.weight,
                        [[          0.0,           0.0,           0.0,           0.0],
                         [-0.0051278225,  0.0013102926,    0.00740262,   0.018119661],

From 41c2d8c0ddc9870b9991a6f0e4552136d49363d5 Mon Sep 17 00:00:00 2001
From: Richard Wei <rxwei@google.com>
Date: Fri, 19 Apr 2019 22:30:41 -0700
Subject: [PATCH 5/6] Fix a typo.

---
 Sources/DeepLearning/Layer.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 984317b73..11e0f27d3 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -1449,7 +1449,7 @@ public struct RNN<Cell: RNNCell>: Layer {
         }
         return (timeStepOutputs, { 𝛁outputs in
             assert(𝛁outputs.base.count == timeStepCount,
-                   "The number of output gradients must equal the number of input gradients")
+                   "The number of output gradients must equal the number of time steps")
             var 𝛁cell = Cell.CotangentVector.zero
             var 𝛁state = Cell.State.CotangentVector.zero
             var reversed𝛁inputs: [Cell.TimeStepInput.CotangentVector] = []

From 147faa6e13a6bc7d91a47ed6811265cf4cdfb0ae Mon Sep 17 00:00:00 2001
From: Richard Wei <rxwei@google.com>
Date: Fri, 19 Apr 2019 22:32:17 -0700
Subject: [PATCH 6/6] Assert -> precondition.

---
 Sources/DeepLearning/Layer.swift | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 11e0f27d3..4637ba715 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -1448,8 +1448,8 @@ public struct RNN<Cell: RNNCell>: Layer {
             backpropagators.append(backpropagator)
         }
         return (timeStepOutputs, { 𝛁outputs in
-            assert(𝛁outputs.base.count == timeStepCount,
-                   "The number of output gradients must equal the number of time steps")
+            precondition(𝛁outputs.base.count == timeStepCount,
+                         "The number of output gradients must equal the number of time steps")
             var 𝛁cell = Cell.CotangentVector.zero
             var 𝛁state = Cell.State.CotangentVector.zero
             var reversed𝛁inputs: [Cell.TimeStepInput.CotangentVector] = []