From 0d96d5e89b270b0f110744bfce873ab42f1d35ae Mon Sep 17 00:00:00 2001 From: Tanmay Bakshi Date: Thu, 18 Apr 2019 15:40:53 -0400 Subject: [PATCH 1/6] Add RNN wrapper for Cells --- Sources/DeepLearning/Layer.swift | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index ee68ee9ca..bd5cbee7f 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -1397,3 +1397,23 @@ public struct LSTMCell: RNNCell { return Output(output: newState, state: newState) } } + +public struct RNN: Layer { + public var cell: Cell + + init(_ cell: () -> Cell) { + self.cell = cell() + } + + @differentiable + public func applied(to input: [Cell.TimeStepInput]) -> [Cell.Output] { + var currentHiddenState = cell.zeroState + var outputs: [Cell.Output] = [] + for timestep in input { + let timestepOutput = cell.applied(to: .init(input: timestep, state: currentHiddenState)) + currentHiddenState = timestepOutput.state + outputs.append(timestepOutput) + } + return outputs + } +} From 29c1226388cd68eeec8845b550adb87e710bc6f3 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Thu, 18 Apr 2019 16:10:08 -0400 Subject: [PATCH 2/6] Apply suggestions from code review Co-Authored-By: tanmayb123 --- Sources/DeepLearning/Layer.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index bd5cbee7f..dd2f23027 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -1401,16 +1401,16 @@ public struct LSTMCell: RNNCell { public struct RNN: Layer { public var cell: Cell - init(_ cell: () -> Cell) { + public init(_ cell: @autoclosure () -> Cell) { self.cell = cell() } @differentiable - public func applied(to input: [Cell.TimeStepInput]) -> [Cell.Output] { + public func call(_ input: [Cell.TimeStepInput]) -> [Cell.Output] { var currentHiddenState = cell.zeroState var outputs: [Cell.Output] = [] for timestep in input { - let timestepOutput = cell.applied(to: .init(input: timestep, state: currentHiddenState)) + let timestepOutput = cell(input: timestep, state: currentHiddenState) currentHiddenState = timestepOutput.state outputs.append(timestepOutput) } From 224463ade179747a400dbb53d09e7f5b2509044d Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Fri, 19 Apr 2019 18:20:16 -0700 Subject: [PATCH 3/6] Implement RNN pullback and add tests. --- Sources/DeepLearning/Layer.swift | 109 +++++++++++++++++++---- Tests/DeepLearningTests/LayerTests.swift | 29 +++++- 2 files changed, 120 insertions(+), 18 deletions(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index dd2f23027..ee058c68a 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -1281,7 +1281,7 @@ public extension RNNCell { } /// A Simple RNN Cell. -public struct SimpleRNNCell: RNNCell { +public struct SimpleRNNCell: RNNCell, VectorNumeric { public var weight: Tensor public var bias: Tensor @@ -1304,9 +1304,13 @@ public struct SimpleRNNCell: RNNCell { /// - Parameters: /// - inputSize: The number of features in 2-D input tensors. /// - hiddenSize: The number of features in 2-D hidden states. - public init(inputSize: Int, hiddenSize: Int) { + /// - seed: The random seed for initialization. The default value is random. + public init(inputSize: Int, hiddenSize: Int, + seed: (Int64, Int64) = (Int64.random(in: Int64.min..: RNNCell { } /// An LSTM Cell. -public struct LSTMCell: RNNCell { +public struct LSTMCell: RNNCell, VectorNumeric { public var inputWeight, updateWeight, forgetWeight, outputWeight: Tensor public var inputBias, updateBias, forgetBias, outputBias: Tensor @@ -1348,17 +1352,19 @@ public struct LSTMCell: RNNCell { /// - Parameters: /// - inputSize: The number of features in 2-D input tensors. /// - hiddenSize: The number of features in 2-D hidden states. - public init(inputSize: Int, hiddenSize: Int) { + public init(inputSize: Int, hiddenSize: Int, + seed: (Int64, Int64) = (Int64.random(in: Int64.min..: RNNCell { } public struct RNN: Layer { + public typealias Input = [Cell.TimeStepInput] + public typealias Output = [Cell.TimeStepOutput] + public var cell: Cell public init(_ cell: @autoclosure () -> Cell) { self.cell = cell() } - @differentiable - public func call(_ input: [Cell.TimeStepInput]) -> [Cell.Output] { - var currentHiddenState = cell.zeroState - var outputs: [Cell.Output] = [] + @differentiable(wrt: (self, input), vjp: _vjpCall(_:initialState:)) + public func call(_ input: [Cell.TimeStepInput], + initialState: Cell.State) -> [Cell.TimeStepOutput] { + var currentHiddenState = initialState + var timeStepOutputs: [Cell.TimeStepOutput] = [] for timestep in input { - let timestepOutput = cell(input: timestep, state: currentHiddenState) - currentHiddenState = timestepOutput.state - outputs.append(timestepOutput) + let output = cell(input: timestep, state: currentHiddenState) + currentHiddenState = output.state + timeStepOutputs.append(output.output) } - return outputs + return timeStepOutputs } + + @usableFromInline + internal func _vjpCall( + _ inputs: [Cell.TimeStepInput], initialState: Cell.State + ) -> ([Cell.TimeStepOutput], + (Array.CotangentVector) + -> (RNN.CotangentVector, Array.CotangentVector)) { + let timeStepCount = inputs.count + var currentHiddenState = cell.zeroState + var timeStepOutputs: [Cell.TimeStepOutput] = [] + var backpropagators: [Cell.Backpropagator] = [] + for timestep in inputs { + let (output, backpropagator) = + cell.appliedForBackpropagation(to: .init(input: timestep, + state: currentHiddenState)) + currentHiddenState = output.state + timeStepOutputs.append(output.output) + backpropagators.append(backpropagator) + } + func pullback(𝛁outputs: Array.CotangentVector) + -> (RNN.CotangentVector, Array.CotangentVector) { + assert(𝛁outputs.base.count == timeStepCount, + "The number of output gradients must equal the number of input gradients") + var 𝛁cell = Cell.CotangentVector.zero + var 𝛁state = Cell.State.CotangentVector.zero + var reversed𝛁inputs: [Cell.TimeStepInput.CotangentVector] = [] + reversed𝛁inputs.reserveCapacity(timeStepCount) + for (𝛁output, backpropagator) in zip(𝛁outputs.base, backpropagators).reversed() { + let (new𝛁cell, 𝛁input) = backpropagator(.init(output: 𝛁output, state: 𝛁state)) + 𝛁cell = new𝛁cell + 𝛁state = 𝛁input.state + reversed𝛁inputs.append(𝛁input.input) + } + return (RNN.CotangentVector(cell: 𝛁cell), + Array.CotangentVector(Array(reversed𝛁inputs.reversed()))) + } + return (timeStepOutputs, pullback) + } + + @differentiable(wrt: (self, inputs)) + public func call(_ inputs: [Cell.TimeStepInput]) -> [Cell.TimeStepOutput] { + return self(inputs, initialState: cell.zeroState.withoutDerivative()) + } + + /* TODO: Uncomment once control flow and differentiation through force unwrapping is supported. + @differentiable(wrt: (self, inputs)) + public func lastOutput(from inputs: [Cell.TimeStepInput], + initialState: Cell.State) -> Cell.TimeStepOutput { + precondition(!inputs.isEmpty, "inputs cannot be empty") + return self(inputs, initialState: initialState).last! + } + + @differentiable(wrt: (self, inputs)) + public func lastOutput(from inputs: [Cell.TimeStepInput]) -> Cell.TimeStepOutput { + precondition(!inputs.isEmpty, "inputs cannot be empty") + return self(inputs, initialState: cell.zeroState).last! + } + */ } + +extension RNN: Equatable where Cell: Equatable {} +extension RNN: AdditiveArithmetic where Cell: AdditiveArithmetic {} +extension RNN: VectorNumeric where Cell: VectorNumeric {} + +public typealias SimpleRNN = RNN> +public typealias LSTM = RNN> diff --git a/Tests/DeepLearningTests/LayerTests.swift b/Tests/DeepLearningTests/LayerTests.swift index 5bd8ba179..8676eb257 100644 --- a/Tests/DeepLearningTests/LayerTests.swift +++ b/Tests/DeepLearningTests/LayerTests.swift @@ -95,6 +95,32 @@ final class LayerTests: XCTestCase { XCTAssertEqual(output, expected) } + func testRNN() { + let x = Tensor(rangeFrom: 0.0, to: 0.4, stride: 0.1).rankLifted() + let inputs: [Tensor] = Array(repeating: x, count: 4) + let rnn = RNN(SimpleRNNCell(inputSize: 4, hiddenSize: 4, + seed: (0xFeedBeef, 0xDeadBeef))) + let (outputs, pullback) = rnn.valueWithPullback(at: inputs) { rnn, inputs in + return rnn(inputs) + } + XCTAssertEqual(outputs, [[[-0.0026294366, -0.0058668107, 0.04495003, 0.20311214]], + [[ 0.06788494, 0.050665878, 0.02415526, 0.09249911]], + [[ 0.06621192, 0.009049267, 0.065047316, 0.11534518]], + [[ 0.05612204, 0.00022032857, 0.05407162, 0.09784105]]]) + let (𝛁rnn, 𝛁inputs) = pullback(.init(inputs)) + print(𝛁rnn, 𝛁inputs) + XCTAssertEqual(𝛁rnn.cell.weight, + [[ 0.0, 0.0, 0.0, 0.0], + [-0.0051278225, 0.0013102926, 0.00740262, 0.018119661], + [ -0.010255645, 0.0026205853, 0.01480524, 0.036239322], + [ -0.015383467, 0.003930878, 0.02220786, 0.054358985], + [ 0.0, 0.0, 0.0, 0.0], + [ 0.0, 0.0, 0.0, 0.0], + [ 0.0, 0.0, 0.0, 0.0], + [ 0.0, 0.0, 0.0, 0.0]]) + XCTAssertEqual(𝛁rnn.cell.bias, [-0.051278222, 0.013102926, 0.0740262, 0.18119662]) + } + static var allTests = [ ("testConv1D", testConv1D), ("testMaxPool1D", testMaxPool1D), @@ -104,6 +130,7 @@ final class LayerTests: XCTestCase { ("testGlobalAvgPool3D", testGlobalAvgPool3D), ("testReshape", testReshape), ("testFlatten", testFlatten), - ("testSimpleRNNCell", testSimpleRNNCell) + ("testSimpleRNNCell", testSimpleRNNCell), + ("testRNN", testRNN) ] } From 1f7190fba7266ba06aee58f799a67cddf2dd1635 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Fri, 19 Apr 2019 22:22:30 -0700 Subject: [PATCH 4/6] Minor improvements. --- Sources/DeepLearning/Layer.swift | 13 ++++++------- Tests/DeepLearningTests/LayerTests.swift | 1 - 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index ee058c68a..984317b73 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -1432,11 +1432,13 @@ public struct RNN: Layer { _ inputs: [Cell.TimeStepInput], initialState: Cell.State ) -> ([Cell.TimeStepOutput], (Array.CotangentVector) - -> (RNN.CotangentVector, Array.CotangentVector)) { + -> (CotangentVector, Array.CotangentVector)) { let timeStepCount = inputs.count var currentHiddenState = cell.zeroState var timeStepOutputs: [Cell.TimeStepOutput] = [] + timeStepOutputs.reserveCapacity(timeStepCount) var backpropagators: [Cell.Backpropagator] = [] + backpropagators.reserveCapacity(timeStepCount) for timestep in inputs { let (output, backpropagator) = cell.appliedForBackpropagation(to: .init(input: timestep, @@ -1445,8 +1447,7 @@ public struct RNN: Layer { timeStepOutputs.append(output.output) backpropagators.append(backpropagator) } - func pullback(𝛁outputs: Array.CotangentVector) - -> (RNN.CotangentVector, Array.CotangentVector) { + return (timeStepOutputs, { 𝛁outputs in assert(𝛁outputs.base.count == timeStepCount, "The number of output gradients must equal the number of input gradients") var 𝛁cell = Cell.CotangentVector.zero @@ -1459,10 +1460,8 @@ public struct RNN: Layer { 𝛁state = 𝛁input.state reversed𝛁inputs.append(𝛁input.input) } - return (RNN.CotangentVector(cell: 𝛁cell), - Array.CotangentVector(Array(reversed𝛁inputs.reversed()))) - } - return (timeStepOutputs, pullback) + return (.init(cell: 𝛁cell), .init(Array(reversed𝛁inputs.reversed()))) + }) } @differentiable(wrt: (self, inputs)) diff --git a/Tests/DeepLearningTests/LayerTests.swift b/Tests/DeepLearningTests/LayerTests.swift index 8676eb257..a347c4d7e 100644 --- a/Tests/DeepLearningTests/LayerTests.swift +++ b/Tests/DeepLearningTests/LayerTests.swift @@ -108,7 +108,6 @@ final class LayerTests: XCTestCase { [[ 0.06621192, 0.009049267, 0.065047316, 0.11534518]], [[ 0.05612204, 0.00022032857, 0.05407162, 0.09784105]]]) let (𝛁rnn, 𝛁inputs) = pullback(.init(inputs)) - print(𝛁rnn, 𝛁inputs) XCTAssertEqual(𝛁rnn.cell.weight, [[ 0.0, 0.0, 0.0, 0.0], [-0.0051278225, 0.0013102926, 0.00740262, 0.018119661], From 41c2d8c0ddc9870b9991a6f0e4552136d49363d5 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Fri, 19 Apr 2019 22:30:41 -0700 Subject: [PATCH 5/6] Fix a typo. --- Sources/DeepLearning/Layer.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 984317b73..11e0f27d3 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -1449,7 +1449,7 @@ public struct RNN: Layer { } return (timeStepOutputs, { 𝛁outputs in assert(𝛁outputs.base.count == timeStepCount, - "The number of output gradients must equal the number of input gradients") + "The number of output gradients must equal the number of time steps") var 𝛁cell = Cell.CotangentVector.zero var 𝛁state = Cell.State.CotangentVector.zero var reversed𝛁inputs: [Cell.TimeStepInput.CotangentVector] = [] From 147faa6e13a6bc7d91a47ed6811265cf4cdfb0ae Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Fri, 19 Apr 2019 22:32:17 -0700 Subject: [PATCH 6/6] Assert -> precondition. --- Sources/DeepLearning/Layer.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 11e0f27d3..4637ba715 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -1448,8 +1448,8 @@ public struct RNN: Layer { backpropagators.append(backpropagator) } return (timeStepOutputs, { 𝛁outputs in - assert(𝛁outputs.base.count == timeStepCount, - "The number of output gradients must equal the number of time steps") + precondition(𝛁outputs.base.count == timeStepCount, + "The number of output gradients must equal the number of time steps") var 𝛁cell = Cell.CotangentVector.zero var 𝛁state = Cell.State.CotangentVector.zero var reversed𝛁inputs: [Cell.TimeStepInput.CotangentVector] = []