From 2503813cc4e4c5b98e61035d9146cc7c891281e9 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Fri, 22 Feb 2019 05:53:25 +0000
Subject: [PATCH 1/5] Add a helper function for sequential models.

Many deep learning models are composed of sequential layers stacked one on top
of another. Writing out the explicit `applied(to:in:)` function for such models
is relatively tedious: it is repetitive, and it obscures the underlying intent.
It is especially bothersome because it is the second (or third) time you write
out all the layers: the first time is to declare all the instance variables,
and the second time (if necessary) is in the initializer.

Fortunately, with a small family of helper functions, we can make everything
type safe as well as convenient, expressive, and readable!

This commit adds a family of `Sequential` functions that take in a context, an
input, and a variable number of layers. Each one chains the output of one layer
into the input of the next.

This API approach has a number of advantages:

 1. It avoids introducing new symbolic operators, which can be very confusing
    to new users.
 2. It works with today's AutoDiff implementation. (Yay!)
 3. It is very readable and clean.
 4. It avoids users "getting stuck". Concretely, if someone implemented a model
    using my previously proposed `>>>` operator and then wanted to add a
    residual (or skip) connection, they would basically have to re-write the
    whole model using a struct, etc. With this API structure, only "local"
    changes are required. (e.g. If only one skip connection is needed, they can
    split the sequential chain into two pieces.)

Downsides of this approach:

 1. It doesn't DRY-out the types required to define a model. (I have some
    thoughts here, but there isn't enough room in this
    margin^H^H^H^H^H^Hcommit message.)
 2. We should think hard about how things should look when we have loops.
 3. I'm sure there's a better way to code-gen all the different Sequential
    arities. (I got bored hand-writing them out after 4...)

Suggestions welcome!
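For illustration, a call site using these helpers looks roughly like the
following (this mirrors the test added in this patch; it is not an additional
API):

    struct Model: Layer {
        var dense1 = Dense<Float>(inputSize: 2, outputSize: 4, activation: relu)
        var dense2 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)

        @differentiable(wrt: (self, input))
        func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
            // Feed `input` through dense1, then dense2.
            return Sequential(in: context, from: input, dense1, dense2)
        }
    }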
---
 Sources/DeepLearning/Layer.swift              | 31 ++++++++++++
 Tests/DeepLearningTests/SequentialTests.swift | 48 +++++++++++++++++++
 Tests/DeepLearningTests/XCTestManifests.swift |  1 +
 3 files changed, 80 insertions(+)
 create mode 100644 Tests/DeepLearningTests/SequentialTests.swift

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 240202fa7..3d2f6d2fc 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -101,6 +101,37 @@ public extension Layer {
     }
 }

+@differentiable(wrt: (input, l1, l2))
+public func Sequential<L1: Layer, L2: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2) -> L2.Output where L1.Output == L2.Input {
+    let o1 = l1.applied(to: input, in: context)
+    return l2.applied(to: o1, in: context)
+}
+
+@differentiable(wrt: (input, l1, l2, l3))
+public func Sequential<L1: Layer, L2: Layer, L3: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3) -> L3.Output where L1.Output == L2.Input, L2.Output == L3.Input {
+    let o1 = l1.applied(to: input, in: context)
+    let o2 = l2.applied(to: o1, in: context)
+    return l3.applied(to: o2, in: context)
+}
+
+@differentiable(wrt: (input, l1, l2, l3, l4))
+public func Sequential<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3, _ l4: L4) -> L4.Output where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input {
+    let o1 = l1.applied(to: input, in: context)
+    let o2 = l2.applied(to: o1, in: context)
+    let o3 = l3.applied(to: o2, in: context)
+    return l4.applied(to: o3, in: context)
+}
+
+@differentiable(wrt: (input, l1, l2, l3, l4, l5))
+public func Sequential<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5) -> L5.Output where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input {
+    let o1 = l1.applied(to: input, in: context)
+    let o2 = l2.applied(to: o1, in: context)
+    let o3 = l3.applied(to: o2, in: context)
+    let o4 = l4.applied(to: o3, in: context)
+    return l5.applied(to: o4, in: context)
+}
+
+
 /// A mutable, shareable, owning reference to a tensor.
 public final class Parameter {
     public var value: Tensor
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
new file mode 100644
index 000000000..dec529190
--- /dev/null
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -0,0 +1,48 @@
+// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import XCTest
+@testable import DeepLearning
+
+final class SequentialTests: XCTestCase {
+    func testSequential() {
+
+        struct Model: Layer {
+            var dense1 = Dense<Float>(inputSize: 2, outputSize: 4, activation: relu)
+            var dense2 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)
+
+            @differentiable(wrt: (self, input))
+            func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
+                return Sequential(in: context, from: input, dense1, dense2)
+            }
+        }
+        var model = Model()
+        let optimizer = SGD(learningRate: 0.02, modelType: type(of: model), scalarType: Float.self)
+        let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
+        let y: Tensor<Float> = [0, 1, 1, 0]
+        let context = Context(learningPhase: .training)
+        for _ in 0..<1000 {
+            let 𝛁model = model.gradient { model -> Tensor<Float> in
+                let ŷ = model.applied(to: x, in: context)
+                return meanSquaredError(predicted: ŷ, expected: y)
+            }
+            optimizer.update(&model.allDifferentiableVariables, along: 𝛁model)
+        }
+        print(model.inferring(from: [[0, 0], [0, 1], [1, 0], [1, 1]]))
+    }
+
+    static var allTests = [
+        ("testSequential", testSequential)
+    ]
+}
\ No newline at end of file
diff --git a/Tests/DeepLearningTests/XCTestManifests.swift b/Tests/DeepLearningTests/XCTestManifests.swift
index 2118f859f..768ddeccc 100644
--- a/Tests/DeepLearningTests/XCTestManifests.swift
+++ b/Tests/DeepLearningTests/XCTestManifests.swift
@@ -19,6 +19,7 @@ public func allTests() -> [XCTestCaseEntry] {
     return [
         testCase(PRNGTests.allTests),
         testCase(TrivialModelTests.allTests),
+        testCase(SequentialTests.allTests),
     ]
 }
 #endif

From ab7c2721f23ce483961da1d7d2d68a58477ed2d9 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Fri, 22 Feb 2019 07:49:17 +0000
Subject: [PATCH 2/5] Switch to using a protocol extension on Differentiable
 for a nicer syntax and to avoid polluting the global function namespace.
 Also switch to camelCase.
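For comparison (illustration only; both lines are taken from the test as it
reads before and after this change, counting the follow-up test fix in the next
patch), the call site moves from a free function to a method on the input value:

    // Patch 1: free function.
    return Sequential(in: context, from: input, dense1, dense2)

    // This patch: method on the input value, read left to right.
    return input.sequenced(in: context, through: dense1, dense2)

Putting `sequenced` on `Differentiable` keeps the data flow readable from left
to right and avoids adding a global `Sequential` symbol.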
---
 Sources/DeepLearning/Layer.swift              | 99 +++++++++++------
 Tests/DeepLearningTests/SequentialTests.swift |  2 +-
 2 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 3d2f6d2fc..0550be1e7 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -101,34 +101,77 @@ public extension Layer {
     }
 }

-@differentiable(wrt: (input, l1, l2))
-public func Sequential<L1: Layer, L2: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2) -> L2.Output where L1.Output == L2.Input {
-    let o1 = l1.applied(to: input, in: context)
-    return l2.applied(to: o1, in: context)
-}
-
-@differentiable(wrt: (input, l1, l2, l3))
-public func Sequential<L1: Layer, L2: Layer, L3: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3) -> L3.Output where L1.Output == L2.Input, L2.Output == L3.Input {
-    let o1 = l1.applied(to: input, in: context)
-    let o2 = l2.applied(to: o1, in: context)
-    return l3.applied(to: o2, in: context)
-}
-
-@differentiable(wrt: (input, l1, l2, l3, l4))
-public func Sequential<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3, _ l4: L4) -> L4.Output where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input {
-    let o1 = l1.applied(to: input, in: context)
-    let o2 = l2.applied(to: o1, in: context)
-    let o3 = l3.applied(to: o2, in: context)
-    return l4.applied(to: o3, in: context)
-}
-
-@differentiable(wrt: (input, l1, l2, l3, l4, l5))
-public func Sequential<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5) -> L5.Output where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input {
-    let o1 = l1.applied(to: input, in: context)
-    let o2 = l2.applied(to: o1, in: context)
-    let o3 = l3.applied(to: o2, in: context)
-    let o4 = l4.applied(to: o3, in: context)
-    return l5.applied(to: o4, in: context)
-}
+extension Differentiable {
+
+    @differentiable(wrt: (self, l1, l2))
+    public func sequenced<L1: Layer, L2: Layer>(
+        in context: Context, through l1: L1, _ l2: L2)
+        -> L2.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input {
+        let o1 = l1.applied(to: self, in: context)
+        return l2.applied(to: o1, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3))
+    public func sequenced<L1: Layer, L2: Layer, L3: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3)
+        -> L3.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        return l3.applied(to: o2, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4))
+    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4)
+        -> L4.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        return l4.applied(to: o3, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4, l5))
+    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5)
+        -> L5.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input,
+              L4.Output == L5.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        let o4 = l4.applied(to: o3, in: context)
+        return l5.applied(to: o4, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4, l5, l6))
+    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer, L6: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5, _ l6: L6)
+        -> L6.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input,
+              L4.Output == L5.Input,
+              L5.Output == L6.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        let o4 = l4.applied(to: o3, in: context)
+        let o5 = l5.applied(to: o4, in: context)
+        return l6.applied(to: o5, in: context)
+    }
+}
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index dec529190..4b159f9b8 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -24,7 +24,7 @@ final class SequentialTests: XCTestCase {

             @differentiable(wrt: (self, input))
             func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
-                return Sequential(in: context, from: input, dense1, dense2)
+                return Layer.sequential(in: context, from: input, dense1, dense2)
             }
         }
         var model = Model()

From 120ddfeb9b53eef59d327bc707e85f5dc12aca59 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Fri, 22 Feb 2019 07:51:31 +0000
Subject: [PATCH 3/5] Fix up the test case that was forgotten!

---
 Tests/DeepLearningTests/SequentialTests.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index 4b159f9b8..d5e22a639 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -24,7 +24,7 @@ final class SequentialTests: XCTestCase {

             @differentiable(wrt: (self, input))
             func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
-                return Layer.sequential(in: context, from: input, dense1, dense2)
+                return input.sequenced(in: context, through: dense1, dense2)
             }
         }
         var model = Model()

From 1372785b430d205a502ad7b4947a9c38f7b4c6a3 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Fri, 22 Feb 2019 07:56:59 +0000
Subject: [PATCH 4/5] Mark the extension as public and remove the now-redundant
 access modifiers on each individual function.
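For reference, this relies on a general Swift access-control rule rather than
anything specific to this library: members declared inside a `public extension`
default to `public`. A minimal, standalone illustration (the `Counter` type
below is made up and unrelated to this package):

    public struct Counter {
        public var value = 0
        public init() {}
    }

    public extension Counter {
        // No explicit `public` needed; members of a `public extension`
        // default to `public` access.
        mutating func increment() { value += 1 }
    }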
---
 Sources/DeepLearning/Layer.swift | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 0550be1e7..ad37562fe 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -101,10 +101,10 @@ public extension Layer {
     }
 }

-extension Differentiable {
+public extension Differentiable {

     @differentiable(wrt: (self, l1, l2))
-    public func sequenced<L1: Layer, L2: Layer>(
+    func sequenced<L1: Layer, L2: Layer>(
         in context: Context, through l1: L1, _ l2: L2)
         -> L2.Output
         where L1.Input == Self,
@@ -114,7 +114,7 @@ extension Differentiable {
     }

     @differentiable(wrt: (self, l1, l2, l3))
-    public func sequenced<L1: Layer, L2: Layer, L3: Layer>(
+    func sequenced<L1: Layer, L2: Layer, L3: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3)
         -> L3.Output
         where L1.Input == Self,
@@ -126,7 +126,7 @@ extension Differentiable {
     }

     @differentiable(wrt: (self, l1, l2, l3, l4))
-    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4)
         -> L4.Output
         where L1.Input == Self,
@@ -140,7 +140,7 @@ extension Differentiable {
     }

     @differentiable(wrt: (self, l1, l2, l3, l4, l5))
-    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5)
         -> L5.Output
         where L1.Input == Self,
@@ -156,7 +156,7 @@ extension Differentiable {
     }

     @differentiable(wrt: (self, l1, l2, l3, l4, l5, l6))
-    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer, L6: Layer>(
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer, L6: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5, _ l6: L6)
         -> L6.Output
         where L1.Input == Self,

From afd3901aacc22723e95ca7ef8d8f412098b37b28 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Sun, 24 Feb 2019 02:02:54 +0000
Subject: [PATCH 5/5] Formatting fixes.

---
 Sources/DeepLearning/Layer.swift              | 2 +-
 Tests/DeepLearningTests/SequentialTests.swift | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index ad37562fe..c20957dc8 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -101,8 +101,8 @@ public extension Layer {
     }
 }

+/// Adds helpers for standard feed-forward, sequential models.
 public extension Differentiable {
-
     @differentiable(wrt: (self, l1, l2))
     func sequenced<L1: Layer, L2: Layer>(
         in context: Context, through l1: L1, _ l2: L2)
         -> L2.Output
         where L1.Input == Self,
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index d5e22a639..6591fe320 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -17,7 +17,6 @@ import XCTest

 final class SequentialTests: XCTestCase {
     func testSequential() {
-
         struct Model: Layer {
             var dense1 = Dense<Float>(inputSize: 2, outputSize: 4, activation: relu)
             var dense2 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)
@@ -42,7 +41,7 @@ final class SequentialTests: XCTestCase {
         print(model.inferring(from: [[0, 0], [0, 1], [1, 0], [1, 1]]))
     }

-    static var allTests = [
+    static var allTests = [
         ("testSequential", testSequential)
     ]
-}
\ No newline at end of file
+}
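Addendum for illustration (a sketch, not part of any patch above): the
"local changes only" point from the first commit message, expressed with the
final `sequenced` API. The model, layer sizes, and activation choices here are
made up; only the `sequenced(in:through:)` and `applied(to:in:)` calls come
from this patch series.

    struct SkipModel: Layer {
        var dense1 = Dense<Float>(inputSize: 4, outputSize: 4, activation: relu)
        var dense2 = Dense<Float>(inputSize: 4, outputSize: 4, activation: relu)
        var dense3 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)

        @differentiable(wrt: (self, input))
        func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
            // Split the chain in two so a skip (residual) connection can be
            // inserted without restructuring the rest of the model.
            let hidden = input.sequenced(in: context, through: dense1, dense2)
            return dense3.applied(to: hidden + input, in: context)
        }
    }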