From 2503813cc4e4c5b98e61035d9146cc7c891281e9 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Fri, 22 Feb 2019 05:53:25 +0000
Subject: [PATCH 1/5] Add a helper function for sequential models.

Many deep learning models are composed of sequential layers stacked one on top
of another. Writing out the explicit `applied(to:in:)` function for such models
is relatively tedious: it is repetitive, and it obscures the underlying intent.
It is especially bothersome because it is the second (or third) time you write
out all the layers: the first time is to declare all the instance variables,
and the second time (if necessary) is in the initializer.

Fortunately, with a small family of helper functions, we can make everything
type safe as well as convenient, expressive, and readable!

This commit adds a family of `Sequential` functions that take in a context, an
input, and a variable number of layers. Each one chains the output of one layer
into the input of the next.

This API approach has a number of advantages:

 1. It avoids introducing new symbolic operators, which can be very confusing
    to new users.
 2. It works with today's AutoDiff implementation. (Yay!)
 3. It is very readable and clean.
 4. It avoids users "getting stuck". Concretely, if someone implemented a model
    using my previously proposed `>>>` operator and then wanted to add a
    residual (or skip) connection, they would basically have to re-write the
    whole model using a struct, etc. With this API structure, only "local"
    changes are required. (e.g. If only one skip connection is needed, they can
    split the sequential chain into two pieces.)

Downsides of this approach:

 1. It doesn't DRY-out the types required to define a model. (I have some
    thoughts here, but there isn't enough room in this
    margin^H^H^H^H^H^Hcommit message.)
 2. We should think hard about how things should look when we have loops.
 3. I'm sure there's a better way to code-gen all the different Sequential
    arities. (I got bored hand-writing them out after 4...)

Suggestions welcome!
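For illustration, a call site using these helpers looks roughly like the
following (this mirrors the test added in this patch; it is not an additional
API):

    struct Model: Layer {
        var dense1 = Dense<Float>(inputSize: 2, outputSize: 4, activation: relu)
        var dense2 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)

        @differentiable(wrt: (self, input))
        func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
            // Feed `input` through dense1, then dense2.
            return Sequential(in: context, from: input, dense1, dense2)
        }
    }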
---
 Sources/DeepLearning/Layer.swift              | 31 ++++++++++++
 Tests/DeepLearningTests/SequentialTests.swift | 48 +++++++++++++++++++
 Tests/DeepLearningTests/XCTestManifests.swift |  1 +
 3 files changed, 80 insertions(+)
 create mode 100644 Tests/DeepLearningTests/SequentialTests.swift

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 240202fa7..3d2f6d2fc 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -101,6 +101,37 @@ public extension Layer {
     }
 }

+@differentiable(wrt: (input, l1, l2))
+public func Sequential<L1: Layer, L2: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2) -> L2.Output where L1.Output == L2.Input {
+    let o1 = l1.applied(to: input, in: context)
+    return l2.applied(to: o1, in: context)
+}
+
+@differentiable(wrt: (input, l1, l2, l3))
+public func Sequential<L1: Layer, L2: Layer, L3: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3) -> L3.Output where L1.Output == L2.Input, L2.Output == L3.Input {
+    let o1 = l1.applied(to: input, in: context)
+    let o2 = l2.applied(to: o1, in: context)
+    return l3.applied(to: o2, in: context)
+}
+
+@differentiable(wrt: (input, l1, l2, l3, l4))
+public func Sequential<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3, _ l4: L4) -> L4.Output where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input {
+    let o1 = l1.applied(to: input, in: context)
+    let o2 = l2.applied(to: o1, in: context)
+    let o3 = l3.applied(to: o2, in: context)
+    return l4.applied(to: o3, in: context)
+}
+
+@differentiable(wrt: (input, l1, l2, l3, l4, l5))
+public func Sequential<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5) -> L5.Output where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input {
+    let o1 = l1.applied(to: input, in: context)
+    let o2 = l2.applied(to: o1, in: context)
+    let o3 = l3.applied(to: o2, in: context)
+    let o4 = l4.applied(to: o3, in: context)
+    return l5.applied(to: o4, in: context)
+}
+
+
 /// A mutable, shareable, owning reference to a tensor.
 public final class Parameter {
     public var value: Tensor
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
new file mode 100644
index 000000000..dec529190
--- /dev/null
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -0,0 +1,48 @@
+// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import XCTest
+@testable import DeepLearning
+
+final class SequentialTests: XCTestCase {
+    func testSequential() {
+
+        struct Model: Layer {
+            var dense1 = Dense<Float>(inputSize: 2, outputSize: 4, activation: relu)
+            var dense2 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)
+
+            @differentiable(wrt: (self, input))
+            func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
+                return Sequential(in: context, from: input, dense1, dense2)
+            }
+        }
+        var model = Model()
+        let optimizer = SGD(learningRate: 0.02, modelType: type(of: model), scalarType: Float.self)
+        let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
+        let y: Tensor<Float> = [0, 1, 1, 0]
+        let context = Context(learningPhase: .training)
+        for _ in 0..<1000 {
+            let 𝛁model = model.gradient { model -> Tensor<Float> in
+                let ŷ = model.applied(to: x, in: context)
+                return meanSquaredError(predicted: ŷ, expected: y)
+            }
+            optimizer.update(&model.allDifferentiableVariables, along: 𝛁model)
+        }
+        print(model.inferring(from: [[0, 0], [0, 1], [1, 0], [1, 1]]))
+    }
+
+    static var allTests = [
+        ("testSequential", testSequential)
+    ]
+}
\ No newline at end of file
diff --git a/Tests/DeepLearningTests/XCTestManifests.swift b/Tests/DeepLearningTests/XCTestManifests.swift
index 2118f859f..768ddeccc 100644
--- a/Tests/DeepLearningTests/XCTestManifests.swift
+++ b/Tests/DeepLearningTests/XCTestManifests.swift
@@ -19,6 +19,7 @@ public func allTests() -> [XCTestCaseEntry] {
     return [
         testCase(PRNGTests.allTests),
         testCase(TrivialModelTests.allTests),
+        testCase(SequentialTests.allTests),
     ]
 }
 #endif

From ab7c2721f23ce483961da1d7d2d68a58477ed2d9 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Fri, 22 Feb 2019 07:49:17 +0000
Subject: [PATCH 2/5] Switch to using a protocol extension on Differentiable
 for a nicer syntax and to avoid polluting the global function namespace.
 Also switch to camelCase.
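For comparison (illustration only; both lines are taken from the test as it
reads before and after this change, counting the follow-up test fix in the next
patch), the call site moves from a free function to a method on the input value:

    // Patch 1: free function.
    return Sequential(in: context, from: input, dense1, dense2)

    // This patch: method on the input value, read left to right.
    return input.sequenced(in: context, through: dense1, dense2)

Putting `sequenced` on `Differentiable` keeps the data flow readable from left
to right and avoids adding a global `Sequential` symbol.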
---
 Sources/DeepLearning/Layer.swift              | 99 +++++++++++------
 Tests/DeepLearningTests/SequentialTests.swift |  2 +-
 2 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 3d2f6d2fc..0550be1e7 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -101,34 +101,77 @@ public extension Layer {
     }
 }

-@differentiable(wrt: (input, l1, l2))
-public func Sequential<L1: Layer, L2: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2) -> L2.Output where L1.Output == L2.Input {
-    let o1 = l1.applied(to: input, in: context)
-    return l2.applied(to: o1, in: context)
-}
-
-@differentiable(wrt: (input, l1, l2, l3))
-public func Sequential<L1: Layer, L2: Layer, L3: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3) -> L3.Output where L1.Output == L2.Input, L2.Output == L3.Input {
-    let o1 = l1.applied(to: input, in: context)
-    let o2 = l2.applied(to: o1, in: context)
-    return l3.applied(to: o2, in: context)
-}
-
-@differentiable(wrt: (input, l1, l2, l3, l4))
-public func Sequential<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3, _ l4: L4) -> L4.Output where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input {
-    let o1 = l1.applied(to: input, in: context)
-    let o2 = l2.applied(to: o1, in: context)
-    let o3 = l3.applied(to: o2, in: context)
-    return l4.applied(to: o3, in: context)
-}
-
-@differentiable(wrt: (input, l1, l2, l3, l4, l5))
-public func Sequential<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(in context: Context, from input: L1.Input, _ l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5) -> L5.Output where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input {
-    let o1 = l1.applied(to: input, in: context)
-    let o2 = l2.applied(to: o1, in: context)
-    let o3 = l3.applied(to: o2, in: context)
-    let o4 = l4.applied(to: o3, in: context)
-    return l5.applied(to: o4, in: context)
-}
+extension Differentiable {
+
+    @differentiable(wrt: (self, l1, l2))
+    public func sequenced<L1: Layer, L2: Layer>(
+        in context: Context, through l1: L1, _ l2: L2)
+        -> L2.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input {
+        let o1 = l1.applied(to: self, in: context)
+        return l2.applied(to: o1, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3))
+    public func sequenced<L1: Layer, L2: Layer, L3: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3)
+        -> L3.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        return l3.applied(to: o2, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4))
+    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4)
+        -> L4.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        return l4.applied(to: o3, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4, l5))
+    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5)
+        -> L5.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input,
+              L4.Output == L5.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        let o4 = l4.applied(to: o3, in: context)
+        return l5.applied(to: o4, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4, l5, l6))
+    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer, L6: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5, _ l6: L6)
+        -> L6.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input,
+              L4.Output == L5.Input,
+              L5.Output == L6.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        let o4 = l4.applied(to: o3, in: context)
+        let o5 = l5.applied(to: o4, in: context)
+        return l6.applied(to: o5, in: context)
+    }
+}
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index dec529190..4b159f9b8 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -24,7 +24,7 @@ final class SequentialTests: XCTestCase {

             @differentiable(wrt: (self, input))
             func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
-                return Sequential(in: context, from: input, dense1, dense2)
+                return Layer.sequential(in: context, from: input, dense1, dense2)
             }
         }
         var model = Model()

From 120ddfeb9b53eef59d327bc707e85f5dc12aca59 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Fri, 22 Feb 2019 07:51:31 +0000
Subject: [PATCH 3/5] Fix up the test case that was forgotten!

---
 Tests/DeepLearningTests/SequentialTests.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index 4b159f9b8..d5e22a639 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -24,7 +24,7 @@ final class SequentialTests: XCTestCase {

             @differentiable(wrt: (self, input))
             func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
-                return Layer.sequential(in: context, from: input, dense1, dense2)
+                return input.sequenced(in: context, through: dense1, dense2)
             }
         }
         var model = Model()

From 1372785b430d205a502ad7b4947a9c38f7b4c6a3 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Fri, 22 Feb 2019 07:56:59 +0000
Subject: [PATCH 4/5] Mark the extension as public and remove the now-redundant
 access modifiers on each individual function.
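For reference, this relies on a general Swift access-control rule rather than
anything specific to this library: members declared inside a `public extension`
default to `public`. A minimal, standalone illustration (the `Counter` type
below is made up and unrelated to this package):

    public struct Counter {
        public var value = 0
        public init() {}
    }

    public extension Counter {
        // No explicit `public` needed; members of a `public extension`
        // default to `public` access.
        mutating func increment() { value += 1 }
    }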
---
 Sources/DeepLearning/Layer.swift | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 0550be1e7..ad37562fe 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -101,10 +101,10 @@ public extension Layer {
     }
 }

-extension Differentiable {
+public extension Differentiable {

     @differentiable(wrt: (self, l1, l2))
-    public func sequenced<L1: Layer, L2: Layer>(
+    func sequenced<L1: Layer, L2: Layer>(
         in context: Context, through l1: L1, _ l2: L2)
         -> L2.Output
         where L1.Input == Self,
@@ -114,7 +114,7 @@ extension Differentiable {
     }

     @differentiable(wrt: (self, l1, l2, l3))
-    public func sequenced<L1: Layer, L2: Layer, L3: Layer>(
+    func sequenced<L1: Layer, L2: Layer, L3: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3)
         -> L3.Output
         where L1.Input == Self,
@@ -126,7 +126,7 @@ extension Differentiable {
     }

     @differentiable(wrt: (self, l1, l2, l3, l4))
-    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4)
         -> L4.Output
         where L1.Input == Self,
@@ -140,7 +140,7 @@ extension Differentiable {
     }

     @differentiable(wrt: (self, l1, l2, l3, l4, l5))
-    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5)
         -> L5.Output
         where L1.Input == Self,
@@ -156,7 +156,7 @@ extension Differentiable {
     }

     @differentiable(wrt: (self, l1, l2, l3, l4, l5, l6))
-    public func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer, L6: Layer>(
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer, L6: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5, _ l6: L6)
         -> L6.Output
         where L1.Input == Self,

From afd3901aacc22723e95ca7ef8d8f412098b37b28 Mon Sep 17 00:00:00 2001
From: Brennan Saeta
Date: Sun, 24 Feb 2019 02:02:54 +0000
Subject: [PATCH 5/5] Formatting fixes.

---
 Sources/DeepLearning/Layer.swift              | 2 +-
 Tests/DeepLearningTests/SequentialTests.swift | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index ad37562fe..c20957dc8 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -101,8 +101,8 @@ public extension Layer {
     }
 }

+/// Adds helpers for standard feed-forward, sequential models.
 public extension Differentiable {
-
     @differentiable(wrt: (self, l1, l2))
     func sequenced<L1: Layer, L2: Layer>(
         in context: Context, through l1: L1, _ l2: L2)
         -> L2.Output
         where L1.Input == Self,
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index d5e22a639..6591fe320 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -17,7 +17,6 @@ import XCTest

 final class SequentialTests: XCTestCase {
     func testSequential() {
-
         struct Model: Layer {
             var dense1 = Dense<Float>(inputSize: 2, outputSize: 4, activation: relu)
             var dense2 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)
@@ -42,7 +41,7 @@ final class SequentialTests: XCTestCase {
         print(model.inferring(from: [[0, 0], [0, 1], [1, 0], [1, 1]]))
     }

-    static var allTests = [
+    static var allTests = [
         ("testSequential", testSequential)
     ]
-}
\ No newline at end of file
+}
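Addendum for illustration (a sketch, not part of any patch above): the
"local changes only" point from the first commit message, expressed with the
final `sequenced` API. The model, layer sizes, and activation choices here are
made up; only the `sequenced(in:through:)` and `applied(to:in:)` calls come
from this patch series.

    struct SkipModel: Layer {
        var dense1 = Dense<Float>(inputSize: 4, outputSize: 4, activation: relu)
        var dense2 = Dense<Float>(inputSize: 4, outputSize: 4, activation: relu)
        var dense3 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)

        @differentiable(wrt: (self, input))
        func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
            // Split the chain in two so a skip (residual) connection can be
            // inserted without restructuring the rest of the model.
            let hidden = input.sequenced(in: context, through: dense1, dense2)
            return dense3.applied(to: hidden + input, in: context)
        }
    }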