From bff38c017f6684cfa50a8f5a076fd1aa93ce117e Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Sun, 24 Feb 2019 01:31:07 +0000
Subject: [PATCH 1/5] Reduce explicit types required when instantiating the SGD
 optimizer.

Previously, in order to instantiate the Optimizer, you had to call
`type(of: model)` and pass that into the Optimizer constructor in order to get
type inference to pick the right type for `Model`. This could be a little
confusing for new users.

This commit proposes an alternate way to write this:

```swift
let optimizer = SGD(learningRate: 0.01, for: model)
```

The above formulation is clear and readable. It avoids any unnecessary typing
of types, and should be generalizable to the other optimizers. (This is left as
an exercise for the reader. :-D)

We rely on the Swift optimizer to do the right thing such that only a reference
to the model is passed to the `SGD.init` call so that we don't pay for the cost
of a full model copy (which could eventually be very expensive). This is deemed
to be a reasonably safe assumption, especially given the ABI standardization in
Swift 5.
---
 Sources/DeepLearning/Optimizer.swift            | 3 +--
 Tests/DeepLearningTests/TrivialModelTests.swift | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift
index 4a3186ea7..343634642 100644
--- a/Sources/DeepLearning/Optimizer.swift
+++ b/Sources/DeepLearning/Optimizer.swift
@@ -129,8 +129,7 @@ public class SGD<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         momentum: Scalar = 0,
         decay: Scalar = 0,
         nesterov: Bool = false,
-        modelType: Model.Type = Model.self,
-        scalarType: Scalar.Type = Scalar.self
+        for model: Model
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(momentum >= 0, "Momentum must be non-negative")
diff --git a/Tests/DeepLearningTests/TrivialModelTests.swift b/Tests/DeepLearningTests/TrivialModelTests.swift
index 5a79fd266..51ffe5c3d 100644
--- a/Tests/DeepLearningTests/TrivialModelTests.swift
+++ b/Tests/DeepLearningTests/TrivialModelTests.swift
@@ -40,8 +40,8 @@ final class TrivialModelTests: XCTestCase {
                 return l2.applied(to: h1, in: context)
             }
         }
-        let optimizer = SGD<Classifier, Float>(learningRate: 0.02)
         var classifier = Classifier(hiddenSize: 4)
+        let optimizer = SGD(learningRate: 0.02, for: classifier)
         let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
         let y: Tensor<Float> = [[0], [1], [1], [0]]
 

From d55916484720d353cbc61f9a01b8c24a14888661 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Mon, 25 Feb 2019 20:19:36 +0000
Subject: [PATCH 2/5] Use `__shared` to ensure the model is never copied.

---
 Sources/DeepLearning/Optimizer.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift
index 343634642..1ccaac5ed 100644
--- a/Sources/DeepLearning/Optimizer.swift
+++ b/Sources/DeepLearning/Optimizer.swift
@@ -129,7 +129,7 @@ public class SGD<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         momentum: Scalar = 0,
         decay: Scalar = 0,
         nesterov: Bool = false,
-        for model: Model
+        for _: __shared Model
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(momentum >= 0, "Momentum must be non-negative")

From ee6e57ac3c63f1e1585a183ebd90662e310b3d5d Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Mon, 25 Feb 2019 20:42:24 +0000
Subject: [PATCH 3/5] Respond to review comments by:  - Adding back in the
 scalarType  - Fixing the tests  - Updating the other optimizers

---
 Sources/DeepLearning/Optimizer.swift            | 7 ++++---
 Tests/DeepLearningTests/SequentialTests.swift   | 2 +-
 Tests/DeepLearningTests/TrivialModelTests.swift | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift
index 1ccaac5ed..faa50e99b 100644
--- a/Sources/DeepLearning/Optimizer.swift
+++ b/Sources/DeepLearning/Optimizer.swift
@@ -40,7 +40,7 @@ public class Adam<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         beta2: Scalar = 0.999,
         epsilon: Scalar = 1e-8,
         decay: Scalar = 0,
-        modelType: Model.Type = Model.self,
+        for _: __shared Model,
         scalarType: Scalar.Type = Scalar.self
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
@@ -88,7 +88,7 @@ public class RMSProp<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         rho: Scalar = 0.9,
         epsilon: Scalar = 1e-8,
         decay: Scalar = 0,
-        modelType: Model.Type = Model.self,
+        for _: __shared Model,
         scalarType: Scalar.Type = Scalar.self
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
@@ -129,7 +129,8 @@ public class SGD<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         momentum: Scalar = 0,
         decay: Scalar = 0,
         nesterov: Bool = false,
-        for _: __shared Model
+        for _: __shared Model,
+        scalarType: Scalar.Type = Scalar.self
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(momentum >= 0, "Momentum must be non-negative")
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index 6591fe320..ff8d1d171 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -27,7 +27,7 @@ final class SequentialTests: XCTestCase {
             }
         }
         var model = Model()
-        let optimizer = SGD(learningRate: 0.02, modelType: type(of: model), scalarType: Float.self)
+        let optimizer = SGD(learningRate: 0.02, for: model)
         let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
         let y: Tensor<Float> = [0, 1, 1, 0]
         let context = Context(learningPhase: .training)
diff --git a/Tests/DeepLearningTests/TrivialModelTests.swift b/Tests/DeepLearningTests/TrivialModelTests.swift
index 36c82a08f..981f4ecb7 100644
--- a/Tests/DeepLearningTests/TrivialModelTests.swift
+++ b/Tests/DeepLearningTests/TrivialModelTests.swift
@@ -41,7 +41,7 @@ final class TrivialModelTests: XCTestCase {
             }
         }
         var classifier = Classifier(hiddenSize: 4)
-        let optimizer = SGD(learningRate: 0.02, for: classifier)
+        let optimizer = SGD(learningRate: 0.02, for: classifier, scalarType: Float.self)
         let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
         let y: Tensor<Float> = [[0], [1], [1], [0]]
 

From 220c42ea86216962f470adb3b8cf00a8e02bd1c0 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Mon, 25 Feb 2019 20:45:39 +0000
Subject: [PATCH 4/5] Move model to be first argument.

Thanks for the suggestion @jekbradbury. Definitely much better.
---
 Sources/DeepLearning/Optimizer.swift            | 6 +++---
 Tests/DeepLearningTests/SequentialTests.swift   | 2 +-
 Tests/DeepLearningTests/TrivialModelTests.swift | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift
index faa50e99b..bb1f521a9 100644
--- a/Sources/DeepLearning/Optimizer.swift
+++ b/Sources/DeepLearning/Optimizer.swift
@@ -35,12 +35,12 @@ public class Adam<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
     public let decay: Scalar
 
     public init(
+        for _: __shared Model,
         learningRate: Scalar = 1e-3,
         beta1: Scalar = 0.9,
         beta2: Scalar = 0.999,
         epsilon: Scalar = 1e-8,
         decay: Scalar = 0,
-        for _: __shared Model,
         scalarType: Scalar.Type = Scalar.self
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
@@ -84,11 +84,11 @@ public class RMSProp<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
     public let decay: Scalar
 
     public init(
+        for _: __shared Model,
         learningRate: Scalar = 0.001,
         rho: Scalar = 0.9,
         epsilon: Scalar = 1e-8,
         decay: Scalar = 0,
-        for _: __shared Model,
         scalarType: Scalar.Type = Scalar.self
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
@@ -125,11 +125,11 @@ public class SGD<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
     public let nesterov: Bool
 
     public init(
+        for _: __shared Model,
         learningRate: Scalar = 0.01,
         momentum: Scalar = 0,
         decay: Scalar = 0,
         nesterov: Bool = false,
-        for _: __shared Model,
         scalarType: Scalar.Type = Scalar.self
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index ff8d1d171..b8bb932f8 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -27,7 +27,7 @@ final class SequentialTests: XCTestCase {
             }
         }
         var model = Model()
-        let optimizer = SGD(learningRate: 0.02, for: model)
+        let optimizer = SGD(for: model, learningRate: 0.02, scalarType: Float.self)
         let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
         let y: Tensor<Float> = [0, 1, 1, 0]
         let context = Context(learningPhase: .training)
diff --git a/Tests/DeepLearningTests/TrivialModelTests.swift b/Tests/DeepLearningTests/TrivialModelTests.swift
index 981f4ecb7..7882c76b9 100644
--- a/Tests/DeepLearningTests/TrivialModelTests.swift
+++ b/Tests/DeepLearningTests/TrivialModelTests.swift
@@ -41,7 +41,7 @@ final class TrivialModelTests: XCTestCase {
             }
         }
         var classifier = Classifier(hiddenSize: 4)
-        let optimizer = SGD(learningRate: 0.02, for: classifier, scalarType: Float.self)
+        let optimizer = SGD(for: classifier, learningRate: 0.02, scalarType: Float.self)
         let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
         let y: Tensor<Float> = [[0], [1], [1], [0]]
 

From 68458f7224e8e7151b48d54e70d8f0d54da791ff Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Mon, 25 Feb 2019 20:59:36 +0000
Subject: [PATCH 5/5] Remove the default for scalarType to avoid unexpected
 behavior (such as TF-302).

---
 Sources/DeepLearning/Optimizer.swift | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift
index bb1f521a9..7daa5c73f 100644
--- a/Sources/DeepLearning/Optimizer.swift
+++ b/Sources/DeepLearning/Optimizer.swift
@@ -41,7 +41,7 @@ public class Adam<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         beta2: Scalar = 0.999,
         epsilon: Scalar = 1e-8,
         decay: Scalar = 0,
-        scalarType: Scalar.Type = Scalar.self
+        scalarType: Scalar.Type
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(0 <= beta1 && beta1 <= 1, "Beta parameter must be between 0 and 1")
@@ -89,7 +89,7 @@ public class RMSProp<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         rho: Scalar = 0.9,
         epsilon: Scalar = 1e-8,
         decay: Scalar = 0,
-        scalarType: Scalar.Type = Scalar.self
+        scalarType: Scalar.Type
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(rho >= 0, "Rho must be non-negative")
@@ -130,7 +130,7 @@ public class SGD<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         momentum: Scalar = 0,
         decay: Scalar = 0,
         nesterov: Bool = false,
-        scalarType: Scalar.Type = Scalar.self
+        scalarType: Scalar.Type
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(momentum >= 0, "Momentum must be non-negative")
@@ -171,7 +171,7 @@ public class RiemannSGD<Model: Layer, Scalar: FloatingPoint>: Optimizer
     public init(
         learningRate: Scalar,
         modelType: Model.Type = Model.self,
-        scalarType: Scalar.Type = Scalar.self
+        scalarType: Scalar.Type
     ) {
         self.learningRate = learningRate
     }