From bff38c017f6684cfa50a8f5a076fd1aa93ce117e Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Sun, 24 Feb 2019 01:31:07 +0000 Subject: [PATCH 1/5] Reduce explicit types required when instantiating the SGD optimizer. Previously, in order to instantiate the Optimizer, you had to call `type(of: model)` and pass that into the Optimizer constructor in order to get type inference to pick the right type for `Model`. This could be a little confusing for new users. This commit proposes an alternate way to write this: ```swift let optimizer = SGD(learningRate: 0.01, for: model) ``` The above formulation is clear and readable. It avoids any unnecessary typing of types, and should be generalizable to the other optimizers. (This is left as an exercise for the reader. :-D) We rely on the Swift optimizer to do the right thing such that only a reference to the model is passed to the `SGD.init` call so that we don't pay for the cost of a full model copy (which could eventually be very expensive). This is deemed to be a reasonably safe assumption, especially given the ABI standardization in Swift 5. --- Sources/DeepLearning/Optimizer.swift | 3 +-- Tests/DeepLearningTests/TrivialModelTests.swift | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 4a3186ea7..343634642 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -129,8 +129,7 @@ public class SGD: Optimizer momentum: Scalar = 0, decay: Scalar = 0, nesterov: Bool = false, - modelType: Model.Type = Model.self, - scalarType: Scalar.Type = Scalar.self + for model: Model ) { precondition(learningRate >= 0, "Learning rate must be non-negative") precondition(momentum >= 0, "Momentum must be non-negative") diff --git a/Tests/DeepLearningTests/TrivialModelTests.swift b/Tests/DeepLearningTests/TrivialModelTests.swift index 5a79fd266..51ffe5c3d 100644 --- a/Tests/DeepLearningTests/TrivialModelTests.swift +++ b/Tests/DeepLearningTests/TrivialModelTests.swift @@ -40,8 +40,8 @@ final class TrivialModelTests: XCTestCase { return l2.applied(to: h1, in: context) } } - let optimizer = SGD(learningRate: 0.02) var classifier = Classifier(hiddenSize: 4) + let optimizer = SGD(learningRate: 0.02, for: classifier) let x: Tensor = [[0, 0], [0, 1], [1, 0], [1, 1]] let y: Tensor = [[0], [1], [1], [0]] From d55916484720d353cbc61f9a01b8c24a14888661 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 25 Feb 2019 20:19:36 +0000 Subject: [PATCH 2/5] Use `__shared` to ensure the model is never copied. --- Sources/DeepLearning/Optimizer.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 343634642..1ccaac5ed 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -129,7 +129,7 @@ public class SGD: Optimizer momentum: Scalar = 0, decay: Scalar = 0, nesterov: Bool = false, - for model: Model + for _: __shared Model ) { precondition(learningRate >= 0, "Learning rate must be non-negative") precondition(momentum >= 0, "Momentum must be non-negative") From ee6e57ac3c63f1e1585a183ebd90662e310b3d5d Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 25 Feb 2019 20:42:24 +0000 Subject: [PATCH 3/5] Respond to review comments by: - Adding back in the scalarType - Fixing the tests - Updating the other optimizers --- Sources/DeepLearning/Optimizer.swift | 7 ++++--- Tests/DeepLearningTests/SequentialTests.swift | 2 +- Tests/DeepLearningTests/TrivialModelTests.swift | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index 1ccaac5ed..faa50e99b 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -40,7 +40,7 @@ public class Adam: Optimizer beta2: Scalar = 0.999, epsilon: Scalar = 1e-8, decay: Scalar = 0, - modelType: Model.Type = Model.self, + for _: __shared Model, scalarType: Scalar.Type = Scalar.self ) { precondition(learningRate >= 0, "Learning rate must be non-negative") @@ -88,7 +88,7 @@ public class RMSProp: Optimizer rho: Scalar = 0.9, epsilon: Scalar = 1e-8, decay: Scalar = 0, - modelType: Model.Type = Model.self, + for _: __shared Model, scalarType: Scalar.Type = Scalar.self ) { precondition(learningRate >= 0, "Learning rate must be non-negative") @@ -129,7 +129,8 @@ public class SGD: Optimizer momentum: Scalar = 0, decay: Scalar = 0, nesterov: Bool = false, - for _: __shared Model + for _: __shared Model, + scalarType: Scalar.Type = Scalar.self ) { precondition(learningRate >= 0, "Learning rate must be non-negative") precondition(momentum >= 0, "Momentum must be non-negative") diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift index 6591fe320..ff8d1d171 100644 --- a/Tests/DeepLearningTests/SequentialTests.swift +++ b/Tests/DeepLearningTests/SequentialTests.swift @@ -27,7 +27,7 @@ final class SequentialTests: XCTestCase { } } var model = Model() - let optimizer = SGD(learningRate: 0.02, modelType: type(of: model), scalarType: Float.self) + let optimizer = SGD(learningRate: 0.02, for: model) let x: Tensor = [[0, 0], [0, 1], [1, 0], [1, 1]] let y: Tensor = [0, 1, 1, 0] let context = Context(learningPhase: .training) diff --git a/Tests/DeepLearningTests/TrivialModelTests.swift b/Tests/DeepLearningTests/TrivialModelTests.swift index 36c82a08f..981f4ecb7 100644 --- a/Tests/DeepLearningTests/TrivialModelTests.swift +++ b/Tests/DeepLearningTests/TrivialModelTests.swift @@ -41,7 +41,7 @@ final class TrivialModelTests: XCTestCase { } } var classifier = Classifier(hiddenSize: 4) - let optimizer = SGD(learningRate: 0.02, for: classifier) + let optimizer = SGD(learningRate: 0.02, for: classifier, scalarType: Float.self) let x: Tensor = [[0, 0], [0, 1], [1, 0], [1, 1]] let y: Tensor = [[0], [1], [1], [0]] From 220c42ea86216962f470adb3b8cf00a8e02bd1c0 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 25 Feb 2019 20:45:39 +0000 Subject: [PATCH 4/5] Move model to be first argument. Thanks for the suggestion @jekbradbury. Definitely much better. --- Sources/DeepLearning/Optimizer.swift | 6 +++--- Tests/DeepLearningTests/SequentialTests.swift | 2 +- Tests/DeepLearningTests/TrivialModelTests.swift | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index faa50e99b..bb1f521a9 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -35,12 +35,12 @@ public class Adam: Optimizer public let decay: Scalar public init( + for _: __shared Model, learningRate: Scalar = 1e-3, beta1: Scalar = 0.9, beta2: Scalar = 0.999, epsilon: Scalar = 1e-8, decay: Scalar = 0, - for _: __shared Model, scalarType: Scalar.Type = Scalar.self ) { precondition(learningRate >= 0, "Learning rate must be non-negative") @@ -84,11 +84,11 @@ public class RMSProp: Optimizer public let decay: Scalar public init( + for _: __shared Model, learningRate: Scalar = 0.001, rho: Scalar = 0.9, epsilon: Scalar = 1e-8, decay: Scalar = 0, - for _: __shared Model, scalarType: Scalar.Type = Scalar.self ) { precondition(learningRate >= 0, "Learning rate must be non-negative") @@ -125,11 +125,11 @@ public class SGD: Optimizer public let nesterov: Bool public init( + for _: __shared Model, learningRate: Scalar = 0.01, momentum: Scalar = 0, decay: Scalar = 0, nesterov: Bool = false, - for _: __shared Model, scalarType: Scalar.Type = Scalar.self ) { precondition(learningRate >= 0, "Learning rate must be non-negative") diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift index ff8d1d171..b8bb932f8 100644 --- a/Tests/DeepLearningTests/SequentialTests.swift +++ b/Tests/DeepLearningTests/SequentialTests.swift @@ -27,7 +27,7 @@ final class SequentialTests: XCTestCase { } } var model = Model() - let optimizer = SGD(learningRate: 0.02, for: model) + let optimizer = SGD(for: model, learningRate: 0.02, scalarType: Float.self) let x: Tensor = [[0, 0], [0, 1], [1, 0], [1, 1]] let y: Tensor = [0, 1, 1, 0] let context = Context(learningPhase: .training) diff --git a/Tests/DeepLearningTests/TrivialModelTests.swift b/Tests/DeepLearningTests/TrivialModelTests.swift index 981f4ecb7..7882c76b9 100644 --- a/Tests/DeepLearningTests/TrivialModelTests.swift +++ b/Tests/DeepLearningTests/TrivialModelTests.swift @@ -41,7 +41,7 @@ final class TrivialModelTests: XCTestCase { } } var classifier = Classifier(hiddenSize: 4) - let optimizer = SGD(learningRate: 0.02, for: classifier, scalarType: Float.self) + let optimizer = SGD(for: classifier, learningRate: 0.02, scalarType: Float.self) let x: Tensor = [[0, 0], [0, 1], [1, 0], [1, 1]] let y: Tensor = [[0], [1], [1], [0]] From 68458f7224e8e7151b48d54e70d8f0d54da791ff Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 25 Feb 2019 20:59:36 +0000 Subject: [PATCH 5/5] Remove the default for scalarType to avoid unexpected behavior (such as TF-302). --- Sources/DeepLearning/Optimizer.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift index bb1f521a9..7daa5c73f 100644 --- a/Sources/DeepLearning/Optimizer.swift +++ b/Sources/DeepLearning/Optimizer.swift @@ -41,7 +41,7 @@ public class Adam: Optimizer beta2: Scalar = 0.999, epsilon: Scalar = 1e-8, decay: Scalar = 0, - scalarType: Scalar.Type = Scalar.self + scalarType: Scalar.Type ) { precondition(learningRate >= 0, "Learning rate must be non-negative") precondition(0 <= beta1 && beta1 <= 1, "Beta parameter must be between 0 and 1") @@ -89,7 +89,7 @@ public class RMSProp: Optimizer rho: Scalar = 0.9, epsilon: Scalar = 1e-8, decay: Scalar = 0, - scalarType: Scalar.Type = Scalar.self + scalarType: Scalar.Type ) { precondition(learningRate >= 0, "Learning rate must be non-negative") precondition(rho >= 0, "Rho must be non-negative") @@ -130,7 +130,7 @@ public class SGD: Optimizer momentum: Scalar = 0, decay: Scalar = 0, nesterov: Bool = false, - scalarType: Scalar.Type = Scalar.self + scalarType: Scalar.Type ) { precondition(learningRate >= 0, "Learning rate must be non-negative") precondition(momentum >= 0, "Momentum must be non-negative") @@ -171,7 +171,7 @@ public class RiemannSGD: Optimizer public init( learningRate: Scalar, modelType: Model.Type = Model.self, - scalarType: Scalar.Type = Scalar.self + scalarType: Scalar.Type ) { self.learningRate = learningRate }