diff --git a/README.md b/README.md
index 7e380e00e..6820810df 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ struct Model: Layer {
     var layer2 = Dense<Float>(inputSize: hiddenSize, outputSize: hiddenSize, activation: relu)
     var layer3 = Dense<Float>(inputSize: hiddenSize, outputSize: 3, activation: identity)

-    @differentiable(wrt: (self, input))
+    @differentiable
     func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
         return input.sequenced(in: context, through: layer1, layer2, layer3)
     }
diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 93001e790..eb34e5f5b 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -63,7 +63,7 @@ public protocol Layer: Differentiable & KeyPathIterable
     ///   - context: The contextual information for the layer application, e.g. the current
     ///     learning phase.
     /// - Returns: The output.
-    @differentiable(wrt: (self, input))
+    @differentiable
     func applied(to input: Input, in context: Context) -> Output
 }
@@ -78,7 +78,7 @@ public extension Layer {
     ///
     /// - Parameter input: The input to the layer.
     /// - Returns: The inference output.
-    @differentiable(wrt: (self, input))
+    @differentiable
     func inferring(from input: Input) -> Output {
         let context = Context(learningPhase: .inference)
         return applied(to: input, in: context)
@@ -104,7 +104,7 @@

 /// Adds helpers for standard feed-forward, sequential models.
 public extension Differentiable {
-    @differentiable(wrt: (self, l1, l2))
+    @differentiable
     func sequenced<L1: Layer, L2: Layer>(
         in context: Context, through l1: L1, _ l2: L2) -> L2.Output
         where L1.Input == Self, L1.Output == L2.Input {
@@ -114,7 +114,7 @@
         return l2.applied(to: o1, in: context)
     }

-    @differentiable(wrt: (self, l1, l2, l3))
+    @differentiable
     func sequenced<L1: Layer, L2: Layer, L3: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3) -> L3.Output
         where L1.Input == Self, L1.Output == L2.Input, L2.Output == L3.Input {
@@ -126,7 +126,7 @@
         return l3.applied(to: o2, in: context)
     }

-    @differentiable(wrt: (self, l1, l2, l3, l4))
+    @differentiable
     func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4) -> L4.Output
         where L1.Input == Self, L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input {
@@ -140,7 +140,7 @@
         return l4.applied(to: o3, in: context)
     }

-    @differentiable(wrt: (self, l1, l2, l3, l4, l5))
+    @differentiable
     func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5) -> L5.Output
         where L1.Input == Self, L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input {
@@ -156,7 +156,7 @@
         return l5.applied(to: o4, in: context)
     }

-    @differentiable(wrt: (self, l1, l2, l3, l4, l5, l6))
+    @differentiable
     func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer, L6: Layer>(
         in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5, _ l6: L6) -> L6.Output
         where L1.Input == Self, L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input, L5.Output == L6.Input {
@@ -196,7 +196,7 @@ public struct Dense<Scalar: TensorFlowFloatingPoint>: Layer {
     public typealias Activation = @differentiable (Tensor<Scalar>) -> Tensor<Scalar>
     @noDerivative public let activation: Activation

-    @differentiable(wrt: (self, input))
+    @differentiable
     public func applied(to input: Tensor<Scalar>, in _: Context) -> Tensor<Scalar> {
         return activation(matmul(input, weight) + bias)
     }
@@ -230,7 +230,7 @@ public struct Conv2D<Scalar: TensorFlowFloatingPoint>: Layer {
     @noDerivative public let strides: (Int32, Int32)
     @noDerivative public let padding: Padding

-    @differentiable(wrt: (self, input))
+    @differentiable
     public func applied(to input: Tensor<Scalar>, in _: Context) -> Tensor<Scalar> {
         return activation(input.convolved2D(withFilter: filter,
                                             strides: (1, strides.0, strides.1, 1),
@@ -286,7 +286,7 @@ public struct BatchNorm<Scalar: TensorFlowFloatingPoint>: Layer {
     /// The running variance.
     @noDerivative public let runningVariance: Parameter<Scalar>

-    @differentiable(wrt: (self, input))
+    @differentiable
     private func applyingTraining(to input: Tensor<Scalar>) -> Tensor<Scalar> {
         let positiveAxis = (input.rank + axis) % input.rank
         let mean = input.mean(alongAxes: [0, positiveAxis])
@@ -298,13 +298,13 @@ public struct BatchNorm<Scalar: TensorFlowFloatingPoint>: Layer {
         return (input - mean) * inv + offset
     }

-    @differentiable(wrt: (self, input))
+    @differentiable
     private func applyingInference(to input: Tensor<Scalar>) -> Tensor<Scalar> {
         let inv = rsqrt(runningVariance.value + epsilon) * scale
         return (input - runningMean.value) * inv + offset
     }

-    @differentiable(wrt: (self, input), vjp: _vjpApplied(to:in:))
+    @differentiable(vjp: _vjpApplied(to:in:))
     public func applied(to input: Tensor<Scalar>, in context: Context) -> Tensor<Scalar> {
         switch context.learningPhase {
         case .training:
@@ -360,7 +360,7 @@ public struct MaxPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
         self.padding = padding
     }

-    @differentiable(wrt: (self, input))
+    @differentiable
     public func applied(to input: Tensor<Scalar>, in _: Context) -> Tensor<Scalar> {
         return input.maxPooled(
             kernelSize: poolSize, strides: strides, padding: padding)
@@ -383,7 +383,7 @@ public struct AvgPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
         self.padding = padding
     }

-    @differentiable(wrt: (self, input))
+    @differentiable
     public func applied(to input: Tensor<Scalar>, in _: Context) -> Tensor<Scalar> {
         return input.averagePooled(
             kernelSize: poolSize, strides: strides, padding: padding)
@@ -410,7 +410,7 @@ public struct LayerNorm<Scalar: TensorFlowFloatingPoint>: Layer {
         self.epsilon = epsilon
     }

-    @differentiable(wrt: (self, input))
+    @differentiable
     public func applied(to input: Tensor<Scalar>, in _: Context) -> Tensor<Scalar> {
         let mean = input.mean(alongAxes: axis)
         let variance = input.variance(alongAxes: axis)
@@ -439,17 +439,17 @@ public struct Dropout<Scalar: TensorFlowFloatingPoint>: Layer
         self.probability = probability
     }

-    @differentiable(wrt: (self, input))
+    @differentiable
     private func applyingTraining(to input: Tensor<Scalar>) -> Tensor<Scalar> {
         return input.droppingOut(probability: probability)
     }

-    @differentiable(wrt: (self, input))
+    @differentiable
     private func applyingInference(to input: Tensor<Scalar>) -> Tensor<Scalar> {
         return input
     }

-    @differentiable(wrt: (self, input), vjp: _vjpApplied(to:in:))
+    @differentiable(vjp: _vjpApplied(to:in:))
     public func applied(to input: Tensor<Scalar>, in context: Context) -> Tensor<Scalar> {
         switch context.learningPhase {
         case .training:
@@ -484,7 +484,7 @@ public struct UpSampling2D<Scalar: TensorFlowFloatingPoint>: Layer {
         self.size = size
     }

-    @differentiable(wrt: (self, input))
+    @differentiable
     public func applied(to input: Tensor<Scalar>, in _: Context) -> Tensor<Scalar> {
         let shape = input.shape
         let (batchSize, height, width, channels) = (shape[0], shape[1], shape[2], shape[3])
diff --git a/Tests/DeepLearningTests/SequentialTests.swift b/Tests/DeepLearningTests/SequentialTests.swift
index 6591fe320..180906851 100644
--- a/Tests/DeepLearningTests/SequentialTests.swift
+++ b/Tests/DeepLearningTests/SequentialTests.swift
@@ -21,7 +21,7 @@ final class SequentialTests: XCTestCase {
         var dense1 = Dense<Float>(inputSize: 2, outputSize: 4, activation: relu)
         var dense2 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)

-        @differentiable(wrt: (self, input))
+        @differentiable
         func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
             return input.sequenced(in: context, through: dense1, dense2)
         }
diff --git a/Tests/DeepLearningTests/TrivialModelTests.swift b/Tests/DeepLearningTests/TrivialModelTests.swift
index 659e08b46..4afa54f5d 100644
--- a/Tests/DeepLearningTests/TrivialModelTests.swift
+++ b/Tests/DeepLearningTests/TrivialModelTests.swift
@@ -34,7 +34,7 @@ final class TrivialModelTests: XCTestCase {
                 generator: &Classifier.generator
             )
         }
-        @differentiable(wrt: (self, input))
+        @differentiable
         func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
             let h1 = l1.applied(to: input, in: context)
             return l2.applied(to: h1, in: context)
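
Note on the change: when `@differentiable` carries no `wrt:` clause, the compiler differentiates with respect to `self` and every parameter that conforms to `Differentiable`, so the explicit `(self, input)` and `(self, l1, ..., l6)` lists above were redundant. Only the argument naming a custom VJP (`vjp: _vjpApplied(to:in:)`) needs to stay. Below is a minimal sketch of this inference behavior, using a hypothetical `Scale` type; it assumes a Swift for TensorFlow toolchain where `Differentiable` and `gradient(at:_:in:)` are available:

struct Scale: Differentiable {
    var factor: Float

    // Equivalent to writing `@differentiable(wrt: (self, input))`: with no
    // `wrt:` clause, differentiation is performed with respect to `self` and
    // every `Differentiable` parameter.
    @differentiable
    func applied(to input: Float) -> Float {
        return factor * input
    }
}

let model = Scale(factor: 3)
// Gradients flow to both `self` (via its tangent vector) and `input`.
let (dModel, dInput) = gradient(at: model, 2) { m, x in m.applied(to: x) }
// dModel.factor == 2.0  (d(factor * x)/d(factor) at x = 2)
// dInput == 3.0         (d(factor * x)/dx with factor = 3)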