diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index e86887e8b..9c2011c60 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -187,1574 +187,3 @@ public final class Parameter<Scalar: TensorFlowScalar> {
         self.value = value
     }
 }
-
-/// A densely-connected neural network layer.
-///
-/// `Dense` implements the operation `activation(matmul(input, weight) + bias)`, where `weight` is
-/// a weight matrix, `bias` is a bias vector, and `activation` is an element-wise activation
-/// function.
-@_fixed_layout
-public struct Dense<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The weight matrix.
-    public var weight: Tensor<Scalar>
-    /// The bias vector.
-    public var bias: Tensor<Scalar>
-    public typealias Activation = @differentiable (Tensor<Scalar>) -> Tensor<Scalar>
-    /// The element-wise activation function.
-    @noDerivative public let activation: Activation
-
-    public init(
-        weight: Tensor<Scalar>,
-        bias: Tensor<Scalar>,
-        activation: @escaping Activation
-    ) {
-        self.weight = weight
-        self.bias = bias
-        self.activation = activation
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return activation(matmul(input, weight) + bias)
-    }
-}
-
-public extension Dense {
-    /// Creates a `Dense` layer with the specified input size, output size, and element-wise
-    /// activation function. The weight matrix is created with shape `[inputSize, outputSize]` and
-    /// is initialized using Glorot uniform initialization with the specified generator. The bias
-    /// vector is created with shape `[outputSize]` and is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - inputSize: The dimensionality of the input space.
-    ///   - outputSize: The dimensionality of the output space.
-    ///   - activation: The activation function to use. The default value is `identity(_:)`.
-    ///   - generator: The random number generator for initialization.
-    ///
-    /// - Note: Use `init(inputSize:outputSize:activation:seed:)` for faster random initialization.
-    init<G: RandomNumberGenerator>(
-        inputSize: Int,
-        outputSize: Int,
-        activation: @escaping Activation = identity,
-        generator: inout G
-    ) {
-        self.init(weight: Tensor(glorotUniform: [inputSize, outputSize],
-                                 generator: &generator),
-                  bias: Tensor(zeros: [outputSize]),
-                  activation: activation)
-    }
-
-    init(inputSize: Int, outputSize: Int, activation: @escaping Activation = identity) {
-        self.init(inputSize: inputSize, outputSize: outputSize, activation: activation,
-                  generator: &PhiloxRandomNumberGenerator.global)
-    }
-}
-
-public extension Dense {
-    /// Creates a `Dense` layer with the specified input size, output size, and element-wise
-    /// activation function. The weight matrix is created with shape `[inputSize, outputSize]` and
-    /// is initialized using Glorot uniform initialization with the specified seed. The bias vector
-    /// is created with shape `[outputSize]` and is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - inputSize: The dimensionality of the input space.
-    ///   - outputSize: The dimensionality of the output space.
-    ///   - activation: The activation function to use. The default value is `identity(_:)`.
-    ///   - seed: The random seed for initialization. The default value is random.
-    init(
-        inputSize: Int,
-        outputSize: Int,
-        activation: @escaping Activation = identity,
-        seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
-                                Int64.random(in: Int64.min..<Int64.max))
-    ) {
-        self.init(weight: Tensor(glorotUniform: [inputSize, outputSize], seed: seed),
-                  bias: Tensor(zeros: [outputSize]),
-                  activation: activation)
-    }
-}
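A minimal usage sketch for the deleted `Dense` layer (illustrative, not part of the diff; assumes the `TensorFlow` and `DeepLearning` modules and the S4TF toolchain's sugar that forwards `dense(x)` to `call(_:)`):

    import TensorFlow
    import DeepLearning

    // A fully-connected layer mapping 4 input features to 2 outputs.
    let dense = Dense<Float>(inputSize: 4, outputSize: 2, activation: relu)
    let x = Tensor<Float>(randomNormal: [8, 4])  // a batch of 8 examples
    let y = dense(x)                             // shape: [8, 2]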
-
-/// A 1-D convolution layer (e.g. temporal convolution over a time-series).
-///
-/// This layer creates a convolution filter that is convolved with the layer input to produce a
-/// tensor of outputs.
-@_fixed_layout
-public struct Conv1D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The 3-D convolution kernel `[width, inputChannels, outputChannels]`.
-    public var filter: Tensor<Scalar>
-    /// The bias vector `[outputChannels]`.
-    public var bias: Tensor<Scalar>
-    /// An activation function.
-    public typealias Activation = @differentiable (Tensor<Scalar>) -> Tensor<Scalar>
-    /// The element-wise activation function.
-    @noDerivative public let activation: Activation
-    /// The stride of the sliding window for temporal dimension.
-    @noDerivative public let stride: Int
-    /// The padding algorithm for convolution.
-    @noDerivative public let padding: Padding
-
-    /// Creates a `Conv1D` layer with the specified filter, bias, activation function, stride, and
-    /// padding.
-    ///
-    /// - Parameters:
-    ///   - filter: The 3-D convolution kernel `[width, inputChannels, outputChannels]`.
-    ///   - bias: The bias vector `[outputChannels]`.
-    ///   - activation: The element-wise activation function.
-    ///   - stride: The stride of the sliding window for temporal dimension.
-    ///   - padding: The padding algorithm for convolution.
-    public init(
-        filter: Tensor<Scalar>,
-        bias: Tensor<Scalar>,
-        activation: @escaping Activation,
-        stride: Int,
-        padding: Padding
-    ) {
-        self.filter = filter
-        self.bias = bias
-        self.activation = activation
-        self.stride = stride
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer `[batchCount, width, inputChannels]`.
-    /// - Returns: The output `[batchCount, newWidth, outputChannels]`.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        let conv2D = input.expandingShape(at: 1).convolved2D(
-            withFilter: filter.expandingShape(at: 0), strides: (1, 1, stride, 1), padding: padding)
-        return activation(conv2D.squeezingShape(at: 1) + bias)
-    }
-}
-
-public extension Conv1D where Scalar.RawSignificand: FixedWidthInteger {
-    /// Creates a `Conv1D` layer with the specified filter shape, stride, padding, and
-    /// element-wise activation function. The filter tensor is initialized using Glorot uniform
-    /// initialization with the specified generator. The bias vector is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - filterShape: The 3-D shape of the filter, representing
-    ///     `[width, inputChannels, outputChannels]`.
-    ///   - stride: The stride of the sliding window for temporal dimension.
-    ///   - padding: The padding algorithm for convolution.
-    ///   - activation: The element-wise activation function.
-    ///   - generator: The random number generator for initialization.
-    ///
-    /// - Note: Use `init(filterShape:stride:padding:activation:seed:)` for faster random
-    ///   initialization.
-    init<G: RandomNumberGenerator>(
-        filterShape: (Int, Int, Int),
-        stride: Int = 1,
-        padding: Padding = .valid,
-        activation: @escaping Activation = identity,
-        generator: inout G
-    ) {
-        let filterTensorShape = TensorShape([
-            filterShape.0, filterShape.1, filterShape.2])
-        self.init(
-            filter: Tensor(glorotUniform: filterTensorShape, generator: &generator),
-            bias: Tensor(zeros: TensorShape([filterShape.2])),
-            activation: activation,
-            stride: stride,
-            padding: padding)
-    }
-}
-
-public extension Conv1D {
-    /// Creates a `Conv1D` layer with the specified filter shape, stride, padding, and
-    /// element-wise activation function. The filter tensor is initialized using Glorot uniform
-    /// initialization with the specified seed. The bias vector is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - filterShape: The 3-D shape of the filter, representing
-    ///     `[width, inputChannels, outputChannels]`.
-    ///   - stride: The stride of the sliding window for temporal dimension.
-    ///   - padding: The padding algorithm for convolution.
-    ///   - activation: The element-wise activation function.
-    ///   - seed: The random seed for initialization. The default value is random.
-    init(
-        filterShape: (Int, Int, Int),
-        stride: Int = 1,
-        padding: Padding = .valid,
-        activation: @escaping Activation = identity,
-        seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
-                                Int64.random(in: Int64.min..<Int64.max))
-    ) {
-        let filterTensorShape = TensorShape([
-            filterShape.0, filterShape.1, filterShape.2])
-        self.init(
-            filter: Tensor(glorotUniform: filterTensorShape, seed: seed),
-            bias: Tensor(zeros: TensorShape([filterShape.2])),
-            activation: activation,
-            stride: stride,
-            padding: padding)
-    }
-}
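A sketch of the shape behavior documented on `call(_:)` above (same imports as the first sketch; shapes are illustrative):

    // 16 sequences of 100 timesteps with 8 channels; 32 filters of width 5.
    let conv1d = Conv1D<Float>(filterShape: (5, 8, 32), stride: 1, padding: .valid)
    let sequences = Tensor<Float>(zeros: [16, 100, 8])
    let out = conv1d(sequences)  // [16, 96, 32]: newWidth = 100 - 5 + 1 under .valid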
-
-/// A 2-D convolution layer (e.g. spatial convolution over images).
-///
-/// This layer creates a convolution filter that is convolved with the layer input to produce a
-/// tensor of outputs.
-@_fixed_layout
-public struct Conv2D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The 4-D convolution kernel.
-    public var filter: Tensor<Scalar>
-    /// The bias vector.
-    public var bias: Tensor<Scalar>
-    /// An activation function.
-    public typealias Activation = @differentiable (Tensor<Scalar>) -> Tensor<Scalar>
-    /// The element-wise activation function.
-    @noDerivative public let activation: Activation
-    /// The strides of the sliding window for spatial dimensions.
-    @noDerivative public let strides: (Int, Int)
-    /// The padding algorithm for convolution.
-    @noDerivative public let padding: Padding
-
-    /// Creates a `Conv2D` layer with the specified filter, bias, activation function, strides, and
-    /// padding.
-    ///
-    /// - Parameters:
-    ///   - filter: The 4-D convolution kernel.
-    ///   - bias: The bias vector.
-    ///   - activation: The element-wise activation function.
-    ///   - strides: The strides of the sliding window for spatial dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    public init(
-        filter: Tensor<Scalar>,
-        bias: Tensor<Scalar>,
-        activation: @escaping Activation,
-        strides: (Int, Int),
-        padding: Padding
-    ) {
-        self.filter = filter
-        self.bias = bias
-        self.activation = activation
-        self.strides = strides
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return activation(input.convolved2D(withFilter: filter,
-                                            strides: (1, strides.0, strides.1, 1),
-                                            padding: padding) + bias)
-    }
-}
-
-public extension Conv2D {
-    /// Creates a `Conv2D` layer with the specified filter shape, strides, padding, and
-    /// element-wise activation function. The filter tensor is initialized using Glorot uniform
-    /// initialization with the specified generator. The bias vector is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - filterShape: The shape of the 4-D convolution kernel.
-    ///   - strides: The strides of the sliding window for spatial dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    ///   - activation: The element-wise activation function.
-    ///   - generator: The random number generator for initialization.
-    ///
-    /// - Note: Use `init(filterShape:strides:padding:activation:seed:)` for faster random
-    ///   initialization.
-    init<G: RandomNumberGenerator>(
-        filterShape: (Int, Int, Int, Int),
-        strides: (Int, Int) = (1, 1),
-        padding: Padding = .valid,
-        activation: @escaping Activation = identity,
-        generator: inout G
-    ) {
-        let filterTensorShape = TensorShape([
-            filterShape.0, filterShape.1, filterShape.2, filterShape.3])
-        self.init(
-            filter: Tensor(glorotUniform: filterTensorShape, generator: &generator),
-            bias: Tensor(zeros: TensorShape([filterShape.3])),
-            activation: activation,
-            strides: strides,
-            padding: padding)
-    }
-}
-
-public extension Conv2D {
-    /// Creates a `Conv2D` layer with the specified filter shape, strides, padding, and
-    /// element-wise activation function. The filter tensor is initialized using Glorot uniform
-    /// initialization with the specified seed. The bias vector is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - filterShape: The shape of the 4-D convolution kernel.
-    ///   - strides: The strides of the sliding window for spatial dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    ///   - activation: The element-wise activation function.
-    ///   - seed: The random seed for initialization. The default value is random.
-    init(
-        filterShape: (Int, Int, Int, Int),
-        strides: (Int, Int) = (1, 1),
-        padding: Padding = .valid,
-        activation: @escaping Activation = identity,
-        seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
-                                Int64.random(in: Int64.min..<Int64.max))
-    ) {
-        let filterTensorShape = TensorShape([
-            filterShape.0, filterShape.1, filterShape.2, filterShape.3])
-        self.init(
-            filter: Tensor(glorotUniform: filterTensorShape, seed: seed),
-            bias: Tensor(zeros: TensorShape([filterShape.3])),
-            activation: activation,
-            strides: strides,
-            padding: padding)
-    }
-}
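A corresponding sketch for `Conv2D` (illustrative shapes, same assumptions as above):

    // One 32x32 RGB image; sixteen 3x3 filters.
    let conv = Conv2D<Float>(filterShape: (3, 3, 3, 16), strides: (1, 1), padding: .same)
    let image = Tensor<Float>(zeros: [1, 32, 32, 3])
    let features = conv(image)  // [1, 32, 32, 16]: .same preserves spatial size at stride 1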
-
-/// A 3-D convolution layer for spatial/spatio-temporal convolution over images.
-///
-/// This layer creates a convolution filter that is convolved with the layer input to produce a
-/// tensor of outputs.
-@_fixed_layout
-public struct Conv3D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The 5-D convolution kernel.
-    public var filter: Tensor<Scalar>
-    /// The bias vector.
-    public var bias: Tensor<Scalar>
-    /// An activation function.
-    public typealias Activation = @differentiable (Tensor<Scalar>) -> Tensor<Scalar>
-    /// The element-wise activation function.
-    @noDerivative public let activation: Activation
-    /// The strides of the sliding window for spatial dimensions.
-    @noDerivative public let strides: (Int, Int, Int)
-    /// The padding algorithm for convolution.
-    @noDerivative public let padding: Padding
-
-    /// Creates a `Conv3D` layer with the specified filter, bias, activation function, strides, and
-    /// padding.
-    ///
-    /// - Parameters:
-    ///   - filter: The 5-D convolution kernel.
-    ///   - bias: The bias vector.
-    ///   - activation: The element-wise activation function.
-    ///   - strides: The strides of the sliding window for spatial dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    public init(
-        filter: Tensor<Scalar>,
-        bias: Tensor<Scalar>,
-        activation: @escaping Activation,
-        strides: (Int, Int, Int),
-        padding: Padding
-    ) {
-        self.filter = filter
-        self.bias = bias
-        self.activation = activation
-        self.strides = strides
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return activation(input.convolved3D(withFilter: filter,
-                                            strides: (1, strides.0, strides.1, strides.2, 1),
-                                            padding: padding) + bias)
-    }
-}
-
-public extension Conv3D {
-    /// Creates a `Conv3D` layer with the specified filter shape, strides, padding, and
-    /// element-wise activation function. The filter tensor is initialized using Glorot uniform
-    /// initialization with the specified generator. The bias vector is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - filterShape: The shape of the 5-D convolution kernel.
-    ///   - strides: The strides of the sliding window for spatial/spatio-temporal dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    ///   - activation: The element-wise activation function.
-    ///   - generator: The random number generator for initialization.
-    ///
-    /// - Note: Use `init(filterShape:strides:padding:activation:seed:)` for faster random
-    ///   initialization.
-    init<G: RandomNumberGenerator>(
-        filterShape: (Int, Int, Int, Int, Int),
-        strides: (Int, Int, Int) = (1, 1, 1),
-        padding: Padding = .valid,
-        activation: @escaping Activation = identity,
-        generator: inout G
-    ) {
-        let filterTensorShape = TensorShape([
-            filterShape.0, filterShape.1, filterShape.2, filterShape.3, filterShape.4])
-        self.init(
-            filter: Tensor(glorotUniform: filterTensorShape, generator: &generator),
-            bias: Tensor(zeros: TensorShape([filterShape.4])),
-            activation: activation,
-            strides: strides,
-            padding: padding)
-    }
-}
-
-public extension Conv3D {
-    /// Creates a `Conv3D` layer with the specified filter shape, strides, padding, and
-    /// element-wise activation function. The filter tensor is initialized using Glorot uniform
-    /// initialization with the specified seed. The bias vector is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - filterShape: The shape of the 5-D convolution kernel.
-    ///   - strides: The strides of the sliding window for spatial/spatio-temporal dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    ///   - activation: The element-wise activation function.
-    ///   - seed: The random seed for initialization. The default value is random.
-    init(
-        filterShape: (Int, Int, Int, Int, Int),
-        strides: (Int, Int, Int) = (1, 1, 1),
-        padding: Padding = .valid,
-        activation: @escaping Activation = identity,
-        seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
-                                Int64.random(in: Int64.min..<Int64.max))
-    ) {
-        let filterTensorShape = TensorShape([
-            filterShape.0, filterShape.1, filterShape.2, filterShape.3, filterShape.4])
-        self.init(
-            filter: Tensor(glorotUniform: filterTensorShape, seed: seed),
-            bias: Tensor(zeros: TensorShape([filterShape.4])),
-            activation: activation,
-            strides: strides,
-            padding: padding)
-    }
-}
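For all three convolution layers, TensorFlow's padding algorithm determines the output size per spatial dimension; a sketch of that arithmetic (illustrative shapes, and the formulas reflect standard TensorFlow padding semantics rather than anything stated in this file):

    // .valid: out = ceil((in - filterSize + 1) / stride); no implicit zero-padding.
    // .same:  out = ceil(in / stride); zero-pads so that stride 1 preserves size.
    let conv3d = Conv3D<Float>(filterShape: (3, 3, 3, 1, 8), strides: (2, 2, 2), padding: .valid)
    let volume = Tensor<Float>(zeros: [1, 16, 16, 16, 1])
    let out3d = conv3d(volume)  // [1, 7, 7, 7, 8]: ceil((16 - 3 + 1) / 2) = 7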
-
-/// A 2-D transposed convolution layer (e.g. spatial transposed convolution over images).
-///
-/// This layer creates a convolution filter that is transpose-convolved with the layer input
-/// to produce a tensor of outputs.
-@_fixed_layout
-public struct TransposedConv2D: Layer {
-    /// The 4-D convolution kernel.
-    public var filter: Tensor<Float>
-    /// The bias vector.
-    public var bias: Tensor<Float>
-    /// An activation function.
-    public typealias Activation = @differentiable (Tensor<Float>) -> Tensor<Float>
-    /// The element-wise activation function.
-    @noDerivative public let activation: Activation
-    /// The strides of the sliding window for spatial dimensions.
-    @noDerivative public let strides: (Int, Int)
-    /// The padding algorithm for convolution.
-    @noDerivative public let padding: Padding
-    @noDerivative public let paddingIndex: Int
-
-    /// Creates a `TransposedConv2D` layer with the specified filter, bias,
-    /// activation function, strides, and padding.
-    ///
-    /// - Parameters:
-    ///   - filter: The 4-D convolution kernel.
-    ///   - bias: The bias vector.
-    ///   - activation: The element-wise activation function.
-    ///   - strides: The strides of the sliding window for spatial dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    public init(
-        filter: Tensor<Float>,
-        bias: Tensor<Float>,
-        activation: @escaping Activation,
-        strides: (Int, Int),
-        padding: Padding
-    ) {
-        self.filter = filter
-        self.bias = bias
-        self.activation = activation
-        self.strides = strides
-        self.padding = padding
-        self.paddingIndex = padding == .same ? 0 : 1
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Float>) -> Tensor<Float> {
-        let batchSize = input.shape[0]
-        let w = (input.shape[1] - (1 * paddingIndex)) *
-            strides.0 + (filter.shape[0] * paddingIndex)
-        let h = (input.shape[2] - (1 * paddingIndex)) *
-            strides.1 + (filter.shape[1] * paddingIndex)
-        let c = filter.shape[2]
-        let newShape = Tensor<Int32>([Int32(batchSize), Int32(w), Int32(h), Int32(c)])
-        return activation(input.conv2DBackpropInput(shape: newShape, filter: filter,
-                                                    strides: (1, strides.0, strides.1, 1),
-                                                    padding: padding) + bias)
-    }
-}
-
-public extension TransposedConv2D {
-    /// Creates a `TransposedConv2D` layer with the specified filter shape, strides, padding, and
-    /// element-wise activation function. The filter tensor is initialized using Glorot uniform
-    /// initialization with the specified generator. The bias vector is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - filterShape: The shape of the 4-D convolution kernel.
-    ///   - strides: The strides of the sliding window for spatial dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    ///   - activation: The element-wise activation function.
-    ///   - generator: The random number generator for initialization.
-    ///
-    /// - Note: Use `init(filterShape:strides:padding:activation:seed:)` for faster random
-    ///   initialization.
-    init<G: RandomNumberGenerator>(
-        filterShape: (Int, Int, Int, Int),
-        strides: (Int, Int) = (1, 1),
-        padding: Padding = .valid,
-        activation: @escaping Activation = identity,
-        generator: inout G
-    ) {
-        let filterTensorShape = TensorShape([
-            filterShape.0, filterShape.1, filterShape.2, filterShape.3])
-        self.init(
-            filter: Tensor(glorotUniform: filterTensorShape, generator: &generator),
-            bias: Tensor(zeros: TensorShape([filterShape.3])),
-            activation: activation,
-            strides: strides,
-            padding: padding)
-    }
-}
-
-public extension TransposedConv2D {
-    /// Creates a `TransposedConv2D` layer with the specified filter shape, strides, padding, and
-    /// element-wise activation function. The filter tensor is initialized using Glorot uniform
-    /// initialization with the specified seed. The bias vector is initialized with zeros.
-    ///
-    /// - Parameters:
-    ///   - filterShape: The shape of the 4-D convolution kernel.
-    ///   - strides: The strides of the sliding window for spatial dimensions.
-    ///   - padding: The padding algorithm for convolution.
-    ///   - activation: The element-wise activation function.
-    ///   - seed: The random seed for initialization. The default value is random.
-    init(
-        filterShape: (Int, Int, Int, Int),
-        strides: (Int, Int) = (1, 1),
-        padding: Padding = .valid,
-        activation: @escaping Activation = identity,
-        seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
-                                Int64.random(in: Int64.min..<Int64.max))
-    ) {
-        let filterTensorShape = TensorShape([
-            filterShape.0, filterShape.1, filterShape.2, filterShape.3])
-        self.init(
-            filter: Tensor(glorotUniform: filterTensorShape, seed: seed),
-            bias: Tensor(zeros: TensorShape([filterShape.3])),
-            activation: activation,
-            strides: strides,
-            padding: padding)
-    }
-}
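The `paddingIndex` arithmetic in `call(_:)` above implies an output spatial size of `input * stride` under `.same` and `(input - 1) * stride + filterSize` under `.valid`. A sketch (note this layer is fixed to `Float` rather than generic over `Scalar`):

    // Doubling spatial resolution; filter is [height, width, outputChannels, inputChannels].
    let deconv = TransposedConv2D(filterShape: (2, 2, 8, 4), strides: (2, 2), padding: .same)
    let featureMap = Tensor<Float>(zeros: [1, 14, 14, 4])
    let upsampled = deconv(featureMap)  // [1, 28, 28, 8]: 14 * 2 per spatial dimension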
-
-/// A batch normalization layer.
-///
-/// Normalizes the activations of the previous layer at each batch, i.e. applies a transformation
-/// that maintains the mean activation close to `0` and the activation standard deviation close to
-/// `1`.
-///
-/// Reference: [Batch Normalization: Accelerating Deep Network Training by Reducing Internal
-/// Covariate Shift](https://arxiv.org/abs/1502.03167).
-@_fixed_layout
-public struct BatchNorm<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The feature dimension.
-    @noDerivative public let axis: Int
-    /// The momentum for the running mean and running variance.
-    @noDerivative public let momentum: Tensor<Scalar>
-    /// The offset value, also known as beta.
-    public var offset: Tensor<Scalar>
-    /// The scale value, also known as gamma.
-    public var scale: Tensor<Scalar>
-    /// The variance epsilon value.
-    @noDerivative public let epsilon: Tensor<Scalar>
-    /// The running mean.
-    @noDerivative public let runningMean: Parameter<Scalar>
-    /// The running variance.
-    @noDerivative public let runningVariance: Parameter<Scalar>
-
-    /// Creates a batch normalization layer.
-    ///
-    /// - Parameters:
-    ///   - axis: The axis that should not be normalized (typically the feature axis).
-    ///   - momentum: The momentum for the moving average.
-    ///   - offset: The offset to be added to the normalized tensor.
-    ///   - scale: The scale to multiply the normalized tensor by.
-    ///   - epsilon: A small scalar added to the denominator to improve numerical stability.
-    ///   - runningMean: The running mean.
-    ///   - runningVariance: The running variance.
-    public init(
-        axis: Int,
-        momentum: Tensor<Scalar>,
-        offset: Tensor<Scalar>,
-        scale: Tensor<Scalar>,
-        epsilon: Tensor<Scalar>,
-        runningMean: Tensor<Scalar>,
-        runningVariance: Tensor<Scalar>
-    ) {
-        self.axis = axis
-        self.momentum = momentum
-        self.offset = offset
-        self.scale = scale
-        self.epsilon = epsilon
-        self.runningMean = Parameter(runningMean)
-        self.runningVariance = Parameter(runningVariance)
-    }
-
-    @differentiable
-    private func applyingTraining(to input: Tensor<Scalar>) -> Tensor<Scalar> {
-        let positiveAxis = (input.rank + axis) % input.rank
-        var normalizedAxes = Array(0..<input.rank)
-        normalizedAxes.remove(at: positiveAxis)
-        let mean = input.mean(alongAxes: normalizedAxes)
-        let variance = input.variance(alongAxes: normalizedAxes)
-        runningMean.value += (mean - runningMean.value) * (1 - momentum)
-        runningVariance.value += (variance - runningVariance.value) * (1 - momentum)
-        let inv = rsqrt(variance + epsilon) * scale
-        return (input - mean) * inv + offset
-    }
-
-    @differentiable
-    private func applyingInference(to input: Tensor<Scalar>) -> Tensor<Scalar> {
-        let inv = rsqrt(runningVariance.value + epsilon) * scale
-        return (input - runningMean.value) * inv + offset
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable(vjp: _vjpApplied(to:))
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        switch Context.local.learningPhase {
-        case .training:
-            return applyingTraining(to: input)
-        case .inference:
-            return applyingInference(to: input)
-        }
-    }
-
-    @usableFromInline
-    func _vjpApplied(to input: Tensor<Scalar>) ->
-        (Tensor<Scalar>, (Tensor<Scalar>) ->
-            (BatchNorm<Scalar>.TangentVector, Tensor<Scalar>)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) {
-                $0.applyingTraining(to: $1)
-            }
-        case .inference:
-            return valueWithPullback(at: input) {
-                $0.applyingInference(to: $1)
-            }
-        }
-    }
-
-    /// Creates a batch normalization layer.
-    ///
-    /// - Parameters:
-    ///   - featureCount: The number of features.
-    ///   - axis: The axis that should be normalized (typically the features axis).
-    ///   - momentum: The momentum for the moving average.
-    ///   - epsilon: A small scalar added to the denominator to improve numerical stability.
-    public init(featureCount: Int,
-                axis: Int = -1,
-                momentum: Tensor<Scalar> = Tensor(0.99),
-                epsilon: Tensor<Scalar> = Tensor(0.001)) {
-        self.axis = axis
-        self.momentum = momentum
-        self.scale = Tensor(ones: [featureCount])
-        self.offset = Tensor(zeros: [featureCount])
-        self.epsilon = epsilon
-        self.runningMean = Parameter(Tensor(0))
-        self.runningVariance = Parameter(Tensor(1))
-    }
-}
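Because `call(_:)` switches on `Context.local.learningPhase`, the same layer normalizes with batch statistics during training and with the stored running statistics at inference; a sketch:

    let bn = BatchNorm<Float>(featureCount: 16)
    let activations = Tensor<Float>(randomNormal: [32, 16])
    Context.local.learningPhase = .training
    let normalized = bn(activations)   // batch statistics; updates running mean/variance
    Context.local.learningPhase = .inference
    let atInference = bn(activations)  // uses the accumulated running statistics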
-
-/// A max pooling layer for temporal data.
-@_fixed_layout
-public struct MaxPool1D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The size of the sliding reduction window for pooling.
-    @noDerivative let poolSize: Int
-    /// The stride of the sliding window for temporal dimension.
-    @noDerivative let stride: Int
-    /// The padding algorithm for pooling.
-    @noDerivative let padding: Padding
-
-    /// Creates a max pooling layer.
-    ///
-    /// - Parameters:
-    ///   - poolSize: The size of the sliding reduction window for pooling.
-    ///   - stride: The stride of the sliding window for temporal dimension.
-    ///   - padding: The padding algorithm for pooling.
-    public init(
-        poolSize: Int,
-        stride: Int,
-        padding: Padding
-    ) {
-        self.poolSize = poolSize
-        self.stride = stride
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.expandingShape(at: 1).maxPooled2D(
-            kernelSize: (1, 1, poolSize, 1), strides: (1, 1, stride, 1), padding: padding
-        ).squeezingShape(at: 1)
-    }
-}
-
-/// A max pooling layer for spatial data.
-@_fixed_layout
-public struct MaxPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The size of the sliding reduction window for pooling.
-    @noDerivative let poolSize: (Int, Int, Int, Int)
-    /// The strides of the sliding window for each dimension of a 4-D input.
-    /// Strides in non-spatial dimensions must be `1`.
-    @noDerivative let strides: (Int, Int, Int, Int)
-    /// The padding algorithm for pooling.
-    @noDerivative let padding: Padding
-
-    /// Creates a max pooling layer.
-    public init(
-        poolSize: (Int, Int, Int, Int),
-        strides: (Int, Int, Int, Int),
-        padding: Padding
-    ) {
-        self.poolSize = poolSize
-        self.strides = strides
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.maxPooled2D(
-            kernelSize: poolSize, strides: strides, padding: padding)
-    }
-}
-
-public extension MaxPool2D {
-    /// Creates a max pooling layer.
-    ///
-    /// - Parameters:
-    ///   - poolSize: Vertical and horizontal factors by which to downscale.
-    ///   - strides: The strides.
-    ///   - padding: The padding.
-    init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) {
-        self.init(poolSize: (1, poolSize.0, poolSize.1, 1),
-                  strides: (1, strides.0, strides.1, 1),
-                  padding: padding)
-    }
-}
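A pooling sketch using the convenience initializer above (illustrative shapes):

    // Non-overlapping 2x2 max pooling halves each spatial dimension.
    let pool = MaxPool2D<Float>(poolSize: (2, 2), strides: (2, 2))
    let grid = Tensor<Float>(zeros: [1, 28, 28, 6])
    let pooled = pool(grid)  // [1, 14, 14, 6]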
-
-/// A max pooling layer for spatial or spatio-temporal data.
-@_fixed_layout
-public struct MaxPool3D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The size of the sliding reduction window for pooling.
-    @noDerivative let poolSize: (Int, Int, Int, Int, Int)
-    /// The strides of the sliding window for each dimension of a 5-D input.
-    /// Strides in non-spatial dimensions must be `1`.
-    @noDerivative let strides: (Int, Int, Int, Int, Int)
-    /// The padding algorithm for pooling.
-    @noDerivative let padding: Padding
-
-    /// Creates a max pooling layer.
-    public init(
-        poolSize: (Int, Int, Int, Int, Int),
-        strides: (Int, Int, Int, Int, Int),
-        padding: Padding
-    ) {
-        self.poolSize = poolSize
-        self.strides = strides
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.maxPooled3D(kernelSize: poolSize, strides: strides, padding: padding)
-    }
-}
-
-public extension MaxPool3D {
-    /// Creates a max pooling layer.
-    ///
-    /// - Parameters:
-    ///   - poolSize: Vertical, horizontal, and depthwise factors by which to downscale.
-    ///   - strides: The strides.
-    ///   - padding: The padding.
-    init(poolSize: (Int, Int, Int), strides: (Int, Int, Int), padding: Padding = .valid) {
-        self.init(poolSize: (1, poolSize.0, poolSize.1, poolSize.2, 1),
-                  strides: (1, strides.0, strides.1, strides.2, 1),
-                  padding: padding)
-    }
-}
-
-public extension MaxPool3D {
-    /// Creates a max pooling layer with the specified pooling window size and stride. All
-    /// pooling sizes and strides are the same.
-    init(poolSize: Int, stride: Int, padding: Padding = .valid) {
-        self.init(poolSize: (poolSize, poolSize, poolSize),
-                  strides: (stride, stride, stride),
-                  padding: padding)
-    }
-}
-
-/// An average pooling layer for temporal data.
-@_fixed_layout
-public struct AvgPool1D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The size of the sliding reduction window for pooling.
-    @noDerivative let poolSize: Int
-    /// The stride of the sliding window for temporal dimension.
-    @noDerivative let stride: Int
-    /// The padding algorithm for pooling.
-    @noDerivative let padding: Padding
-
-    /// Creates an average pooling layer.
-    ///
-    /// - Parameters:
-    ///   - poolSize: The size of the sliding reduction window for pooling.
-    ///   - stride: The stride of the sliding window for temporal dimension.
-    ///   - padding: The padding algorithm for pooling.
-    public init(
-        poolSize: Int,
-        stride: Int,
-        padding: Padding
-    ) {
-        self.poolSize = poolSize
-        self.stride = stride
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.expandingShape(at: 1).averagePooled2D(
-            kernelSize: (1, 1, poolSize, 1), strides: (1, 1, stride, 1), padding: padding
-        ).squeezingShape(at: 1)
-    }
-}
-
-/// An average pooling layer for spatial data.
-@_fixed_layout
-public struct AvgPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The size of the sliding reduction window for pooling.
-    @noDerivative let poolSize: (Int, Int, Int, Int)
-    /// The strides of the sliding window for each dimension of a 4-D input.
-    /// Strides in non-spatial dimensions must be `1`.
-    @noDerivative let strides: (Int, Int, Int, Int)
-    /// The padding algorithm for pooling.
-    @noDerivative let padding: Padding
-
-    /// Creates an average pooling layer.
-    public init(
-        poolSize: (Int, Int, Int, Int),
-        strides: (Int, Int, Int, Int),
-        padding: Padding
-    ) {
-        self.poolSize = poolSize
-        self.strides = strides
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.averagePooled2D(kernelSize: poolSize, strides: strides, padding: padding)
-    }
-}
-
-public extension AvgPool2D {
-    /// Creates an average pooling layer.
-    ///
-    /// - Parameters:
-    ///   - poolSize: Vertical and horizontal factors by which to downscale.
-    ///   - strides: The strides.
-    ///   - padding: The padding.
-    init(poolSize: (Int, Int), strides: (Int, Int), padding: Padding = .valid) {
-        self.init(poolSize: (1, poolSize.0, poolSize.1, 1),
-                  strides: (1, strides.0, strides.1, 1),
-                  padding: padding)
-    }
-}
-
-/// An average pooling layer for spatial or spatio-temporal data.
-@_fixed_layout
-public struct AvgPool3D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The size of the sliding reduction window for pooling.
-    @noDerivative let poolSize: (Int, Int, Int, Int, Int)
-    /// The strides of the sliding window for each dimension of a 5-D input.
-    /// Strides in non-spatial dimensions must be `1`.
-    @noDerivative let strides: (Int, Int, Int, Int, Int)
-    /// The padding algorithm for pooling.
-    @noDerivative let padding: Padding
-
-    /// Creates an average pooling layer.
-    public init(
-        poolSize: (Int, Int, Int, Int, Int),
-        strides: (Int, Int, Int, Int, Int),
-        padding: Padding
-    ) {
-        self.poolSize = poolSize
-        self.strides = strides
-        self.padding = padding
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.averagePooled3D(kernelSize: poolSize, strides: strides, padding: padding)
-    }
-}
-
-public extension AvgPool3D {
-    /// Creates an average pooling layer.
-    ///
-    /// - Parameters:
-    ///   - poolSize: Vertical, horizontal, and depthwise factors by which to downscale.
-    ///   - strides: The strides.
-    ///   - padding: The padding.
-    init(poolSize: (Int, Int, Int), strides: (Int, Int, Int), padding: Padding = .valid) {
-        self.init(poolSize: (1, poolSize.0, poolSize.1, poolSize.2, 1),
-                  strides: (1, strides.0, strides.1, strides.2, 1),
-                  padding: padding)
-    }
-}
-
-public extension AvgPool3D {
-    /// Creates an average pooling layer with the specified pooling window size and stride. All
-    /// pooling sizes and strides are the same.
-    init(poolSize: Int, strides: Int, padding: Padding = .valid) {
-        self.init(poolSize: (poolSize, poolSize, poolSize),
-                  strides: (strides, strides, strides),
-                  padding: padding)
-    }
-}
-
-/// A global average pooling layer for temporal data.
-@_fixed_layout
-public struct GlobalAvgPool1D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// Creates a global average pooling layer.
-    public init() {}
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.mean(squeezingAxes: 1)
-    }
-}
-
-/// A global average pooling layer for spatial data.
-@_fixed_layout
-public struct GlobalAvgPool2D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// Creates a global average pooling layer.
-    public init() {}
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.mean(squeezingAxes: [1, 2])
-    }
-}
-
-/// A global average pooling layer for spatial and spatio-temporal data.
-@_fixed_layout
-public struct GlobalAvgPool3D<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// Creates a global average pooling layer.
-    public init() {}
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.mean(squeezingAxes: [1, 2, 3])
-    }
-}
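A sketch of global average pooling, which follows directly from the `mean(squeezingAxes:)` calls above:

    // Collapses each feature map to one scalar; a common head before a final Dense classifier.
    let gap = GlobalAvgPool2D<Float>()
    let maps = Tensor<Float>(zeros: [1, 7, 7, 512])
    let pooledVector = gap(maps)  // [1, 512]: mean over both spatial axes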
-
-/// A layer that applies layer normalization over a mini-batch of inputs.
-///
-/// Reference: [Layer Normalization](https://arxiv.org/abs/1607.06450).
-@_fixed_layout
-public struct LayerNorm<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The offset value, also known as beta.
-    public var offset: Tensor<Scalar>
-    /// The scale value, also known as gamma.
-    public var scale: Tensor<Scalar>
-    /// The axis.
-    @noDerivative public let axis: Int
-    /// The variance epsilon value.
-    @noDerivative public let epsilon: Tensor<Scalar>
-
-    /// Creates a layer normalization layer.
-    public init(
-        offset: Tensor<Scalar>,
-        scale: Tensor<Scalar>,
-        axis: Int,
-        epsilon: Tensor<Scalar>
-    ) {
-        self.offset = offset
-        self.scale = scale
-        self.axis = axis
-        self.epsilon = epsilon
-    }
-
-    /// Creates a layer normalization layer.
-    ///
-    /// - Parameters:
-    ///   - featureCount: The number of features.
-    ///   - axis: The axis that should be normalized.
-    ///   - epsilon: The small scalar added to variance.
-    public init(featureCount: Int,
-                axis: Int,
-                epsilon: Tensor<Scalar> = Tensor(0.001)) {
-        self.init(
-            offset: Tensor(zeros: [featureCount]),
-            scale: Tensor(ones: [featureCount]),
-            axis: axis,
-            epsilon: epsilon
-        )
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        let mean = input.mean(alongAxes: axis)
-        let variance = input.variance(alongAxes: axis)
-        let inv = rsqrt(variance + epsilon) * scale
-        return (input - mean) * inv + offset
-    }
-}
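Unlike `BatchNorm`, `LayerNorm` computes statistics per example along the normalized axis, so it keeps no running state and behaves identically in both learning phases; a sketch:

    let ln = LayerNorm<Float>(featureCount: 64, axis: 1)
    let h = Tensor<Float>(randomNormal: [8, 64])
    let hNorm = ln(h)  // per-example mean ~0 and variance ~1 along axis 1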
-
-public extension Tensor where Scalar: TensorFlowFloatingPoint {
-    /// Computes dropout given a probability.
-    @differentiable(wrt: self where Scalar: Differentiable)
-    func droppingOut(probability: Double) -> Tensor {
-        let noise = Tensor(randomUniform: shape)
-        let keepMask = noise .>= Scalar(probability)
-        let keepProbability = Scalar(1.0 - probability)
-        return self * Tensor(keepMask) / Tensor(keepProbability)
-    }
-}
-
-/// A dropout layer.
-///
-/// Dropout consists in randomly setting a fraction of input units to `0` at each update during
-/// training time, which helps prevent overfitting.
-@_fixed_layout
-public struct Dropout<Scalar: TensorFlowFloatingPoint>: Layer {
-    @noDerivative public let probability: Double
-
-    /// Creates a dropout layer.
-    ///
-    /// - Parameter probability: The drop probability.
-    public init(probability: Double) {
-        self.probability = probability
-    }
-
-    @differentiable
-    private func applyingTraining(to input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.droppingOut(probability: probability)
-    }
-
-    @differentiable
-    private func applyingInference(to input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable(vjp: _vjpApplied(to:))
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        switch Context.local.learningPhase {
-        case .training:
-            return applyingTraining(to: input)
-        case .inference:
-            return applyingInference(to: input)
-        }
-    }
-
-    @usableFromInline
-    func _vjpApplied(to input: Tensor<Scalar>) ->
-        (Tensor<Scalar>, (Tensor<Scalar>) ->
-            (Dropout<Scalar>.TangentVector, Tensor<Scalar>)) {
-        switch Context.local.learningPhase {
-        case .training:
-            return valueWithPullback(at: input) {
-                $0.applyingTraining(to: $1)
-            }
-        case .inference:
-            return valueWithPullback(at: input) {
-                $0.applyingInference(to: $1)
-            }
-        }
-    }
-}
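A sketch of the learning-phase switch in `Dropout.call(_:)`:

    // Dropout only perturbs activations in the training phase.
    let dropout = Dropout<Float>(probability: 0.5)
    let features = Tensor<Float>(randomNormal: [4, 10])
    Context.local.learningPhase = .training
    let thinned = dropout(features)     // ~half the units zeroed, survivors scaled by 1 / (1 - 0.5)
    Context.local.learningPhase = .inference
    let untouched = dropout(features)   // identity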
-
-/// An upsampling layer for 1-D inputs.
-@_fixed_layout
-public struct UpSampling1D<Scalar: TensorFlowFloatingPoint>: Layer {
-    @noDerivative public let size: Int
-
-    /// Creates an upsampling layer.
-    ///
-    /// - Parameter size: The upsampling factor for timesteps.
-    public init(size: Int) {
-        self.size = size
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        let shape = input.shape
-        let (batchSize, timesteps, channels) = (shape[0], shape[1], shape[2])
-        let scaleOnes = Tensor<Scalar>(ones: [1, 1, size, 1])
-        let upSampling = input.reshaped(to: [batchSize, timesteps, 1, channels]) * scaleOnes
-        return upSampling.reshaped(to: [batchSize, timesteps * size, channels])
-    }
-}
-
-/// An upsampling layer for 2-D inputs.
-@_fixed_layout
-public struct UpSampling2D<Scalar: TensorFlowFloatingPoint>: Layer {
-    @noDerivative public let size: Int
-
-    /// Creates an upsampling layer.
-    ///
-    /// - Parameter size: The upsampling factor for rows and columns.
-    public init(size: Int) {
-        self.size = size
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        let shape = input.shape
-        let (batchSize, height, width, channels) = (shape[0], shape[1], shape[2], shape[3])
-        let scaleOnes = Tensor<Scalar>(ones: [1, 1, size, 1, size, 1])
-        let upSampling = input.reshaped(to: [batchSize, height, 1, width, 1, channels]) * scaleOnes
-        return upSampling.reshaped(to: [batchSize, height * size, width * size, channels])
-    }
-}
-
-/// An upsampling layer for 3-D inputs.
-@_fixed_layout
-public struct UpSampling3D<Scalar: TensorFlowFloatingPoint>: Layer {
-    @noDerivative public let size: Int
-
-    /// Creates an upsampling layer.
-    ///
-    /// - Parameter size: The upsampling factor for the three spatial dimensions.
-    public init(size: Int) {
-        self.size = size
-    }
-
-    /// Repeats the elements of a tensor along an axis, like `np.repeat`.
-    /// Function adapted from `def repeat_elements`:
-    /// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/backend.py
-    @differentiable(vjp: _vjpRepeatingElements)
-    private func repeatingElements(
-        _ input: Tensor<Scalar>, alongAxis axis: Int, count: Int
-    ) -> Tensor<Scalar> {
-        let splits = Raw.split(splitDim: Tensor<Int32>(Int32(axis)),
-                               value: input, numSplit: Int64(input.shape[axis]))
-        let repeated = splits.flatMap { x in Array(repeating: x, count: count) }
-        return Tensor<Scalar>(concatenating: repeated, alongAxis: axis)
-    }
-
-    private func _vjpRepeatingElements(
-        _ input: Tensor<Scalar>, alongAxis axis: Int, count: Int
-    ) -> (Tensor<Scalar>, (Tensor<Scalar>) -> (AllDifferentiableVariables, Tensor<Scalar>)) {
-        let value = repeatingElements(input, alongAxis: axis, count: count)
-        return (value, { v in
-            let splits = Raw.split(splitDim: Tensor<Int32>(Int32(axis)),
-                                   value: v, numSplit: Int64(input.shape[axis]))
-            let summed = splits.map { x in x.sum(alongAxes: axis) }
-            let concatenated = Tensor<Scalar>(concatenating: summed, alongAxis: axis)
-            return (.zero, concatenated)
-        })
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        var result = repeatingElements(input, alongAxis: 1, count: size)
-        result = repeatingElements(result, alongAxis: 2, count: size)
-        result = repeatingElements(result, alongAxis: 3, count: size)
-        return result
-    }
-}
-
-/// A flatten layer.
-///
-/// A flatten layer flattens the input when applied without affecting the batch size.
-@_fixed_layout
-public struct Flatten<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// Creates a flatten layer.
-    public init() {}
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        let batchSize = input.shape[0]
-        let remaining = input.shape[1..<input.rank].contiguousSize
-        return input.reshaped(to: [batchSize, remaining])
-    }
-}
-
-/// A reshape layer.
-@_fixed_layout
-public struct Reshape<Scalar: TensorFlowFloatingPoint>: Layer {
-    /// The target shape.
-    @noDerivative public let shape: Tensor<Int32>
-
-    // TF-331 workaround:
-    @usableFromInline
-    internal var _nontrivial = Tensor<Float>(0)
-
-    /// Creates a reshape layer.
-    ///
-    /// - Parameter shape: The target shape, represented by a tensor.
-    public init(shape: Tensor<Int32>) {
-        self.shape = shape
-    }
-
-    /// Creates a reshape layer.
-    ///
-    /// - Parameter shape: The target shape.
-    public init(_ shape: TensorShape) {
-        self.init(shape: Tensor(shape.dimensions.map(Int32.init)))
-    }
-
-    /// Returns the output obtained from applying the layer to the given input.
-    ///
-    /// - Parameter input: The input to the layer.
-    /// - Returns: The output.
-    @differentiable
-    public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
-        return input.reshaped(toShape: shape)
-    }
-}
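A sketch combining the shape-manipulation layers above (illustrative shapes):

    // UpSampling2D repeats rows and columns; Flatten collapses everything but the batch axis.
    let up = UpSampling2D<Float>(size: 2)
    let flatten = Flatten<Float>()
    let small = Tensor<Float>(zeros: [1, 4, 4, 3])
    let bigger = up(small)       // [1, 8, 8, 3]
    let flat = flatten(bigger)   // [1, 192] = [1, 8 * 8 * 3]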
-
-/// An input to a recurrent neural network.
-public struct RNNCellInput<Input: Differentiable, State: Differentiable>: Differentiable {
-    /// The input at the current time step.
-    public var input: Input
-    /// The previous state.
-    public var state: State
-
-    @differentiable
-    public init(input: Input, state: State) {
-        self.input = input
-        self.state = state
-    }
-}
-
-/// An output from a recurrent neural network.
-public struct RNNCellOutput<Output: Differentiable, State: Differentiable>: Differentiable {
-    /// The output at the current time step.
-    public var output: Output
-    /// The current state.
-    public var state: State
-
-    @differentiable
-    public init(output: Output, state: State) {
-        self.output = output
-        self.state = state
-    }
-}
-
-/// A recurrent neural network cell.
-public protocol RNNCell: Layer where Input == RNNCellInput<TimeStepInput, State>,
-                                     Output == RNNCellOutput<TimeStepOutput, State> {
-    /// The input at a time step.
-    associatedtype TimeStepInput: Differentiable
-    /// The output at a time step.
-    associatedtype TimeStepOutput: Differentiable
-    /// The state that may be preserved across time steps.
-    associatedtype State: Differentiable
-    /// The zero state.
-    var zeroState: State { get }
-}
-
-public extension RNNCell {
-    /// Returns the new state obtained from applying the RNN cell to the input at the current time
-    /// step and the previous state.
-    ///
-    /// - Parameters:
-    ///   - input: The input at the current time step.
-    ///   - state: The previous state of the RNN cell.
-    /// - Returns: The output.
-    @differentiable
-    func call(input: TimeStepInput, state: State) -> RNNCellOutput<TimeStepOutput, State> {
-        return self(RNNCellInput(input: input, state: state))
-    }
-}
-
-/// A simple RNN cell.
-public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell, VectorNumeric {
-    public var weight: Tensor<Scalar>
-    public var bias: Tensor<Scalar>
-
-    @noDerivative public var stateShape: TensorShape {
-        return TensorShape([1, weight.shape[1]])
-    }
-
-    public var zeroState: State {
-        return State(Tensor(zeros: stateShape))
-    }
-
-    // TODO(TF-507): Revert to `typealias State = Tensor<Scalar>` after
-    // SR-10697 is fixed.
-    public struct State: Equatable, Differentiable, VectorNumeric, KeyPathIterable {
-        public let value: Tensor<Scalar>
-        public init(_ value: Tensor<Scalar>) {
-            self.value = value
-        }
-    }
-
-    public typealias TimeStepInput = Tensor<Scalar>
-    public typealias TimeStepOutput = State
-    public typealias Input = RNNCellInput<TimeStepInput, State>
-    public typealias Output = RNNCellOutput<TimeStepOutput, State>
-
-    /// Creates a `SimpleRNNCell` with the specified input size and hidden state size.
-    ///
-    /// - Parameters:
-    ///   - inputSize: The number of features in 2-D input tensors.
-    ///   - hiddenSize: The number of features in 2-D hidden states.
-    ///   - seed: The random seed for initialization. The default value is random.
-    public init(inputSize: Int, hiddenSize: Int,
-                seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
-                                        Int64.random(in: Int64.min..<Int64.max))) {
-        let concatenatedInputSize = inputSize + hiddenSize
-        self.weight = Tensor(glorotUniform: [concatenatedInputSize, hiddenSize], seed: seed)
-        self.bias = Tensor(zeros: [hiddenSize])
-    }
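A sketch of stepping a cell manually through time via the `call(input:state:)` convenience defined on `RNNCell` above (illustrative shapes; the direct method call avoids relying on the toolchain's labeled call sugar):

    let cell = SimpleRNNCell<Float>(inputSize: 4, hiddenSize: 8)
    var state = cell.zeroState
    for _ in 0..<5 {
        let stepInput = Tensor<Float>(randomNormal: [1, 4])  // one example per step
        let stepOutput = cell.call(input: stepInput, state: state)
        state = stepOutput.state  // carry the hidden state to the next step
    }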