This repository was archived by the owner on Jul 1, 2023. It is now read-only.
Merged
116 changes: 116 additions & 0 deletions Sources/DeepLearning/Layer.swift
@@ -513,6 +513,122 @@ public extension Conv2D {
}
}

/// A 3-D convolution layer for spatial/spatio-temporal convolution over volumes (e.g. video).
///
/// This layer creates a convolution filter that is convolved with the layer input to produce a
/// tensor of outputs.
@_fixed_layout
public struct Conv3D<Scalar: TensorFlowFloatingPoint>: Layer {
/// The 5-D convolution kernel.
public var filter: Tensor<Scalar>
/// The bias vector.
public var bias: Tensor<Scalar>
/// An activation function.
public typealias Activation = @differentiable (Tensor<Scalar>) -> Tensor<Scalar>
/// The element-wise activation function.
@noDerivative public let activation: Activation
/// The strides of the sliding window for spatial/spatio-temporal dimensions.
@noDerivative public let strides: (Int, Int, Int)
/// The padding algorithm for convolution.
@noDerivative public let padding: Padding

/// Creates a `Conv3D` layer with the specified filter, bias, activation function, strides, and
/// padding.
///
/// - Parameters:
/// - filter: The 5-D convolution kernel.
/// - bias: The bias vector.
/// - activation: The element-wise activation function.
/// - strides: The strides of the sliding window for spatial/spatio-temporal dimensions.
/// - padding: The padding algorithm for convolution.
public init(
filter: Tensor<Scalar>,
bias: Tensor<Scalar>,
activation: @escaping Activation,
strides: (Int, Int, Int),
padding: Padding
) {
self.filter = filter
self.bias = bias
self.activation = activation
self.strides = strides
self.padding = padding
}

/// Returns the output obtained from applying the layer to the given input.
///
/// - Parameter input: The input to the layer.
/// - Returns: The output.
@differentiable
public func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
return activation(input.convolved3D(withFilter: filter,
strides: (1, strides.0, strides.1, strides.2, 1),
padding: padding) + bias)
}
}
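
For reference, a minimal usage sketch with made-up shapes (not taken from this diff): build the layer from an explicit filter and bias, then apply it to a rank-5 input laid out as [batch, depth, height, width, channels].

// Hypothetical example; assumes the DeepLearning module (and TensorFlow) is imported.
// The filter layout is [depth, height, width, inputChannels, outputChannels].
let filter = Tensor<Float>(shape: [2, 2, 2, 1, 3], scalars: (0..<24).map(Float.init))
let bias = Tensor<Float>(zeros: TensorShape([3]))
let conv = Conv3D<Float>(filter: filter, bias: bias, activation: identity,
                         strides: (1, 1, 1), padding: .valid)
let input = Tensor<Float>(shape: [1, 4, 4, 4, 1], scalars: (0..<64).map(Float.init))
let output = conv.inferring(from: input)   // Shape: [1, 3, 3, 3, 3].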

public extension Conv3D {
/// Creates a `Conv3D` layer with the specified filter shape, strides, padding, and
/// element-wise activation function. The filter tensor is initialized using Glorot uniform
/// initialization with the specified generator. The bias vector is initialized with zeros.
///
/// - Parameters:
/// - filterShape: The shape of the 5-D convolution kernel.
/// - strides: The strides of the sliding window for spatial/spatio-temporal dimensions.
/// - padding: The padding algorithm for convolution.
/// - activation: The element-wise activation function.
/// - generator: The random number generator for initialization.
///
/// - Note: Use `init(filterShape:strides:padding:activation:seed:)` for faster random
/// initialization.
init<G: RandomNumberGenerator>(
filterShape: (Int, Int, Int, Int, Int),
strides: (Int, Int, Int) = (1, 1, 1),
padding: Padding = .valid,
activation: @escaping Activation = identity,
generator: inout G
) {
let filterTensorShape = TensorShape([
filterShape.0, filterShape.1, filterShape.2, filterShape.3, filterShape.4])
self.init(
filter: Tensor(glorotUniform: filterTensorShape, generator: &generator),
bias: Tensor(zeros: TensorShape([filterShape.4])),
activation: activation,
strides: strides,
padding: padding)
}
}
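
A sketch of driving the generator-based initializer above (illustrative; `SystemRandomNumberGenerator` is used only because it is a readily available `RandomNumberGenerator` conformer, and the filter shape is made up):

// Hypothetical example; any RandomNumberGenerator instance can be passed inout.
var rng = SystemRandomNumberGenerator()
let conv = Conv3D<Float>(filterShape: (3, 3, 3, 1, 16),
                         strides: (1, 1, 1),
                         padding: .same,
                         activation: identity,
                         generator: &rng)
// conv.filter has shape [3, 3, 3, 1, 16]; conv.bias is a vector of 16 zeros.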

public extension Conv3D {
/// Creates a `Conv3D` layer with the specified filter shape, strides, padding, and
/// element-wise activation function. The filter tensor is initialized using Glorot uniform
/// initialization with the specified seed. The bias vector is initialized with zeros.
///
/// - Parameters:
/// - filterShape: The shape of the 5-D convolution kernel.
/// - strides: The strides of the sliding window for spatial/spatio-temporal dimensions.
/// - padding: The padding algorithm for convolution.
/// - activation: The element-wise activation function.
/// - seed: The random seed for initialization. The default value is random.
init(
filterShape: (Int, Int, Int, Int, Int),
strides: (Int, Int, Int) = (1, 1, 1),
padding: Padding = .valid,
activation: @escaping Activation = identity,
seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
Int64.random(in: Int64.min..<Int64.max))
) {
let filterTensorShape = TensorShape([
filterShape.0, filterShape.1, filterShape.2, filterShape.3, filterShape.4])
self.init(
filter: Tensor(glorotUniform: filterTensorShape, seed: seed),
bias: Tensor(zeros: TensorShape([filterShape.4])),
activation: activation,
strides: strides,
padding: padding)
}
}
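
A corresponding sketch for the seeded initializer (illustrative shapes; with the default argument the seed is drawn randomly, so pass an explicit seed for reproducible filters):

// Hypothetical example: a reproducible Conv3D over 4-channel 16x16 single-frame inputs.
let conv = Conv3D<Float>(filterShape: (1, 3, 3, 4, 8),
                         strides: (1, 1, 1),
                         padding: .same,
                         activation: identity,
                         seed: (42, 42))
let frames = Tensor<Float>(zeros: [2, 1, 16, 16, 4])   // [batch, depth, height, width, channels]
let features = conv.inferring(from: frames)            // Shape: [2, 1, 16, 16, 8] with .same padding.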

/// A 2-D transposed convolution layer (e.g. spatial transposed convolution over images).
///
/// This layer creates a convolution filter that is transpose-convolved with the layer input
122 changes: 122 additions & 0 deletions Sources/DeepLearning/Operators.swift
@@ -225,6 +225,100 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
})
}

/// TensorFlow builtin conv3d gradient helper for the input.
@inlinable
@differentiable(wrt: (self, filter), vjp: _vjpConv3DBackpropInput)
internal func conv3DBackpropInput(
shape: Tensor<Int32>,
filter: Tensor,
strides: (Int, Int, Int, Int, Int),
padding: Padding
) -> Tensor {
return Raw.conv3DBackpropInputV2(
inputSizes: shape,
filter: filter,
outBackprop: self,
strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2),
Int32(strides.3), Int32(strides.4)],
padding: padding.raw)
}

/// TensorFlow builtin conv3d gradient helper for the filter.
@inlinable
@differentiable(wrt: (self, input), vjp: _vjpConv3DBackpropFilter)
internal func conv3DBackpropFilter(
input: Tensor,
filterSizes: Tensor<Int32>,
strides: (Int, Int, Int, Int, Int),
padding: Padding
) -> Tensor {
return Raw.conv3DBackpropFilterV2(
input,
filterSizes: filterSizes,
outBackprop: self,
strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2),
Int32(strides.3), Int32(strides.4)],
padding: padding.raw)
}

@inlinable
internal func _vjpConv3DBackpropInput(
_ shape: Tensor<Int32>,
_ filter: Tensor,
_ strides: (Int, Int, Int, Int, Int),
_ padding: Padding
) -> (Tensor, (Tensor) -> (Tensor, Tensor)) {
let value = conv3DBackpropInput(shape: shape, filter: filter, strides: strides,
padding: padding)
return (value, { v in
return (
self.conv3DBackpropFilter(input: v, filterSizes: shape, strides: strides,
padding: padding),
v.convolved3D(withFilter: filter, strides: strides, padding: padding)
)
})
}

@inlinable
internal func _vjpConv3DBackpropFilter(
_ input: Tensor,
_ filterSizes: Tensor<Int32>,
_ strides: (Int, Int, Int, Int, Int),
_ padding: Padding
) -> (Tensor, (Tensor) -> (Tensor, Tensor)) {
let value = conv3DBackpropFilter(input: input, filterSizes: filterSizes,
strides: strides, padding: padding)
return (value, { v in
return (
self.conv3DBackpropInput(shape: filterSizes, filter: v, strides: strides,
padding: padding),
input.convolved3D(withFilter: v, strides: strides, padding: padding)
)
})
}

@inlinable
internal func _vjpConvolved3D(
filter: Tensor,
strides: (Int, Int, Int, Int, Int),
padding: Padding
) -> (Tensor, (Tensor) -> (Tensor, Tensor)) {
let value = convolved3D(withFilter: filter, strides: strides,
padding: padding)
return (value, { v in
return (
v.conv3DBackpropInput(
shape: self.shapeTensor, filter: filter,
strides: strides, padding: padding
),
v.conv3DBackpropFilter(
input: self, filterSizes: filter.shapeTensor,
strides: strides, padding: padding
)
)
})
}
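
For context, the pullbacks above are what the automatic-differentiation runtime calls when differentiating through `convolved3D`. A hedged sketch of requesting such gradients (illustrative shapes; it assumes the `gradient(at:_:in:)` entry point from the TensorFlow module):

// Gradients of a scalar-valued function with respect to both the input volume and the filter.
let input = Tensor<Float>(ones: [1, 2, 4, 4, 1])
let filter = Tensor<Float>(ones: [1, 2, 2, 1, 1])
let (gradInput, gradFilter) = gradient(at: input, filter) { input, filter in
    input.convolved3D(withFilter: filter,
                      strides: (1, 1, 1, 1, 1),
                      padding: .valid).sum()
}
// gradInput has the shape of `input`; gradFilter has the shape of `filter`.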

@inlinable
internal func _vjpMaxPooled2D(
kernelSize: (Int, Int, Int, Int),
@@ -345,6 +439,34 @@ public extension Tensor where Scalar: FloatingPoint {
explicitPaddings: [])
}

/// Computes a 3-D convolution using `self` as input, with the specified
/// filter, strides, and padding.
///
/// - Parameters:
/// - filter: The convolution filter.
/// - strides: The strides of the sliding filter for each dimension of the
/// input.
/// - padding: The padding for the operation.
/// - Precondition: `self` must have rank 5.
/// - Precondition: `filter` must have rank 5.
@inlinable @inline(__always)
@differentiable(
wrt: (self, filter), vjp: _vjpConvolved3D
where Scalar: TensorFlowFloatingPoint
)
func convolved3D(
withFilter filter: Tensor,
strides: (Int, Int, Int, Int, Int),
padding: Padding
) -> Tensor {
return Raw.conv3D(
self,
filter: filter,
strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2),
Int32(strides.3), Int32(strides.4)],
padding: padding.raw)
}
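
A short sketch of calling this method directly (illustrative values; note that the strides tuple has five entries, covering the batch and channel dimensions, which are normally left at 1):

// Box-filter a 3x3x3 single-channel volume: each output scalar is the sum of a 2x2x2 neighborhood.
let volume = Tensor<Float>(shape: [1, 3, 3, 3, 1], scalars: (0..<27).map(Float.init))
let kernel = Tensor<Float>(ones: [2, 2, 2, 1, 1])   // [depth, height, width, inChannels, outChannels]
let summed = volume.convolved3D(withFilter: kernel,
                                strides: (1, 1, 1, 1, 1),
                                padding: .valid)    // Shape: [1, 2, 2, 2, 1].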

/// Computes a 2-D max pooling, with the specified kernel sizes, strides, and
/// padding.
///
17 changes: 15 additions & 2 deletions Tests/DeepLearningTests/LayerTests.swift
@@ -23,7 +23,19 @@ final class LayerTests: XCTestCase {
let input = Tensor<Float>([[0, 1, 2, 3, 4], [10, 11, 12, 13, 14]]).expandingShape(at: 2)
let output = layer.inferring(from: input)
let expected = Tensor<Float>([[[1, 4], [2, 7], [3, 10]], [[11, 34], [12, 37], [13, 40]]])
XCTAssertEqual(round(output), expected)
XCTAssertEqual(output, expected)
}

func testConv3D() {
let filter = Tensor(shape: [1, 2, 2, 2, 1], scalars: (0..<8).map(Float.init))
let bias = Tensor<Float>([-1, 1])
let layer = Conv3D<Float>(filter: filter, bias: bias, activation: identity,
strides: (1, 2, 1), padding: .valid)
let input = Tensor(shape: [2, 2, 2, 2, 2], scalars: (0..<32).map(Float.init))
let output = layer.inferring(from: input)
let expected = Tensor<Float>(shape: [2, 2, 1, 1, 2],
scalars: [139, 141, 363, 365, 587, 589, 811, 813])
XCTAssertEqual(output, expected)
}
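
For the curious, the expected scalars can be checked by hand: with `valid` padding and strides (1, 2, 1) there is exactly one window per (batch, depth) position, and it pairs input scalars k, k+1, ..., k+7 with filter scalars 0...7, where k = 16*batch + 8*depth. A tiny stand-alone sketch of that arithmetic (plain Swift, not part of the test):

// The dot product of each window with the filter is 28*k + 140; the two-element
// bias (-1, 1) then broadcasts the single output channel into a pair of values.
for k in [0, 8, 16, 24] {
    let dot = (0..<8).map { (k + $0) * $0 }.reduce(0, +)
    print(dot - 1, dot + 1)
}
// Prints 139 141, 363 365, 587 589, 811 813, matching `expected` above.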

func testMaxPool1D() {
@@ -68,7 +80,7 @@ final class LayerTests: XCTestCase {

func testAvgPool3D() {
let layer = AvgPool3D<Float>(poolSize: (2, 4, 5), strides: (1, 1, 1), padding: .valid)
let input = Tensor(shape: [1, 2, 4, 5, 1], scalars: (0..<20).map(Float.init))
let input = Tensor(shape: [1, 2, 4, 5, 1], scalars: (0..<40).map(Float.init))
let output = layer.inferring(from: input)
let expected = Tensor<Float>([[[[[19.5]]]]])
XCTAssertEqual(output, expected)
@@ -187,6 +199,7 @@ final class LayerTests: XCTestCase {

static var allTests = [
("testConv1D", testConv1D),
("testConv3D", testConv3D),
("testMaxPool1D", testMaxPool1D),
("testMaxPool2D", testMaxPool2D),
("testMaxPool3D", testMaxPool3D),