Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚨 test(integration): test IDFT and complex numbers #28

Merged
merged 2 commits into from
Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Docs/Contributing/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.

## [unreleased]

🚨 **integration**: test IDFT and complex numbers ([#28](https://github.com/owkin/MAKit/pull/28))\
🚨 **layer_2d**: add tests for non dirty status ([#27](https://github.com/owkin/MAKit/pull/27))\
🔨 **tests**: factorize transform tests ([#26](https://github.com/owkin/MAKit/pull/26))\
🪜 **layer_2d**: FTFrequences2D & Multiply2D ([#25](https://github.com/owkin/MAKit/pull/25))\
Expand Down
8 changes: 0 additions & 8 deletions Sources/MAKit/Core/Model/Model.swift
Original file line number Diff line number Diff line change
Expand Up @@ -841,10 +841,6 @@ public class Model: BaseModel
MAKit.Time.start()

try layer.backwardGPU()
if layer.mustComputeBackward
{
layer.propagateDirty()
}

try MAKit.Time.stop(id: "BackwardGPU", description: desc)
}
Expand All @@ -861,10 +857,6 @@ public class Model: BaseModel
MAKit.Time.start()

layer.backwardCPU()
if layer.mustComputeBackward
{
layer.propagateDirty()
}

try MAKit.Time.stop(id: "BackwardCPU", description: desc)
}
Expand Down
155 changes: 139 additions & 16 deletions Sources/MAKit/Layer2D/Multiply2D.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
///
public class Multiply2D: LayerMerge2D
{
///
/// List of output buffers.
/// Shape ~ (batch, nbChannels, height, width).
///
var _otherOuts: [MetalBuffer<Float>] = []

///
/// Create a layer with a 2D shape neural structure.
///
Expand Down Expand Up @@ -83,6 +89,72 @@ public class Multiply2D: LayerMerge2D
return layer
}

///
/// Clean state resources in the CPU execution context.
///
/// We clean the neurons' state (forward and backward) and drop the
/// per-branch output buffers so they get re-allocated on next use.
///
public override func resetKernelCPU()
{
    super.resetKernelCPU()
    _otherOuts.removeAll()
}

///
/// Clean state resources in the GPU execution context.
///
/// We clean the neurons' state (forward and backward) and drop the
/// per-branch output buffers so they get re-allocated on next use.
///
public override func resetKernelGPU()
{
    super.resetKernelGPU()
    _otherOuts.removeAll()
}

///
/// Initialize state resources in the CPU execution context.
///
/// We initialize the neurons' state (forward and backward) and lazily
/// allocate one shared buffer per previous layer. Each buffer caches the
/// product of the outputs of the *other* branches so the backward pass
/// can read it back without re-dividing by the branch's own output.
///
/// - Parameter batchSize: The batch size of data.
///
public override func checkStateCPU(batchSize: Int) throws
{
    try super.checkStateCPU(batchSize: batchSize)

    // Allocate once; buffers are reused until the kernel is reset.
    // NOTE(review): allocation uses the first batchSize seen — presumably
    // later batches never exceed it; confirm against super's contract.
    if _otherOuts.isEmpty
    {
        for _ in 0..<_layersPrev.count
        {
            let buffer = MetalSharedBuffer<Float>(
                batchSize * nbChannels * height * width,
                deviceID: deviceID
            )
            _otherOuts.append(buffer)
        }
    }
}

///
/// Initialize state resources in the GPU execution context.
///
/// We initialize the neurons' forward state and lazily allocate one
/// private buffer per previous layer. Each buffer caches the product of
/// the outputs of the *other* branches for use in the backward pass.
///
/// - Parameter batchSize: The batch size of data.
///
public override func checkStateForwardGPU(batchSize: Int) throws
{
    try super.checkStateForwardGPU(batchSize: batchSize)

    // Allocate once; buffers are reused until the kernel is reset.
    if _otherOuts.isEmpty
    {
        for _ in 0..<_layersPrev.count
        {
            let buffer = MetalPrivateBuffer<Float>(
                batchSize * nbChannels * height * width,
                deviceID: deviceID
            )
            _otherOuts.append(buffer)
        }
    }
}

///
/// Apply the forward pass of the Gradient Checking in CPU execution context.
///
Expand Down Expand Up @@ -273,18 +345,37 @@ public class Multiply2D: LayerMerge2D
for elem in 0..<batchSize {
for depth in 0..<nbChannels
{
let offsetStart = (depth + nbChannels * elem) * height

for i in 0..<height {
for j in 0..<width
{
let offset = j + (offsetStart + i) * width

var mult = 1.0
for num in 0..<_layersPrev.count
{
let neuronsPrev =
(_layersPrev[num] as! Layer2D).neurons
mult *= neuronsPrev[depth].get(i, j)!.v[elem].out
}

neurons[depth].get(i, j)!.v[elem].out = mult

for num1 in 0..<_layersPrev.count
{
let buffer = (_otherOuts[num1] as! MetalSharedBuffer).buffer

mult = 1.0
for num2 in 0..<_layersPrev.count {
if num2 != num1
{
let neuronsPrev =
(_layersPrev[num2] as! Layer2D).neurons
mult *= neuronsPrev[depth].get(i, j)!.v[elem].out
}}

buffer[offset] = Float(mult)
}
}}
}}
}
Expand All @@ -298,19 +389,19 @@ public class Multiply2D: LayerMerge2D
{
try checkStateForwardGPU(batchSize: batchSize)

var first = true
for num in 0..<_layersPrev.count
var first1 = true
for num1 in 0..<_layersPrev.count
{
let nbElems = (_layersPrev[num] as! Layer2D).outs.nbElems
let nbElems = (_layersPrev[num1] as! Layer2D).outs.nbElems
let pNbElems: [UInt32] = [UInt32(nbElems)]

let command: MetalCommand
if first
var command: MetalCommand
if first1
{
command = MetalKernel.get.createCommand(
"sum1", deviceID: deviceID
)
first = false
first1 = false
}
else
{
Expand All @@ -320,13 +411,41 @@ public class Multiply2D: LayerMerge2D
}

command.setBuffer(
(_layersPrev[num] as! Layer2D).outs.metal, atIndex: 0
(_layersPrev[num1] as! Layer2D).outs.metal, atIndex: 0
)
command.setBytes(pNbElems, atIndex: 1)
command.setBuffer(outs.metal, atIndex: 2)

command.dispatchThreads(nbElems)
command.enqueue()

var first2 = true
for num2 in 0..<_layersPrev.count {
if num2 != num1
{
if first2
{
command = MetalKernel.get.createCommand(
"sum1", deviceID: deviceID
)
first2 = false
}
else
{
command = MetalKernel.get.createCommand(
"multiplyForward", deviceID: deviceID
)
}

command.setBuffer(
(_layersPrev[num2] as! Layer2D).outs.metal, atIndex: 0
)
command.setBytes(pNbElems, atIndex: 1)
command.setBuffer(_otherOuts[num1].metal, atIndex: 2)

command.dispatchThreads(nbElems)
command.enqueue()
}}
}
}

Expand All @@ -346,14 +465,19 @@ public class Multiply2D: LayerMerge2D
}

let neuronsPrev = (_layersPrev[num] as! Layer2D).neurons
let buffer = (_otherOuts[num] as! MetalSharedBuffer).buffer

for elem in 0..<batchSize {
for depth in 0..<nbChannels
{
let offsetStart = (depth + nbChannels * elem) * height

for i in 0..<height {
for j in 0..<width
{
let out = neurons[depth].get(i, j)!.v[elem].out
let tmp = out / neuronsPrev[depth].get(i, j)!.v[elem].out
let offset = j + (offsetStart + i) * width

let tmp = Double(buffer[offset])
let deltaCur = neurons[depth].get(i, j)!.v[elem].delta

if _layersPrev[num].dirty
Expand Down Expand Up @@ -401,12 +525,11 @@ public class Multiply2D: LayerMerge2D
let command = MetalKernel.get.createCommand(
"multiplyBackward", deviceID: deviceID
)
command.setBuffer(layerPrev.outs.metal, atIndex: 0)
command.setBuffer(outs.metal, atIndex: 1)
command.setBuffer(delta.metal, atIndex: 2)
command.setBytes(pNbElems, atIndex: 3)
command.setBytes(pDirty, atIndex: 4)
command.setBuffer(layerPrev.delta.metal, atIndex: 5)
command.setBuffer(_otherOuts[num].metal, atIndex: 0)
command.setBuffer(delta.metal, atIndex: 1)
command.setBytes(pNbElems, atIndex: 2)
command.setBytes(pDirty, atIndex: 3)
command.setBuffer(layerPrev.delta.metal, atIndex: 4)

command.dispatchThreads(nbElems)
command.enqueue()
Expand Down
5 changes: 2 additions & 3 deletions Sources/MAKit/Metal/Kernel/Merge.metal
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ kernel void multiplyForward(
}

kernel void multiplyBackward(
const device float * outsPrev,
const device float * outs,
const device float * delta,
constant uint * pNbElems,
Expand All @@ -89,7 +88,7 @@ kernel void multiplyBackward(
uint nbElems;
uint dirty;

if (pNbElems && pDirty && outsPrev && outs && delta && deltaPrev)
if (pNbElems && pDirty && outs && delta && deltaPrev)
{
nbElems = pNbElems[0];
dirty = *pDirty;
Expand All @@ -102,7 +101,7 @@ kernel void multiplyBackward(
return ;
}

float tmp = outs[id] / outsPrev[id];
float tmp = outs[id];
float deltaCur = delta[id];

if (dirty)
Expand Down
85 changes: 84 additions & 1 deletion Tests/MATorchTests/Base/Model.swift
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,9 @@ class ModelTest2
params: params
)

// Load weights from `PyTorch`.
let model = Model(model: context.model, modelsPrev: [])

// Load weights from `PyTorch`.
let pythonLib = Python.import("python_lib")
let data = pythonLib.load_test2_weights()

Expand Down Expand Up @@ -223,3 +223,86 @@ class ModelTest2
return model
}
}

/// Model to test against PyTorch.
class ModelTest3
{
    ///
    /// Create the model.
    ///
    /// Principle features:
    /// - 2D Frequences & scale
    /// - IRDFT
    /// - (Decorrelate color)
    ///
    /// - Parameter size: The size of the input data.
    /// - Returns: The built model.
    ///
    static func build(_ size: Int) -> Model
    {
        // Fix: context was named "ModelTest2" — copy-paste from the
        // previous test model.
        let context = ModelContext(name: "ModelTest3", curID: 0)
        let params = MAKit.Model.Params(context: context)

        var layer: Layer2D
        layer = Input2D(
            nbChannels: 6, width: size, height: size,
            params: params
        )
        let firstLayer: Layer2D = layer

        layer = FTFrequences2D(
            nbChannels: 6, dimension: size,
            params: params
        )

        // Multiply the input by the frequency grid.
        layer = Multiply2D(
            layersPrev: [firstLayer, layer], params: params
        )

        layer = IRDFT2RGB(
            layerPrev: layer, params: params
        )

        layer = LinearScale2D(
            layerPrev: layer,
            weight: 1.0 / 4.0, bias: 0.0,
            params: params
        )

        /*
        Introduces float rounding approximation issues!
        layer = DecorrelateRGB(
            layerPrev: layer,
            correlation: [
                0.26, 0.26, 0.27,
                0.09, 0.00, -0.09,
                0.02, -0.05, 0.03
            ].map { $0 / 0.4619524 },
            params: params
        )
        */

        layer = Activation2D(
            layerPrev: layer,
            activation: Sigmoid.str,
            params: params
        )

        // Map sigmoid output from [0, 1] back to [-1, 1].
        layer = LinearScale2D(
            layerPrev: layer,
            weight: 2.0, bias: -1.0,
            params: params
        )

        var head: Layer1D = AvgPool2D(
            layerPrev: layer, params: params
        )

        // `head` is not read afterwards: layers register themselves in
        // the shared context, from which the model is built below.
        head = SelectNeurons1D(
            layerPrev: head, neurons: [0], coeffs: [1.0], params: params
        )

        let model = Model(model: context.model, modelsPrev: [])
        return model
    }
}
Loading