Skip to content
This repository has been archived by the owner on Jan 25, 2021. It is now read-only.

Commit

Permalink
Fix bug in batchnorm op.
Browse files Browse the repository at this point in the history
  • Loading branch information
pcpLiu committed Dec 12, 2017
1 parent 393cdc2 commit 8b841ed
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 10 deletions.
5 changes: 3 additions & 2 deletions .mkdocs.yml
Expand Up @@ -13,7 +13,7 @@ site_author: pcpLiu
site_description: Serrano Framework Guides
copyright: Copyright (c) 2017, Zhonghao (Tim) Liu

site_dir: generated_docs/v0.1.2-alpha/guides
site_dir: generated_docs/v0.1.5-alpha/guides

google_analytics:
- 'UA-54280547-7'
Expand Down Expand Up @@ -44,7 +44,7 @@ extra_javascript:
- 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-MML-AM_CHTML'

extra:
version: v0.1.2-alpha
version: v0.1.5-alpha


pages:
Expand All @@ -66,6 +66,7 @@ pages:
- Neural network:
- Common:
- Fully Connected: 'operators/fully_connected.md'
- BatchNormalization: 'operators/batchnorm.md'
- Convolution: 'operators/conv.md'
- Pooling: 'operators/pooling.md'
- Activation: 'operators/activation.md'
Expand Down
3 changes: 2 additions & 1 deletion Source/Serrano/operators/nn/common/batchnorm_op.metal
Expand Up @@ -15,6 +15,7 @@ typedef struct {
int channels;
int inputWidth;
int inputHeight;
float epsilon;
} BatchNormInfo;

void kernel batchNorm_inference(constant float* input [[ buffer(0) ]],
Expand All @@ -38,5 +39,5 @@ void kernel batchNorm_inference(constant float* input [[ buffer(0) ]],
inputOutputOffset = thread_id.z * info.inputHeight * info.inputWidth + thread_id.y * info.inputWidth + thread_id.x;
}

output[inputOutputOffset] = scale[thread_id.z] * (input[inputOutputOffset] - mean[thread_id.z]) / var[thread_id.z] + offset[thread_id.z];
output[inputOutputOffset] = scale[thread_id.z] * (input[inputOutputOffset] - mean[thread_id.z]) / sqrt(var[thread_id.z] + info.epsilon) + offset[thread_id.z];
}
12 changes: 9 additions & 3 deletions Source/Serrano/operators/nn/common/batchnorm_op.swift
Expand Up @@ -25,13 +25,15 @@ public struct BatchNormInfo {
var channels: MetalInt
var inputWidth: MetalInt
var inputHeight: MetalInt
var epsilon: MetalFloat

public static func makeBatchNormInfo(channelOrder: TensorChannelOrder, input: Tensor) -> BatchNormInfo {
public static func makeBatchNormInfo(channelOrder: TensorChannelOrder, epsilon: Float, input: Tensor) -> BatchNormInfo {
let (channel, height, width) = parseImgChannelShapeInfo(channelOrder, shapeArray: input.shape.shapeArray)
return BatchNormInfo(channelPosition: channelOrder.rawValue.metalShort,
channels: channel.metalInt,
inputWidth: width.metalInt,
inputHeight: height.metalInt)
inputHeight: height.metalInt,
epsilon: epsilon.metalFloat)
}
}

Expand Down Expand Up @@ -313,6 +315,10 @@ public class BatchNormOperator: ComputableOperator {

/// CPU in inference
internal func cpu_inference() {
//TODO: FIX ERROR
// sqrt(var + epsilon)
// let

// get reciprocal of movingVar
let movingVarienceReciprocal = Tensor(repeatingValue: 0.0, tensorShape: self.movingVar!.shape)
var count = Int32(self.movingVar!.count)
Expand Down Expand Up @@ -421,7 +427,7 @@ public class BatchNormOperator: ComputableOperator {

let inputBufferResource = input.gpuBufferResource()
let outputBufferResource = output.gpuBufferResource()
var info = BatchNormInfo.makeBatchNormInfo(channelOrder: self.channelOrder, input: input)
var info = BatchNormInfo.makeBatchNormInfo(channelOrder: self.channelOrder, epsilon: self.epsilon, input: input)

let encoder = commandBuffer!.makeComputeCommandEncoder()
encoder.setComputePipelineState(kernel!)
Expand Down
8 changes: 4 additions & 4 deletions docs/doc_generate.sh
Expand Up @@ -5,11 +5,11 @@ jazzy \
--author pcpLiu \
--author_url https://github.com/pcpLiu \
--github_url https://github.com/pcpLiu/Serrano \
--github-file-prefix https://github.com/pcpLiu/Serrano/tree/v0.1.2-alpha \
--module-version v0.1.2-alpha \
--github-file-prefix https://github.com/pcpLiu/Serrano/tree/v0.1.5-alpha \
--module-version v0.1.5-alpha \
--xcodebuild-arguments -scheme,SerranoFramework \
--module Serrano \
--min-acl internal \
--root-url http://serrano-lib.org/docs/v0.1.2-alpha/api/ \
--output generated_docs/v0.1.2-alpha/api/ \
--root-url http://serrano-lib.org/docs/v0.1.5-alpha/api/ \
--output generated_docs/v0.1.5-alpha/api/ \
--theme fullwidth
14 changes: 14 additions & 0 deletions docs/operators/batchnorm.md
@@ -0,0 +1,14 @@
Currently, Serrano supports 2D BatchNormalization ([API](http://serrano-lib.org/docs/latest/api/Classes/BatchNormOperator.html)).

**Notes**

- The `inputTensors` and `outputTensors` should contain the same number of tensors.
- There can be multiple tensors in `inputTensors`, and all tensors in `inputTensors` should have the same shape. The operator performs the calculation for each input tensor independently.

## Initialization

```swift
let bn = BatchNormOperator(channelOrder: TensorChannelOrder.Last)
```

- `channelOrder`: The ordering of the feature channels in the input tensors (e.g. channel-first or channel-last).

0 comments on commit 8b841ed

Please sign in to comment.