Min-max quantization #519

Merged
3 changes: 3 additions & 0 deletions build-tools/code_generator/function_types.yaml
@@ -420,6 +420,9 @@ INQConvolution:
FixedPointQuantize:
float: [float]
half: [Half]
MinMaxQuantize:
float: [float]
half: [Half]
Pow2Quantize:
float: [float]
half: [Half]
129 changes: 129 additions & 0 deletions build-tools/code_generator/functions.yaml
@@ -5142,6 +5142,135 @@ Quantization Neural Network Layers:
function_ids:
BifB: 113
c_runtime: not support
MinMaxQuantize:
snake_name: min_max_quantize
doc: |+
This function uniformly quantizes input values using the given quantization range and the minimum and maximum quantization levels.

Min-max quantization is defined by the following equation:

.. math::

y = round \left(\frac{\min(\max(x, m), M) - m}{scale} \right) \times scale + m,

where the :math:`scale` is defined as

.. math::

scale = \frac{M - m}{M_q - m_q},

and

.. math::

m_q = ql_{min}, \\
M_q = ql_{max}, \\
m = qr_{min}, \\
M = qr_{max}.

In the backward pass, when `ste_fine_grained` is `False`,

.. math::

\frac{\partial q_i}{\partial x_i} = 1.


In the backward pass, when `ste_fine_grained` is `True`,

.. math::

\frac{\partial q_i}{\partial x_i}= \left\{
\begin{array}{ll}
0 & if \ \ \ x_i > M \\
1 & if \ \ m \le x_i \le M \\
0 & if \ \ x_i < m \\
\end{array} \right..

:math:`qr_{min}` and :math:`qr_{max}` are treated as follows.

* `x_min_max` is `True` and `ema` is `True`:
Exponential moving averages are computed for :math:`\min(x)` and :math:`\max(x)`,
then stored in :math:`qr_{min}` and :math:`qr_{max}`.
* `x_min_max` is `True` and `ema` is `False`:
:math:`\min(x)` and :math:`\max(x)` are computed and then stored in :math:`qr_{min}` and :math:`qr_{max}`.
* `x_min_max` is `False` and `ema` is `True`:
The exponential moving averages stored in :math:`qr_{min}` and :math:`qr_{max}` are used.
* `x_min_max` is `False` and `ema` is `False`:
Gradients of :math:`qr_{min}` and :math:`qr_{max}` are computed in the backward pass.

More precisely, for inference with min-max quantization, one has to consider the
*zero-point (zp)*, an integer that corresponds to the real value 0.
The *zero-point* is defined as

.. math::

&& zp_f = ql_{min} - \frac{qr_{min}}{scale}, \\
&& zp = \left\{
\begin{array}{ll}
   ql_{max} & if \ \ \ zp_f \geq ql_{max} \\
   round(zp_f) & otherwise \\
   ql_{min} & if \ \ \ zp_f \leq ql_{min} \\
\end{array} \right..

Accordingly, in order to simulate the quantization effect of the *zero-point*,
:math:`qr_{min}` and :math:`qr_{max}` are adjusted as follows during both the forward and backward passes,

.. math::

qr_{min}^{adj} = (ql_{min} - zp) \times scale, \\
qr_{max}^{adj} = (ql_{max} - zp) \times scale.

These operations are often called *nudge*.

Finally, in the formulas of the min-max quantization, :math:`m` and :math:`M` are replaced by
:math:`qr_{min}^{adj}` and :math:`qr_{max}^{adj}` respectively.

.. note::

Quantized values are stored as floating point numbers, since this function is for simulation purposes.

inputs:
x:
doc: N-D array input.
qr_min:
doc: Minimum value for the quantization range, modified during forward execution
when x_min_max is True.
qr_max:
doc: Maximum value for the quantization range, modified during forward execution
when x_min_max is True.
ql_min:
doc: Minimum value for the quantization level, typically 0.
ql_max:
doc: Maximum value for the quantization level, typically 255.
arguments:
decay:
doc: Decay rate for the exponential moving average.
type: float
default: '0.999'
x_min_max:
doc: Use the min and max of x to compute quantization ranges.
type: bool
default: 'False'
ema:
doc: Use the exponential moving average for the min and max quantization ranges.
type: bool
default: 'False'
ste_fine_grained:
doc: Whether the Straight Through Estimator is fine-grained or not.
type: bool
default: 'True'
eps:
doc: Epsilon, a small value ensuring that :math:`qr_{max} - qr_{min}` is
greater than this value.
type: float
default: '0.01'
outputs:
y:
doc: N-D array.
c_runtime: not support
function_ids:
fBBB: 273
fBBBf: 274
Pow2Quantize:
snake_name: pow2_quantize
doc: |2+
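For concreteness, the following is a minimal NumPy sketch of the forward formulas documented above (scale, zero-point, nudged range, and the quantization itself), together with the STE mask used when `ste_fine_grained` is `True`. It is an illustration only; names such as `min_max_quantize_forward` are hypothetical and not part of this PR.

```python
import numpy as np

def min_max_quantize_forward(x, qr_min, qr_max, ql_min=0.0, ql_max=255.0):
    """Illustrative NumPy version of the documented forward formulas."""
    scale = (qr_max - qr_min) / (ql_max - ql_min)
    # Zero-point: the integer level corresponding to the real value 0,
    # rounded and clipped to the available quantization levels.
    zp = np.clip(np.round(ql_min - qr_min / scale), ql_min, ql_max)
    # Nudge: adjust the range so that 0 is exactly representable.
    qr_min_adj = (ql_min - zp) * scale
    qr_max_adj = (ql_max - zp) * scale
    # Uniform quantization within the nudged range.
    x_clipped = np.clip(x, qr_min_adj, qr_max_adj)
    y = np.round((x_clipped - qr_min_adj) / scale) * scale + qr_min_adj
    # STE mask (ste_fine_grained=True): the gradient passes only inside the range.
    ste_mask = (x >= qr_min_adj) & (x <= qr_max_adj)
    return y, ste_mask

x = np.linspace(-1.5, 1.5, 7)
y, mask = min_max_quantize_forward(x, qr_min=-1.0, qr_max=1.0)
```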
1 change: 1 addition & 0 deletions doc/python/api/function.rst
@@ -247,6 +247,7 @@ Quantized Neural Network Layers
.. autofunction:: binary_weight_affine
.. autofunction:: binary_weight_convolution
.. autofunction:: fixed_point_quantize
.. autofunction:: min_max_quantize
.. autofunction:: pow2_quantize
.. autofunction:: prune

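A minimal usage sketch of the function-level API, assuming the Python binding `nnabla.functions.min_max_quantize` generated from the YAML definition above; the shapes of the range and level variables are illustrative:

```python
import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable((8, 16), need_grad=True)
x.d = np.random.randn(*x.shape)

# Quantization range (updated during forward when x_min_max or ema is used)
# and fixed quantization levels for an unsigned 8-bit simulation.
qr_min = nn.Variable.from_numpy_array(np.full((1, 1), -1.0, dtype=np.float32))
qr_max = nn.Variable.from_numpy_array(np.full((1, 1), 1.0, dtype=np.float32))
ql_min = nn.Variable.from_numpy_array(np.full((1, 1), 0.0, dtype=np.float32))
ql_max = nn.Variable.from_numpy_array(np.full((1, 1), 255.0, dtype=np.float32))

# Track the quantization range with an exponential moving average of min(x) / max(x).
y = F.min_max_quantize(x, qr_min, qr_max, ql_min, ql_max,
                       decay=0.999, x_min_max=True, ema=True)
y.forward()
```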
3 changes: 3 additions & 0 deletions doc/python/api/parametric_function.rst
@@ -96,10 +96,13 @@ Here is the list of parametric functions.

.. autofunction:: fixed_point_quantized_affine
.. autofunction:: fixed_point_quantized_convolution
.. autofunction:: min_max_quantized_affine
.. autofunction:: min_max_quantized_convolution
.. autofunction:: pow2_quantized_affine
.. autofunction:: pow2_quantized_convolution
.. autofunction:: pruned_affine
.. autofunction:: pruned_convolution
.. autofunction:: min_max_quantize

.. autofunction:: lstm_cell

115 changes: 115 additions & 0 deletions include/nbla/function/min_max_quantize.hpp
@@ -0,0 +1,115 @@
// Copyright (c) 2017 Sony Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// min_max_quantize.hpp
#ifndef NBLA_FUNCTION_MIN_MAX_QUANTIZE_HPP
#define NBLA_FUNCTION_MIN_MAX_QUANTIZE_HPP

#include <nbla/cpu.hpp>
#include <nbla/function.hpp>
#include <nbla/function_registry.hpp>

namespace nbla {

NBLA_REGISTER_FUNCTION_HEADER(MinMaxQuantize, float, bool, bool, bool, float);

/** MinMaxQuantize uniformly quantizes values within the min-max quantization range.

Inputs:
- N-D array of input
- N-D array of minimum quantization range (modified during forward execution)
- N-D array of maximum quantization range (modified during forward execution)
- N-D array of minimum quantization level
- N-D array of maximum quantization level

@tparam T Data type for computation.

@param decay Decay rate for the exponential moving average.
@param x_min_max Use the min and max of x to compute quantization ranges.
@param ema Use the exponential moving average for the min and max quantization ranges.
@param ste_fine_grained Whether the Straight Through Estimator is fine-grained or not.
@param eps Epsilon, a small value ensuring that qr_max - qr_min stays greater than this value.

\ingroup FunctionImplGrp
*/
template <typename T>
class MinMaxQuantize : public BaseFunction<float, bool, bool, bool, float> {
protected:
float decay_;
bool x_min_max_;
bool ema_;
bool ste_fine_grained_;
float eps_;
shared_ptr<Function> identity_;
shared_ptr<Function> round_;
shared_ptr<Function> add2_;
shared_ptr<Function> sub2_;
shared_ptr<Function> mul2_;
shared_ptr<Function> div2_;
shared_ptr<Function> minimum2_;
shared_ptr<Function> maximum2_;
shared_ptr<Function> mul_scalar_;
shared_ptr<Function> mul_scalar2_;
shared_ptr<Function> min_;
shared_ptr<Function> max_;
shared_ptr<Function> broadcast_;
shared_ptr<Function> greater_equal_;
shared_ptr<Function> less_equal_;
shared_ptr<Function> greater_;
shared_ptr<Function> less_;
shared_ptr<Function> sum_;
VariablePtr scale_sptr_;

public:
MinMaxQuantize(const Context &ctx, float decay, bool x_min_max, bool ema,
bool ste_fine_grained, float eps)
: BaseFunction(ctx, decay, x_min_max, ema, ste_fine_grained, eps),
decay_(decay), x_min_max_(x_min_max), ema_(ema),
ste_fine_grained_(ste_fine_grained), eps_(eps) {}
virtual ~MinMaxQuantize() {}
virtual shared_ptr<Function> copy() const {
return create_MinMaxQuantize(ctx_, decay_, x_min_max_, ema_,
ste_fine_grained_, eps_);
}
virtual int min_inputs() { return 5; }
virtual int min_outputs() { return 1; }
virtual vector<dtypes> in_types() {
return vector<dtypes>{get_dtype<T>(), get_dtype<T>(), get_dtype<T>(),
get_dtype<T>(), get_dtype<T>(), get_dtype<T>()};
}
virtual vector<dtypes> out_types() { return vector<dtypes>{get_dtype<T>()}; }
virtual vector<string> allowed_array_classes() {
return SingletonManager::get<Cpu>()->array_classes();
}
virtual string name() { return "MinMaxQuantize"; }

protected:
NBLA_API virtual void setup_impl(const Variables &inputs,
const Variables &outputs);
NBLA_API virtual void forward_impl(const Variables &inputs,
const Variables &outputs);
NBLA_API virtual void backward_impl(const Variables &inputs,
const Variables &outputs,
const vector<bool> &propagate_down,
const vector<bool> &accum);
NBLA_API virtual void nudge_range(Variable *qr_min, Variable *qr_max);
NBLA_API virtual void nudge_qr_min_max(Variable *qr_min, Variable *qr_max,
Variable *ql_min, Variable *ql_max,
Variable *scale,
Variable *qr_min_nudged,
Variable *qr_max_nudged);
};
}
#endif
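The header above declares `nudge_range` and `nudge_qr_min_max` helpers and keeps `decay_` for the EMA update of the quantization range. The sketch below illustrates, in NumPy, how the documented range handling fits together (the four `x_min_max` / `ema` cases and the `eps` constraint); it is a sketch of the documented behaviour, not the PR's C++ implementation:

```python
import numpy as np

def update_quantization_range(x, qr_min, qr_max, x_min_max, ema,
                              decay=0.999, eps=0.01):
    """Illustrative range update following the documented x_min_max / ema cases."""
    if x_min_max and ema:
        # Exponential moving average of the observed min / max of x.
        qr_min = decay * qr_min + (1.0 - decay) * x.min()
        qr_max = decay * qr_max + (1.0 - decay) * x.max()
    elif x_min_max and not ema:
        qr_min, qr_max = x.min(), x.max()
    # Otherwise the stored (EMA-tracked or learned) qr_min / qr_max are used as-is.

    # nudge_range: keep the range at least eps wide (see the eps argument above).
    qr_max = max(qr_max, qr_min + eps)
    return qr_min, qr_max
```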
56 changes: 56 additions & 0 deletions python/src/nnabla/backward_function/min_max_quantize.py
@@ -0,0 +1,56 @@
# Copyright (c) 2017 Sony Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import nnabla as nn
import nnabla.functions as F
from .backward_function import BackwardFunction


class MinMaxQuantizeBackward(BackwardFunction):

@property
def name(self):
return 'MinMaxQuantizeBackward'

def _create_forward_inputs_and_outputs(self, inputs, outputs):
msg = ("Implement this function correctly \n"
"if the backward function takes the output(s) of the forward function.\n"
"See the SigmoidBackward in that case.\n"
"Delete this error message after implementing.")
raise Exception(msg)

# Inputs on the forward graph
inputs_fwd = []
for i in range(self._num_inputs_fwd):
need_grad = self.forward_func.inputs[i].need_grad
v = nn.Variable(inputs[i].shape, need_grad=need_grad)
v.data = inputs[i].data
v.grad = outputs[i].data
inputs_fwd += [v]
# Outputs on the forward graph
outputs_fwd = []
for i in range(self._num_outputs_fwd):
inp = inputs[self._num_inputs_fwd + i]
v = nn.Variable(inp.shape)
v.grad = inp.data
outputs_fwd += [v]
return inputs_fwd, outputs_fwd

def backward_impl(self, inputs, outputs, prop_down, accum):
# inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
# [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

raise NotImplementedError(
"The backward method of MinMaxQuantizeBackward class is not implemented.")