Layer serialization #1508

Merged: 8 commits, Sep 28, 2018
27 changes: 18 additions & 9 deletions src/mlpack/methods/ann/layer/batch_norm.hpp
@@ -60,11 +60,11 @@ class BatchNorm
  BatchNorm();

  /**
   * Create the BatchNorm layer object for a specified number of input units.
   *
   * @param size The number of input units.
   * @param eps The epsilon added to variance to ensure numerical stability.
   */
  BatchNorm(const size_t size, const double eps = 1e-8);

  /**
@@ -133,10 +133,10 @@ class BatchNorm
  bool& Deterministic() { return deterministic; }

  //! Get the mean over the training data.
-  OutputDataType TrainingMean() { return stats.mean(); }
+  OutputDataType TrainingMean() { return runningMean; }

  //! Get the variance over the training data.
-  OutputDataType TrainingVariance() { return stats.var(1); }
+  OutputDataType TrainingVariance() { return runningVariance / count; }

  /**
   * Serialize the layer.
@@ -151,6 +151,9 @@ class BatchNorm
  //! Locally-stored epsilon value.
  double eps;

+  //! Whether we are in loading or saving mode.
+  bool loading;
+
  //! Locally-stored scale parameter.
  OutputDataType gamma;

@@ -166,14 +169,20 @@ class BatchNorm
   */
  bool deterministic;

+  //! Locally-stored running mean/variance counter.
+  size_t count;
+
  //! Locally-stored mean object.
  OutputDataType mean;

  //! Locally-stored variance object.
  OutputDataType variance;

-  //! Locally-stored running statistics object.
-  arma::running_stat_vec<arma::colvec> stats;
+  //! Locally-stored running mean object.
+  OutputDataType runningMean;
+
+  //! Locally-stored running variance object.
+  OutputDataType runningVariance;

  //! Locally-stored gradient object.
  OutputDataType gradient;
58 changes: 40 additions & 18 deletions src/mlpack/methods/ann/layer/batch_norm_impl.hpp
@@ -22,32 +22,41 @@ namespace ann { /** Artificial Neural Network. */

template<typename InputDataType, typename OutputDataType>
BatchNorm<InputDataType, OutputDataType>::BatchNorm() :
    size(10),
    eps(1e-8),
-    deterministic(false)
+    loading(false),
+    deterministic(false),
+    count(0)
{
  // Nothing to do here.
}

template <typename InputDataType, typename OutputDataType>
Review comment (Member): Same here.

BatchNorm<InputDataType, OutputDataType>::BatchNorm(
    const size_t size, const double eps) :
    size(size),
    eps(eps),
-    deterministic(false)
+    loading(false),
+    deterministic(false),
+    count(0)
{
  weights.set_size(size + size, 1);
+  runningMean.zeros(size, 1);
+  runningVariance.zeros(size, 1);
}

template<typename InputDataType, typename OutputDataType>
void BatchNorm<InputDataType, OutputDataType>::Reset()
{
  gamma = arma::mat(weights.memptr(), size, 1, false, false);
  beta = arma::mat(weights.memptr() + gamma.n_elem, size, 1, false, false);
+
+  if (!loading)
+  {
+    gamma.fill(1.0);
+    beta.fill(0.0);
+  }
+
  deterministic = false;
-  gamma.fill(1.0);
-  beta.fill(0.0);
-  stats.reset();
+  loading = false;
}
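
This is the heart of the serialization fix: `serialize()` flips `loading` when reading an archive, so the `Reset()` call that follows deserialization keeps the loaded `gamma`/`beta` instead of re-initializing them, and the flag then clears itself. A minimal standalone sketch of that protocol (the `ToyLayer` class is hypothetical, not mlpack API):

```cpp
#include <boost/serialization/nvp.hpp>

// Hypothetical miniature of the loading-flag protocol above.
struct ToyLayer
{
  bool loading = false;
  double gamma = 0.0;

  void Reset()
  {
    // Fresh layers get default parameters; a layer that was just
    // deserialized keeps the values the archive provided.
    if (!loading)
      gamma = 1.0;

    // One-shot flag: any later Reset() initializes as usual.
    loading = false;
  }

  template<typename Archive>
  void serialize(Archive& ar, const unsigned int /* version */)
  {
    if (Archive::is_loading::value)
      loading = true;
    ar & BOOST_SERIALIZATION_NVP(gamma);
  }
};
```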

template<typename InputDataType, typename OutputDataType>
Expand All @@ -59,29 +68,30 @@ void BatchNorm<InputDataType, OutputDataType>::Forward(
// the forward pass when deterministic is set to true.
if (deterministic)
{
// Mini--batch mean using the stats object.
mean = stats.mean();

// Mini--batch variance using the stats object.
variance = stats.var(1);

// Normalize the input and scale and shift the output.
output = input.each_col() - mean;
output.each_col() %= gamma / arma::sqrt(variance + eps);
output = input.each_col() - runningMean;
output.each_col() %= gamma / arma::sqrt(runningVariance / count + eps);
output.each_col() += beta;
}
else
{
mean = arma::mean(input, 1);
variance = arma::var(input, 1, 1);

for (size_t i = 0; i < input.n_cols; i++)
stats(input.col(i));

// Normalize the input.
output = input.each_col() - mean;
output.each_col() /= arma::sqrt(variance + eps);

// Use Welford method to compute the sample variance and mean.
for (size_t i = 0; i < input.n_cols; i++)
{
count += 1;

OutputDataType delta = input.col(i) - runningMean;
runningMean = runningMean + delta / count;
runningVariance += delta % (input.col(i) - runningMean);
}

// Reused in the backward and gradient step.
normalized = output;
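
The Welford update replaces the `arma::running_stat_vec` bookkeeping while producing the same statistics. A standalone sketch in plain Armadillo (variable names mirror the diff; this is an illustration, not part of the patch) showing that `runningMean` matches `arma::mean` and `runningVariance / count` matches the population variance `arma::var(..., 1)`:

```cpp
#include <iostream>
#include <armadillo>

int main()
{
  arma::mat input(5, 100, arma::fill::randu);

  // Welford accumulation, mirroring the training branch of Forward().
  size_t count = 0;
  arma::vec runningMean(5, arma::fill::zeros);
  arma::vec runningVariance(5, arma::fill::zeros);
  for (size_t i = 0; i < input.n_cols; ++i)
  {
    count += 1;
    arma::vec delta = input.col(i) - runningMean;
    runningMean += delta / count;
    runningVariance += delta % (input.col(i) - runningMean);
  }

  // Both comparisons should print 1.
  std::cout << arma::approx_equal(runningMean,
      arma::vec(arma::mean(input, 1)), "absdiff", 1e-10) << '\n';
  std::cout << arma::approx_equal(runningVariance / count,
      arma::vec(arma::var(input, 1, 1)), "absdiff", 1e-10) << '\n';
}
```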

@@ -139,8 +149,20 @@ template<typename Archive>
void BatchNorm<InputDataType, OutputDataType>::serialize(
    Archive& ar, const unsigned int /* version */)
{
+  ar & BOOST_SERIALIZATION_NVP(size);
+
+  if (Archive::is_loading::value)
+  {
+    weights.set_size(size + size, 1);
+    loading = true;
+  }
+
  ar & BOOST_SERIALIZATION_NVP(eps);
  ar & BOOST_SERIALIZATION_NVP(gamma);
  ar & BOOST_SERIALIZATION_NVP(beta);
+  ar & BOOST_SERIALIZATION_NVP(count);
+  ar & BOOST_SERIALIZATION_NVP(runningMean);
+  ar & BOOST_SERIALIZATION_NVP(runningVariance);
}

} // namespace ann
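
Because `size` is written first and `weights` is resized during loading, a saved BatchNorm layer round-trips through any Boost archive with its running statistics intact. A minimal usage sketch (assuming mlpack 3.x, where `data::Save`/`data::Load` pick the archive format from the file extension; this is not part of the diff):

```cpp
#include <mlpack/core.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>

using namespace mlpack;
using namespace mlpack::ann;

int main()
{
  // A small network containing a BatchNorm layer.
  FFN<NegativeLogLikelihood<>> model;
  model.Add<Linear<>>(5, 10);
  model.Add<BatchNorm<>>(10);
  model.Add<LogSoftMax<>>();

  // ... train the model here ...

  // The BatchNorm serialize() above stores eps, gamma, beta, count, and
  // the running mean/variance alongside the rest of the model.
  data::Save("model.xml", "model", model, false);

  FFN<NegativeLogLikelihood<>> model2;
  data::Load("model.xml", "model", model2, false);
}
```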
2 changes: 1 addition & 1 deletion src/mlpack/methods/ann/layer/elu.hpp
@@ -194,7 +194,7 @@ class ELU
  template<typename eT>
  void Fn(const arma::Mat<eT>& x, arma::Mat<eT>& y)
  {
-    y.set_size(size(x));
+    y.set_size(arma::size(x));

    for (size_t i = 0; i < x.n_elem; i++)
    {
13 changes: 8 additions & 5 deletions src/mlpack/methods/ann/layer/layer_norm.hpp
@@ -69,11 +69,11 @@ class LayerNorm
  LayerNorm();

  /**
   * Create the LayerNorm object for a specified number of input units.
   *
   * @param size The number of input units.
   * @param eps The epsilon added to variance to ensure numerical stability.
   */
  LayerNorm(const size_t size, const double eps = 1e-8);

  /**
@@ -155,6 +155,9 @@ class LayerNorm
  //! Locally-stored epsilon value.
  double eps;

+  //! Whether we are in loading or saving mode.
+  bool loading;
+
  //! Locally-stored scale parameter.
  OutputDataType gamma;

28 changes: 23 additions & 5 deletions src/mlpack/methods/ann/layer/layer_norm_impl.hpp
@@ -19,10 +19,11 @@
namespace mlpack {
namespace ann { /** Artificial Neural Network. */

template<typename InputDataType, typename OutputDataType>
LayerNorm<InputDataType, OutputDataType>::LayerNorm() :
    size(10),
-    eps(1e-8)
+    eps(1e-8),
+    loading(false)
{
  // Nothing to do here.
}
@@ -31,7 +32,8 @@ template <typename InputDataType, typename OutputDataType>
LayerNorm<InputDataType, OutputDataType>::LayerNorm(
    const size_t size, const double eps) :
    size(size),
-    eps(eps)
+    eps(eps),
+    loading(false)
{
  weights.set_size(size + size, 1);
}
@@ -41,8 +43,14 @@ void LayerNorm<InputDataType, OutputDataType>::Reset()
{
  gamma = arma::mat(weights.memptr(), size, 1, false, false);
  beta = arma::mat(weights.memptr() + gamma.n_elem, size, 1, false, false);
-  gamma.fill(1.0);
-  beta.fill(0.0);
+
+  if (!loading)
+  {
+    gamma.fill(1.0);
+    beta.fill(0.0);
+  }
+
+  loading = false;
}

template<typename InputDataType, typename OutputDataType>
@@ -55,6 +63,7 @@ void LayerNorm<InputDataType, OutputDataType>::Forward(

  // Normalize the input.
  output = input.each_row() - mean;
+
  output.each_row() /= arma::sqrt(variance + eps);

  // Reused in the backward and gradient step.
@@ -113,6 +122,15 @@ template<typename Archive>
void LayerNorm<InputDataType, OutputDataType>::serialize(
    Archive& ar, const unsigned int /* version */)
{
+  ar & BOOST_SERIALIZATION_NVP(size);
+
+  if (Archive::is_loading::value)
+  {
+    weights.set_size(size + size, 1);
+    loading = true;
+  }
+
  ar & BOOST_SERIALIZATION_NVP(eps);
  ar & BOOST_SERIALIZATION_NVP(gamma);
  ar & BOOST_SERIALIZATION_NVP(beta);
}
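
Note the contrast with BatchNorm: LayerNorm recomputes its statistics from every incoming sample, even at test time, so only `eps`, `gamma`, and `beta` need to be serialized and there is no `count` or running mean/variance. A standalone sketch of the per-sample statistics in plain Armadillo (illustrative only):

```cpp
#include <armadillo>

int main()
{
  arma::mat input(5, 3, arma::fill::randu);
  const double eps = 1e-8;

  // One mean/variance per column (sample), taken across the rows
  // (features), as in LayerNorm::Forward() above.
  arma::rowvec mean = arma::mean(input, 0);
  arma::rowvec variance = arma::var(input, 1, 0);

  arma::mat output = input.each_row() - mean;
  output.each_row() /= arma::sqrt(variance + eps);

  output.print("layer-normalized input");
}
```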
@@ -45,7 +45,7 @@ void EarthMoverDistance<InputDataType, OutputDataType>::Backward(
template<typename InputDataType, typename OutputDataType>
template<typename Archive>
void EarthMoverDistance<InputDataType, OutputDataType>::serialize(
-    Archive& ar,
+    Archive& /* ar */,
    const unsigned int /* version */)
{
  /* Nothing to do here */
61 changes: 61 additions & 0 deletions src/mlpack/tests/serialization_test.cpp
@@ -11,6 +11,11 @@
 */
#include <mlpack/core.hpp>

+#include <mlpack/methods/ann/layer/layer.hpp>
+#include <mlpack/methods/ann/layer/layer_types.hpp>
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+#include <mlpack/methods/ann/ffn.hpp>
+
#include <boost/test/unit_test.hpp>
#include "test_tools.hpp"
#include "serialization.hpp"
@@ -1784,4 +1789,60 @@ BOOST_AUTO_TEST_CASE(ssRBMTest)
  CheckMatrices(Rbm.Weight(), RbmBinary.Weight());
}

+// General ANN serialization test.
+template<typename LayerType>
+void ANNLayerSerializationTest(LayerType& layer)
+{
+  arma::mat input(5, 100, arma::fill::randu);
+  arma::mat output(5, 100, arma::fill::randu);
+
+  FFN<NegativeLogLikelihood<>, ann::RandomInitialization> model;
+  model.Add<Linear<>>(input.n_rows, 10);
+  model.Add<LayerType>(layer);
+  model.Add<ReLULayer<>>();
+  model.Add<Linear<>>(10, output.n_rows);
+  model.Add<LogSoftMax<>>();
+
+  optimization::StandardSGD opt(0.1, 1, 5, -100, false);
+  model.Train(input, output, opt);
+
+  arma::mat originalOutput;
+  model.Predict(input.col(0), originalOutput);
+
+  // Now serialize the model.
+  FFN<NegativeLogLikelihood<>, ann::RandomInitialization> xmlModel, textModel,
+      binaryModel;
+  SerializeObjectAll(model, xmlModel, textModel, binaryModel);
+
+  // Ensure that predictions are the same.
+  arma::mat modelOutput, xmlOutput, textOutput, binaryOutput;
+  model.Predict(input.col(0), modelOutput);
+  xmlModel.Predict(input.col(0), xmlOutput);
+  textModel.Predict(input.col(0), textOutput);
+  binaryModel.Predict(input.col(0), binaryOutput);
+
+  CheckMatrices(originalOutput, modelOutput, 1e-5);
+  CheckMatrices(originalOutput, xmlOutput, 1e-5);
+  CheckMatrices(originalOutput, textOutput, 1e-5);
+  CheckMatrices(originalOutput, binaryOutput, 1e-5);
+}
+
+/**
+ * Simple serialization test for batch normalization layer.
+ */
+BOOST_AUTO_TEST_CASE(BatchNormSerializationTest)
+{
+  BatchNorm<> layer(10);
+  ANNLayerSerializationTest(layer);
+}
+
+/**
+ * Simple serialization test for layer normalization layer.
+ */
+BOOST_AUTO_TEST_CASE(LayerNormSerializationTest)
+{
+  LayerNorm<> layer(10);
+  ANNLayerSerializationTest(layer);
+}
BOOST_AUTO_TEST_SUITE_END();