Commit
Merge pull request #1349 from sourabhvarshney111/alphaDropout
Add Alpha dropout.
Showing 5 changed files with 359 additions and 0 deletions.
alpha_dropout.hpp
@@ -0,0 +1,178 @@
/**
 * @file alpha_dropout.hpp
 * @author Dakshit Agrawal
 *
 * Definition of the AlphaDropout class, which implements a regularizer that
 * randomly sets units to alphaDash to prevent them from co-adapting, and
 * then applies an affine transformation so as to keep the mean and variance
 * of the outputs at their original values.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license. You should have received a copy of the
 * 3-clause BSD license along with mlpack. If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */

#ifndef MLPACK_METHODS_ANN_LAYER_ALPHA_DROPOUT_HPP
#define MLPACK_METHODS_ANN_LAYER_ALPHA_DROPOUT_HPP

#include <mlpack/prereqs.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
 * The alpha-dropout layer is a regularizer that randomly, with probability
 * 'ratio', sets input values to alphaDash. It is mostly used together with
 * the SELU activation function, since ordinary dropout would not preserve
 * the mean and variance of the activations across successive layers.
 *
 * For more information, see the following.
 *
 * @code
 * @article{Klambauer2017,
 *   author  = {Gunter Klambauer and Thomas Unterthiner and Andreas Mayr},
 *   title   = {Self-Normalizing Neural Networks},
 *   journal = {Advances in Neural Information Processing Systems},
 *   year    = {2017}
 * }
 * @endcode
 *
 * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
 *         arma::sp_mat or arma::cube).
 * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
 *         arma::sp_mat or arma::cube).
 */
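// Implementation note: during training, each input x is kept with
// probability (1 - ratio) and replaced by alphaDash otherwise; the result
// is then passed through the affine map a * x + b.  Ratio() below sets
//
//   a = ((1 - ratio) * (1 + ratio * alphaDash^2))^(-1/2)
//   b = -a * alphaDash * ratio
//
// which maps a zero-mean, unit-variance input (as produced by SELU
// activations) back to zero mean and unit variance after dropout.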
template <typename InputDataType = arma::mat,
          typename OutputDataType = arma::mat>
class AlphaDropout
{
 public:
  /**
   * Create the AlphaDropout object using the specified ratio.
   *
   * @param ratio The probability of setting a value to alphaDash.
   * @param alphaDash The dropout scaling parameter.
   */
  AlphaDropout(const double ratio = 0.5,
               const double alphaDash = -alpha * lambda);

  /**
   * Ordinary feed-forward pass of the alpha-dropout layer.
   *
   * @param input Input data used for evaluating the specified function.
   * @param output Resulting output activation.
   */
  template<typename eT>
  void Forward(const arma::Mat<eT>&& input, arma::Mat<eT>&& output);

  /**
   * Ordinary feed-backward pass of the alpha-dropout layer.
   *
   * @param input The propagated input activation.
   * @param gy The backpropagated error.
   * @param g The calculated gradient.
   */
  template<typename eT>
  void Backward(const arma::Mat<eT>&& /* input */,
                arma::Mat<eT>&& gy,
                arma::Mat<eT>&& g);
  //! Get the input parameter.
  InputDataType const& InputParameter() const { return inputParameter; }
  //! Modify the input parameter.
  InputDataType& InputParameter() { return inputParameter; }

  //! Get the output parameter.
  OutputDataType const& OutputParameter() const { return outputParameter; }
  //! Modify the output parameter.
  OutputDataType& OutputParameter() { return outputParameter; }

  //! Get the delta.
  OutputDataType const& Delta() const { return delta; }
  //! Modify the delta.
  OutputDataType& Delta() { return delta; }

  //! The value of the deterministic parameter.
  bool Deterministic() const { return deterministic; }
  //! Modify the value of the deterministic parameter.
  bool& Deterministic() { return deterministic; }

  //! The probability of setting a value to alphaDash.
  double Ratio() const { return ratio; }

  //! Value to be multiplied with x for the affine transformation.
  double A() const { return a; }

  //! Value to be added to a * x for the affine transformation.
  double B() const { return b; }

  //! Value of alphaDash.
  double AlphaDash() const { return alphaDash; }

  //! Get the mask.
  OutputDataType const& Mask() const { return mask; }

  //! Modify the probability of setting a value to alphaDash. As 'a' and 'b'
  //! depend on 'ratio', recompute them as well.
  void Ratio(const double r)
  {
    ratio = r;
    a = std::pow((1 - ratio) * (1 + ratio * std::pow(alphaDash, 2)), -0.5);
    b = -a * alphaDash * ratio;
  }

  /**
   * Serialize the layer.
   */
  template<typename Archive>
  void serialize(Archive& ar, const unsigned int /* version */);

 private:
  //! Locally-stored delta object.
  OutputDataType delta;

  //! Locally-stored input parameter object.
  InputDataType inputParameter;

  //! Locally-stored output parameter object.
  OutputDataType outputParameter;

  //! Locally-stored mask object.
  OutputDataType mask;

  //! The probability of setting a value to alphaDash.
  double ratio;

  //! If true, dropout and scaling are disabled (see notes above).
  bool deterministic;

  //! Value of alpha for normalized inputs (taken from SELU).
  static constexpr double alpha = 1.6732632423543772848170429916717;

  //! Value of lambda for normalized inputs (taken from SELU).
  static constexpr double lambda = 1.0507009873554804934193349852946;

  //! The low-variance value of the SELU activation function.
  double alphaDash;

  //! Value to be multiplied with x for the affine transformation.
  double a;

  //! Value to be added to a * x for the affine transformation.
  double b;
}; // class AlphaDropout

} // namespace ann
} // namespace mlpack

// Include implementation.
#include "alpha_dropout_impl.hpp"

#endif
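Below is a minimal sketch (not part of the commit) of how the interface above could be exercised directly, without building a full network. The include path and the main() driver are my own assumptions; everything else follows the accessors and the Forward()/Backward() signatures declared in the header.

#include <mlpack/methods/ann/layer/alpha_dropout.hpp>
#include <utility>

int main()
{
  using namespace mlpack::ann;

  // Replace roughly 30% of the inputs with alphaDash during training.
  AlphaDropout<> dropout(0.3);

  arma::mat input = arma::randn<arma::mat>(10, 5);  // SELU-like N(0, 1) data.
  arma::mat output;

  // Training mode: sample a mask and apply the affine correction.
  dropout.Deterministic() = false;
  dropout.Forward(std::move(input), std::move(output));

  // Backward pass: the error flows only through the kept units, scaled by a.
  arma::mat gy = arma::ones<arma::mat>(10, 5), g;
  dropout.Backward(std::move(input), std::move(gy), std::move(g));

  // Test mode: the layer acts as the identity.
  dropout.Deterministic() = true;
  dropout.Forward(std::move(input), std::move(output));
  output.print("deterministic output (identical to the input):");

  return 0;
}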
alpha_dropout_impl.hpp
@@ -0,0 +1,85 @@
/**
 * @file alpha_dropout_impl.hpp
 * @author Dakshit Agrawal
 *
 * Implementation of the AlphaDropout class, which implements a regularizer
 * that randomly sets units to alphaDash to prevent them from co-adapting,
 * and then applies an affine transformation so as to keep the mean and
 * variance of the outputs at their original values.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license. You should have received a copy of the
 * 3-clause BSD license along with mlpack. If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */

#ifndef MLPACK_METHODS_ANN_LAYER_ALPHA_DROPOUT_IMPL_HPP
#define MLPACK_METHODS_ANN_LAYER_ALPHA_DROPOUT_IMPL_HPP

// In case it hasn't yet been included.
#include "alpha_dropout.hpp"

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

template<typename InputDataType, typename OutputDataType>
AlphaDropout<InputDataType, OutputDataType>::AlphaDropout(
    const double ratio,
    const double alphaDash) :
    ratio(ratio),
    deterministic(false),
    alphaDash(alphaDash)
{
  // Compute a and b for the given ratio.
  Ratio(ratio);
}

template<typename InputDataType, typename OutputDataType>
template<typename eT>
void AlphaDropout<InputDataType, OutputDataType>::Forward(
    const arma::Mat<eT>&& input,
    arma::Mat<eT>&& output)
{
  // The dropout mask will not be multiplied in the deterministic mode
  // (during testing).
  if (deterministic)
  {
    output = input;
  }
  else
  {
    // Set values to alphaDash with probability ratio.  Then apply an affine
    // transformation so as to keep the mean and variance of the outputs at
    // their original values.
    mask = arma::randu< arma::Mat<eT> >(input.n_rows, input.n_cols);
    mask.transform( [&](double val) { return (val > ratio); } );
    output = (input % mask + alphaDash * (1 - mask)) * a + b;
  }
}

template<typename InputDataType, typename OutputDataType>
template<typename eT>
void AlphaDropout<InputDataType, OutputDataType>::Backward(
    const arma::Mat<eT>&& /* input */,
    arma::Mat<eT>&& gy,
    arma::Mat<eT>&& g)
{
  // The gradient flows only through the kept units, scaled by a.
  g = gy % mask * a;
}

template<typename InputDataType, typename OutputDataType>
template<typename Archive>
void AlphaDropout<InputDataType, OutputDataType>::serialize(
    Archive& ar,
    const unsigned int /* version */)
{
  ar & BOOST_SERIALIZATION_NVP(ratio);
  ar & BOOST_SERIALIZATION_NVP(alphaDash);
  ar & BOOST_SERIALIZATION_NVP(a);
  ar & BOOST_SERIALIZATION_NVP(b);
}

} // namespace ann
} // namespace mlpack

#endif
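As a sanity check on the affine parameters (again, a standalone sketch rather than part of the commit), the snippet below recomputes a and b for the default ratio of 0.5 using the same formulas as Ratio(), then verifies by Monte Carlo that the forward rule maps standard-normal input back to roughly zero mean and unit variance, which is exactly the property the layer is designed to preserve.

#include <armadillo>
#include <cmath>
#include <iostream>

int main()
{
  // SELU constants, copied from alpha_dropout.hpp above.
  const double alpha = 1.6732632423543772848170429916717;
  const double lambda = 1.0507009873554804934193349852946;
  const double alphaDash = -alpha * lambda;  // ~ -1.7581
  const double ratio = 0.5;

  // Affine parameters, as computed in AlphaDropout::Ratio().
  const double a = std::pow(
      (1 - ratio) * (1 + ratio * alphaDash * alphaDash), -0.5);
  const double b = -a * alphaDash * ratio;
  std::cout << "a = " << a << ", b = " << b << std::endl;  // ~0.886, ~0.779

  // Monte Carlo check: apply the forward rule to N(0, 1) samples.
  const arma::uword n = 1000000;
  arma::vec x = arma::randn<arma::vec>(n);
  arma::vec mask = arma::conv_to<arma::vec>::from(
      arma::randu<arma::vec>(n) > ratio);
  arma::vec y = (x % mask + alphaDash * (1 - mask)) * a + b;

  // Both should come out close to 0 and 1, respectively.
  std::cout << "mean = " << arma::mean(y)
            << ", var = " << arma::var(y) << std::endl;

  return 0;
}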