
Commit

Merge pull request #1349 from sourabhvarshney111/alphaDropout
Add Alpha dropout.
zoq committed Apr 7, 2018
2 parents b6b96de + 8ed60d7 commit bb503e2
Showing 5 changed files with 359 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/mlpack/methods/ann/layer/CMakeLists.txt
@@ -5,6 +5,8 @@ set(SOURCES
add_impl.hpp
add_merge.hpp
add_merge_impl.hpp
alpha_dropout.hpp
alpha_dropout_impl.hpp
base_layer.hpp
bilinear_interpolation.hpp
bilinear_interpolation_impl.hpp
178 changes: 178 additions & 0 deletions src/mlpack/methods/ann/layer/alpha_dropout.hpp
@@ -0,0 +1,178 @@
/**
* @file alpha_dropout.hpp
* @author Dakshit Agrawal
*
* Definition of the AlphaDropout class, which implements a regularizer that
* randomly sets units to alphaDash to prevent them from co-adapting, and then
* applies an affine transformation to keep the mean and variance of the
* outputs at their original values.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/

#ifndef MLPACK_METHODS_ANN_LAYER_ALPHA_DROPOUT_HPP
#define MLPACK_METHODS_ANN_LAYER_ALPHA_DROPOUT_HPP

#include <mlpack/prereqs.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
* The alpha-dropout layer is a regularizer that randomly sets input values
* to alphaDash with probability 'ratio'. It is mostly used with the SELU
* activation function, where it keeps the mean and variance of the outputs
* at their original values so that successive layers stay normalized.
*
* For more information, see the following.
*
* @code
* @article{Klambauer2017,
* author = {Gunter Klambauer and Thomas Unterthiner and
* Andreas Mayr},
* title = {Self-Normalizing Neural Networks},
* journal = {Advances in Neural Information Processing Systems},
* year = {2017}
* }
* @endcode
*
* @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
* arma::sp_mat or arma::cube).
* @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
* arma::sp_mat or arma::cube).
*/
template <typename InputDataType = arma::mat,
typename OutputDataType = arma::mat>
class AlphaDropout
{
public:
/**
* Create the AlphaDropout object using the specified ratio.
*
* @param ratio The probability of setting a value to alphaDash.
* @param alphaDash The dropout scaling parameter.
*/
AlphaDropout(const double ratio = 0.5,
const double alphaDash = -alpha * lambda);

/**
* Ordinary feed forward pass of the alpha_dropout layer.
*
* @param input Input data used for evaluating the specified function.
* @param output Resulting output activation.
*/
template<typename eT>
void Forward(const arma::Mat<eT>&& input, arma::Mat<eT>&& output);

/**
* Ordinary feed backward pass of the alpha_dropout layer.
*
* @param input The propagated input activation.
* @param gy The backpropagated error.
* @param g The calculated gradient.
*/
template<typename eT>
void Backward(
const arma::Mat<eT>&& /* input */,
arma::Mat<eT>&& gy,
arma::Mat<eT>&& g);

//! Get the input parameter.
InputDataType const& InputParameter() const { return inputParameter; }
//! Modify the input parameter.
InputDataType& InputParameter() { return inputParameter; }

//! Get the output parameter.
OutputDataType const& OutputParameter() const { return outputParameter; }
//! Modify the output parameter.
OutputDataType& OutputParameter() { return outputParameter; }

//! Get the delta.
OutputDataType const& Delta() const { return delta; }
//! Modify the delta.
OutputDataType& Delta() { return delta; }

//! The value of the deterministic parameter.
bool Deterministic() const { return deterministic; }
//! Modify the value of the deterministic parameter.
bool& Deterministic() { return deterministic; }

//! The probability of setting a value to alphaDash.
double Ratio() const { return ratio; }

//! Value to be multiplied with x for affine transformation.
double A() const { return a; }

//! Value to be added to a*x for affine transformation.
double B() const { return b; }

//! Value of alphaDash.
double AlphaDash() const { return alphaDash; }

//! Get the mask.
OutputDataType const& Mask() const { return mask; }

//! Modify the probability of setting a value to alphaDash. As
//! 'a' and 'b' depend on 'ratio', modify them as well.
void Ratio(const double r)
{
ratio = r;
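// From Klambauer et al. (2017): with keep probability q = 1 - ratio, choosing
// a = (q + alphaDash^2 * ratio * q)^(-1/2) and b = -a * alphaDash * ratio
// restores zero mean and unit variance after units are set to alphaDash.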
a = pow((1 - ratio) * (1 + ratio * pow(alphaDash, 2)), -0.5);
b = -a * alphaDash * ratio;
}


/**
* Serialize the layer.
*/
template<typename Archive>
void serialize(Archive& ar, const unsigned int /* version */);

private:
//! Locally-stored delta object.
OutputDataType delta;

//! Locally-stored input parameter object.
InputDataType inputParameter;

//! Locally-stored output parameter object.
OutputDataType outputParameter;

//! Locally-stored mask object.
OutputDataType mask;

//! The probability of setting a value to alphaDash.
double ratio;

//! If true, dropout and scaling are disabled (see notes above).
bool deterministic;

//! Value of alpha for normalized inputs (taken from SELU).
static constexpr double alpha = 1.6732632423543772848170429916717;

//! Value of lambda for normalized inputs (taken from SELU).
static constexpr double lambda = 1.0507009873554804934193349852946;

//! The low variance value of SELU activation function.
double alphaDash;

//! Value to be multiplied with x for affine transformation.
double a;

//! Value to be added to a*x for affine transformation.
double b;
}; // class AlphaDropout

} // namespace ann
} // namespace mlpack

// Include implementation.
#include "alpha_dropout_impl.hpp"


#endif
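
For reference, here is a minimal standalone usage sketch of the new layer. It mirrors the Forward()/Deterministic() calls exercised by the tests added below; the input size and the ratio of 0.2 are illustrative assumptions, not part of this commit.

#include <mlpack/methods/ann/layer/alpha_dropout.hpp>

using namespace mlpack::ann;

int main()
{
  // 1000 standard-normal samples, as in SimpleAlphaDropoutLayerTest below.
  arma::mat input = arma::randn<arma::mat>(1000, 1);
  arma::mat output;

  // Drop (set to alphaDash) with probability 0.2; alphaDash keeps its default
  // value of -alpha * lambda.
  AlphaDropout<> module(0.2);

  module.Deterministic() = false;  // Training mode: mask and affine transform.
  module.Forward(std::move(input), std::move(output));

  module.Deterministic() = true;   // Testing mode: output is simply the input.
  module.Forward(std::move(input), std::move(output));
}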
85 changes: 85 additions & 0 deletions src/mlpack/methods/ann/layer/alpha_dropout_impl.hpp
@@ -0,0 +1,85 @@
/**
* @file alpha_dropout_impl.hpp
* @author Dakshit Agrawal
*
* Implementation of the AlphaDropout class, which implements a regularizer
* that randomly sets units to alphaDash to prevent them from co-adapting, and
* then applies an affine transformation to keep the mean and variance of the
* outputs at their original values.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/

#ifndef MLPACK_METHODS_ANN_LAYER_ALPHA_DROPOUT_IMPL_HPP
#define MLPACK_METHODS_ANN_LAYER_ALPHA_DROPOUT_IMPL_HPP

// In case it hasn't yet been included.
#include "alpha_dropout.hpp"

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

template<typename InputDataType, typename OutputDataType>
AlphaDropout<InputDataType, OutputDataType>::AlphaDropout(
const double ratio,
const double alphaDash) :
ratio(ratio),
deterministic(false),
alphaDash(alphaDash)
{
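// Recompute a and b so that they are consistent with the given ratio.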
Ratio(ratio);
}

template<typename InputDataType, typename OutputDataType>
template<typename eT>
void AlphaDropout<InputDataType, OutputDataType>::Forward(
const arma::Mat<eT>&& input,
arma::Mat<eT>&& output)
{
// The dropout mask is not applied in deterministic mode (during testing).
if (deterministic)
{
output = input;
}
else
{
// Set values to alphaDash with probability 'ratio'. Then apply an affine
// transformation so as to keep the mean and variance of the outputs at
// their original values.

mask = arma::randu< arma::Mat<eT> >(input.n_rows, input.n_cols);
mask.transform( [&](double val) { return (val > ratio); } );
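// mask is now 1 where the input is kept and 0 where it will be replaced
// by alphaDash.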
output = (input % mask + alphaDash * (1 - mask)) * a + b;
}
}

template<typename InputDataType, typename OutputDataType>
template<typename eT>
void AlphaDropout<InputDataType, OutputDataType>::Backward(
const arma::Mat<eT>&& /* input */,
arma::Mat<eT>&& gy,
arma::Mat<eT>&& g)
{
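// Only units kept in the forward pass receive gradient, scaled by a; the
// affine shift b contributes no gradient.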
g = gy % mask * a;
}

template<typename InputDataType, typename OutputDataType>
template<typename Archive>
void AlphaDropout<InputDataType, OutputDataType>::serialize(
Archive& ar,
const unsigned int /* version */)
{
ar & BOOST_SERIALIZATION_NVP(ratio);
ar & BOOST_SERIALIZATION_NVP(alphaDash);
ar & BOOST_SERIALIZATION_NVP(a);
ar & BOOST_SERIALIZATION_NVP(b);
}

} // namespace ann
} // namespace mlpack

#endif
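
As a quick sanity check on the affine parameters, here is a small illustrative snippet (an assumption for exposition, not part of this commit) that evaluates a and b for a few ratios using the same expressions as AlphaDropout::Ratio():

#include <cmath>
#include <cstdio>

int main()
{
  // SELU constants, copied from alpha_dropout.hpp above.
  const double alpha = 1.6732632423543772848170429916717;
  const double lambda = 1.0507009873554804934193349852946;
  const double alphaDash = -alpha * lambda;  // Default saturation value.

  for (const double ratio : { 0.1, 0.2, 0.5 })
  {
    // Same expressions as AlphaDropout::Ratio().
    const double a = std::pow((1 - ratio) *
        (1 + ratio * alphaDash * alphaDash), -0.5);
    const double b = -a * alphaDash * ratio;
    std::printf("ratio = %.1f -> a = %.4f, b = %.4f\n", ratio, a, b);
  }
}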
2 changes: 2 additions & 0 deletions src/mlpack/methods/ann/layer/layer_types.hpp
@@ -16,6 +16,7 @@

// Layer modules.
#include <mlpack/methods/ann/layer/add.hpp>
#include <mlpack/methods/ann/layer/alpha_dropout.hpp>
#include <mlpack/methods/ann/layer/base_layer.hpp>
#include <mlpack/methods/ann/layer/batch_norm.hpp>
#include <mlpack/methods/ann/layer/bilinear_interpolation.hpp>
@@ -123,6 +124,7 @@ using LayerTypes = boost::variant<
CrossEntropyError<arma::mat, arma::mat>*,
DropConnect<arma::mat, arma::mat>*,
Dropout<arma::mat, arma::mat>*,
AlphaDropout<arma::mat, arma::mat>*,
ELU<arma::mat, arma::mat>*,
Glimpse<arma::mat, arma::mat>*,
HardTanH<arma::mat, arma::mat>*,
92 changes: 92 additions & 0 deletions src/mlpack/tests/ann_layer_test.cpp
@@ -405,6 +405,98 @@ BOOST_AUTO_TEST_CASE(NoDropoutTest)
BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(input));
}

/**
* Perform a test to check whether the mean and variance remain nearly the
* same after alpha-dropout.
*/
BOOST_AUTO_TEST_CASE(SimpleAlphaDropoutLayerTest)
{
// Initialize the probability of setting a value to alphaDash.
const double p = .2;

// Initialize the input parameter with a mean of approximately 0 and a
// variance of approximately 1.

arma::mat input = arma::randn<arma::mat>(1000, 1);

AlphaDropout<> module(p);
module.Deterministic() = false;

// Test the Forward function in the training phase.
arma::mat output;
module.Forward(std::move(input), std::move(output));
// Check whether the mean remains nearly the same.
BOOST_REQUIRE_LE(
arma::as_scalar(arma::abs(arma::mean(input) - arma::mean(output))), 0.1);
// Check whether the variance remains nearly the same.
BOOST_REQUIRE_LE(
arma::as_scalar(arma::abs(arma::var(input) - arma::var(output))), 0.1);

// Test the Backward function in the training phase.
arma::mat delta;
module.Backward(std::move(input), std::move(input), std::move(delta));
BOOST_REQUIRE_LE(
arma::as_scalar(arma::abs(arma::mean(delta) - 0)), 0.05);

// Test the Forward function in the testing phase.
module.Deterministic() = true;
module.Forward(std::move(input), std::move(output));
BOOST_REQUIRE_EQUAL(arma::accu(input), arma::accu(output));
}

/**
* Perform alpha-dropout x times using ones as input, count the number of
* values that were kept (not set to alphaDash), and validate that the layer
* keeps approximately the correct number of values.
*/
BOOST_AUTO_TEST_CASE(AlphaDropoutProbabilityTest)
{
arma::mat input = arma::ones(1500, 1);
const size_t iterations = 10;

double probability[5] = { 0.1, 0.3, 0.4, 0.7, 0.8 };
for (size_t trial = 0; trial < 5; ++trial)
{
double nonzeroCount = 0;
for (size_t i = 0; i < iterations; ++i)
{
AlphaDropout<> module(probability[trial]);
module.Deterministic() = false;

arma::mat output;
module.Forward(std::move(input), std::move(output));

// arma::find() returns the indices of the nonzero entries of the mask,
// i.e. the values that were not set to alphaDash; we only need their count.
arma::uvec nonAlphaDash = arma::find(module.Mask());
nonzeroCount += nonAlphaDash.n_elem;
}

const double expected = input.n_elem * (1 - probability[trial]) * iterations;

const double error = fabs(nonzeroCount - expected) / expected;

BOOST_REQUIRE_LE(error, 0.15);
}
}

/**
* Perform alpha-dropout with a ratio of 0, which means no values are set to
* alphaDash (i.e., no alpha-dropout is applied).
*/
BOOST_AUTO_TEST_CASE(NoAlphaDropoutTest)
{
arma::mat input = arma::ones(1500, 1);
AlphaDropout<> module(0);
module.Deterministic() = false;

arma::mat output;
module.Forward(std::move(input), std::move(output));

BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(input));
}

/**
* Simple linear module test.
*/
