-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e6616bd
commit 5908a95
Showing
6 changed files
with
257 additions
and
107 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 0 additions & 88 deletions
88
src/mlpack/methods/ann/layer/not_adapted/parametric_relu_impl.hpp
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
/** | ||
* @file methods/ann/layer/parametric_relu_impl.hpp | ||
* @author Prasanna Patil | ||
* | ||
* Definition of PReLU layer first introduced in the, | ||
* Kaiming He, Xiangyu Zhang, Shaoqing, Ren Jian Sun, | ||
* "Delving Deep into Rectifiers: | ||
* Surpassing Human-Level Performance on ImageNet Classification", 2014 | ||
* | ||
* mlpack is free software; you may redistribute it and/or modify it under the | ||
* terms of the 3-clause BSD license. You should have received a copy of the | ||
* 3-clause BSD license along with mlpack. If not, see | ||
* http://www.opensource.org/licenses/BSD-3-Clause for more information. | ||
*/ | ||
#ifndef MLPACK_METHODS_ANN_LAYER_PRELU_IMPL_HPP | ||
#define MLPACK_METHODS_ANN_LAYER_PRELU_IMPL_HPP | ||
|
||
// In case it hasn't yet been included. | ||
#include "parametric_relu.hpp" | ||
|
||
namespace mlpack { | ||
|
||
template<typename MatType> | ||
PReLUType<MatType>::PReLUType(const double userAlpha) : | ||
Layer<MatType>(), | ||
userAlpha(userAlpha) | ||
{ | ||
alpha.set_size(WeightSize(), 1); | ||
alpha(0) = userAlpha; | ||
} | ||
|
||
template<typename MatType> | ||
PReLUType<MatType>::PReLUType( | ||
const PReLUType& other) : | ||
Layer<MatType>(other), | ||
userAlpha(other.userAlpha) | ||
{ | ||
alpha.set_size(WeightSize(), 1); | ||
alpha(0) = userAlpha; | ||
} | ||
|
||
template<typename MatType> | ||
PReLUType<MatType>::PReLUType( | ||
PReLUType&& other) : | ||
Layer<MatType>(std::move(other)), | ||
userAlpha(std::move(other.userAlpha)) | ||
{ | ||
alpha.set_size(WeightSize(), 1); | ||
alpha(0) = userAlpha; | ||
} | ||
|
||
template<typename MatType> | ||
PReLUType<MatType>& | ||
PReLUType<MatType>::operator=(const PReLUType& other) | ||
{ | ||
if (&other != this) | ||
{ | ||
Layer<MatType>::operator=(other); | ||
userAlpha = other.userAlpha; | ||
alpha = other.alpha; | ||
} | ||
|
||
return *this; | ||
} | ||
|
||
template<typename MatType> | ||
PReLUType<MatType>& | ||
PReLUType<MatType>::operator=(PReLUType&& other) | ||
{ | ||
if (&other != this) | ||
{ | ||
Layer<MatType>::operator=(std::move(other)); | ||
userAlpha = std::move(other.userAlpha); | ||
alpha = std::move(other.alpha); | ||
} | ||
|
||
return *this; | ||
} | ||
|
||
template<typename MatType> | ||
void PReLUType<MatType>::SetWeights( | ||
typename MatType::elem_type* weightsPtr) | ||
{ | ||
alpha = arma::mat(weightsPtr, 1, 1, false, false); | ||
|
||
//! Set value of alpha to the one given by user. | ||
// TODO: this doesn't even make any sense. is it trainable or not? | ||
// why is there userAlpha? is that for initialization only? | ||
alpha(0) = userAlpha; | ||
} | ||
|
||
template<typename MatType> | ||
void PReLUType<MatType>::Forward( | ||
const MatType& input, MatType& output) | ||
{ | ||
// TODO: use transform()? | ||
output = input; | ||
#pragma omp for | ||
for (size_t i = 0; i < input.n_elem; ++i) | ||
output(i) *= (input(i) >= 0) ? 1 : alpha(0); | ||
} | ||
|
||
template<typename MatType> | ||
void PReLUType<MatType>::Backward( | ||
const MatType& input, const MatType& gy, MatType& g) | ||
{ | ||
MatType derivative; | ||
derivative.set_size(arma::size(input)); | ||
#pragma omp for | ||
for (size_t i = 0; i < input.n_elem; ++i) | ||
derivative(i) = (input(i) >= 0) ? 1 : alpha(0); | ||
|
||
g = gy % derivative; | ||
} | ||
|
||
template<typename MatType> | ||
void PReLUType<MatType>::Gradient( | ||
const MatType& input, | ||
const MatType& error, | ||
MatType& gradient) | ||
{ | ||
MatType zeros = arma::zeros<MatType>(input.n_rows, input.n_cols); | ||
gradient.set_size(WeightSize(), 1); | ||
gradient(0) = arma::accu(error % arma::min(zeros, input)) / input.n_cols; | ||
} | ||
|
||
/**
 * Serialize the layer state to/from a cereal archive.
 *
 * @param ar Archive to read from or write to.
 */
template<typename MatType>
template<typename Archive>
void PReLUType<MatType>::serialize(
    Archive& ar,
    const uint32_t /* version */)
{
  // Serialize the base-layer state first.
  ar(cereal::base_class<Layer<MatType>>(this));

  // Only the trainable parameter is stored.
  // NOTE(review): userAlpha is not serialized — after loading, a later
  // SetWeights() call resets alpha(0) to whatever userAlpha the loaded
  // object was constructed with; confirm this is intended.
  ar(CEREAL_NVP(alpha));
}
|
||
} // namespace mlpack | ||
|
||
#endif |
Oops, something went wrong.