diff --git a/.travis.yml b/.travis.yml index 838397c895d..b2cd2e96fed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ before_install: - printenv - sudo cp .travis/config.hpp /usr/include/armadillo_bits/config.hpp install: - - mkdir build && cd build && cmake -DDEBUG=OFF -DPROFILE=OFF .. && make -j4 + - mkdir build && cd build && cmake -DDEBUG=OFF -DPROFILE=OFF .. && make -j2 script: - travis_wait 30 ./bin/mlpack_test -p notifications: diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index f292e9756c9..dde69de0b1b 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -51,7 +51,6 @@ set(DIRS randomized_svd range_search rann - rmva regularized_svd softmax_regression sparse_autoencoder diff --git a/src/mlpack/methods/ann/CMakeLists.txt b/src/mlpack/methods/ann/CMakeLists.txt index 6ff7011404b..46174b88578 100644 --- a/src/mlpack/methods/ann/CMakeLists.txt +++ b/src/mlpack/methods/ann/CMakeLists.txt @@ -1,12 +1,8 @@ # Define the files we need to compile # Anything not in this list will not be compiled into mlpack. set(SOURCES - cnn.hpp - cnn_impl.hpp ffn.hpp ffn_impl.hpp - network_util.hpp - network_util_impl.hpp rnn.hpp rnn_impl.hpp ) @@ -23,6 +19,4 @@ set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) add_subdirectory(activation_functions) add_subdirectory(init_rules) add_subdirectory(layer) -add_subdirectory(performance_functions) -add_subdirectory(pooling_rules) add_subdirectory(convolution_rules) diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp deleted file mode 100644 index 72e0803f179..00000000000 --- a/src/mlpack/methods/ann/cnn.hpp +++ /dev/null @@ -1,448 +0,0 @@ -/** - * @file cnn.hpp - * @author Shangtong Zhang - * @author Marcus Edel - * - * Definition of the CNN class, which implements convolutional neural networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_CNN_HPP -#define MLPACK_METHODS_ANN_CNN_HPP - -#include - -#include -#include -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a standard convolutional network. - * - * @tparam LayerTypes Contains all layer modules used to construct the network. - * @tparam OutputLayerType The outputlayer type used to evaluate the network. - * @tparam PerformanceFunction Performance strategy used to calculate the error. - */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> -> -class CNN -{ - public: - //! Convenience typedef for the internal model construction. - using NetworkType = CNN; - - /** - * Create the CNN object with the given predictors and responses set (this is - * the set that is used to train the network) and the given optimizer. - * Optionally, specify which initialize rule and performance function should - * be used. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. 
- * @param optimizer Instantiated optimizer used to train the model. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template class OptimizerType> - CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the CNN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template - CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the CNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template - CNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - /** - * Train the convolutional neural network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::SGD). - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::cube& predictors, const arma::mat& responses); - - /** - * Train the convolutional neural network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. 
- * - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(OptimizerType& optimizer); - - /** - * Train the convolutional neural network on the given input data using the - * given optimizer. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer); - - /** - * Predict the responses to a given set of predictors. The responses will - * reflect the output of the given output layer as returned by the - * OutputClass() function. - * - * @param predictors Input predictors. - * @param responses Matrix to put output predictions of responses into. - */ - void Predict(arma::cube& predictors, arma::mat& responses); - - /** - * Evaluate the convolutional neural network with the given parameters. This - * function is usually called by the optimizer to train the model. - * - * @param parameters Matrix model parameters. - * @param i Index of point to use for objective function evaluation. - * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. - */ - double Evaluate(const arma::mat& parameters, - const size_t i, - const bool deterministic = true); - - /** - * Evaluate the gradient of the convolutional neural network with the given - * parameters, and with respect to only one point in the dataset. This is - * useful for optimizers such as SGD, which require a separable objective - * function. - * - * @param parameters Matrix of the model parameters to be optimized. - * @param i Index of points to use for objective function gradient evaluation. - * @param gradient Matrix to output gradient into. - */ - void Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient); - - //! Return the number of separable functions (the number of predictor points). - size_t NumFunctions() const { return numFunctions; } - - //! Return the initial point for the optimization. - const arma::mat& Parameters() const { return parameter; } - //! Modify the initial point for the optimization. - arma::mat& Parameters() { return parameter; } - - /** - * Serialize the convolutional neural network. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /** - * Reset the network by setting the layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetParameter(network); - } - - /** - * Reset the layer status by setting the current deterministic parameter - * through all layer that implement the Deterministic function. 
- */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ - } - - /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. - */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - /** - * Link the calculated activation with the connection layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Masures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } - - /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. - */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, - std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } - - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& /* unused */) { /* Nothing to do here */ } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... 
Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(T& layer, P& /* unused */, D& delta) - { - layer.Gradient(layer.InputParameter(), delta, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } - - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } - - //! Instantiated convolutional neural network. - LayerTypes network; - - //! The outputlayer used to evaluate the network - OutputLayerType& outputLayer; - - //! Performance strategy used to claculate the error. - PerformanceFunction performanceFunc; - - //! The current evaluation mode (training or testing). - bool deterministic; - - //! Matrix of (trained) parameters. - arma::mat parameter; - - //! The matrix of data points (predictors). - arma::cube predictors; - - //! The matrix of responses to the input data points. - arma::mat responses; - - //! The number of separable functions (the number of predictor points). - size_t numFunctions; - - //! Locally stored backward error. - arma::mat error; - - //! Locally stored sample size. - size_t sampleSize; -}; // class CNN - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "cnn_impl.hpp" - -#endif diff --git a/src/mlpack/methods/ann/cnn_impl.hpp b/src/mlpack/methods/ann/cnn_impl.hpp deleted file mode 100644 index ba774ba3097..00000000000 --- a/src/mlpack/methods/ann/cnn_impl.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * @file cnn_impl.hpp - * @author Marcus Edel - * - * Definition of the CNN class, which implements convolutional neural networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_CNN_IMPL_HPP -#define MLPACK_METHODS_ANN_CNN_IMPL_HPP - -// In case it hasn't been included yet. -#include "cnn.hpp" - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - - -template -template class OptimizerType -> -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. 
- Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - Train(predictors, responses); -} - -template -template -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); -} - -template -template class OptimizerType> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::cube& predictors, const arma::mat& responses) -{ - numFunctions = predictors.n_cols; - sampleSize = predictors.n_slices / responses.n_cols; - this->predictors = predictors; - this->responses = responses; - - OptimizerType optimizer(*this); - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template class OptimizerType> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer) -{ - numFunctions = responses.n_cols; - sampleSize = predictors.n_slices / responses.n_cols; - this->predictors = predictors; - this->responses = responses; - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template< - template class OptimizerType -> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Predict(arma::cube& predictors, arma::mat& responses) -{ - deterministic = true; - - arma::mat responsesTemp; - ResetParameter(network); - Forward(predictors.slices(0, sampleSize - 1), network); - OutputPrediction(responsesTemp, network); - - responses = arma::mat(responsesTemp.n_elem, predictors.n_slices); - responses.col(0) = responsesTemp.col(0); - - for (size_t i = 1; i < (predictors.n_slices / sampleSize); i++) - { - Forward(predictors.slices(i, (i + 1) * sampleSize - 1), network); - - responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true); - OutputPrediction(responsesTemp, network); - responses.col(i) = responsesTemp.col(0); - } -} - -template -double CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) -{ - this->deterministic = deterministic; - - ResetParameter(network); - Forward(predictors.slices(i, (i + 1) * sampleSize - 1), network); - - return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true), error, network); -} - -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) -{ - Evaluate(parameter, i, false); - - NetworkGradients(gradient, network); - - Backward<>(error, network); - UpdateGradients<>(network); -} - -template -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Serialize(Archive& ar, const unsigned int /* version */) -{ - ar & data::CreateNVP(parameter, "parameter"); - ar & data::CreateNVP(sampleSize, "sampleSize"); - - // If we are loading, we need to initialize the weights. - if (Archive::is_loading::value) - { - NetworkWeights(parameter, network); - } -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index fc7fc6926fb..c90574293ba 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -41,16 +41,20 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { - output = arma::zeros >(input.n_rows - filter.n_rows + 1, - input.n_cols - filter.n_cols + 1); + output = arma::zeros >((input.n_rows - filter.n_rows + 1) / + dW, (input.n_cols - filter.n_cols + 1) / dH); // It seems to be about 3.5 times faster to use pointers instead of // filter(ki, kj) * input(leftInput + ki, topInput + kj) and output(i, j). 
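For clarity, here is a minimal standalone sketch of the strided valid-mode convolution that this file now implements, written with plain element indexing instead of the pointer-optimized inner loop. It mirrors the output-size rule and indexing introduced by the patch, but the snippet itself is illustrative only (assuming Armadillo) and is not part of the change:

#include <armadillo>

// Illustrative sketch (not part of the patch): strided "valid" convolution
// using the same output-size rule and receptive-field indexing as the
// NaiveConvolution change above.
template<typename eT>
void StridedValidConvolution(const arma::Mat<eT>& input,
                             const arma::Mat<eT>& filter,
                             arma::Mat<eT>& output,
                             const size_t dW = 1,
                             const size_t dH = 1)
{
  // Output extent: the full valid-convolution size divided by the stride,
  // matching the expression introduced in the patch.
  output = arma::zeros<arma::Mat<eT> >((input.n_rows - filter.n_rows + 1) / dW,
                                       (input.n_cols - filter.n_cols + 1) / dH);

  for (size_t j = 0; j < output.n_cols; ++j)
  {
    for (size_t i = 0; i < output.n_rows; ++i)
    {
      eT sum = eT(0);

      // Accumulate filter * input over the receptive field anchored at
      // (i * dH, j * dW), as in the pointer-based loop of the patch.
      for (size_t kj = 0; kj < filter.n_cols; ++kj)
        for (size_t ki = 0; ki < filter.n_rows; ++ki)
          sum += filter(ki, kj) * input(i * dH + ki, j * dW + kj);

      output(i, j) = sum;
    }
  }
}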
@@ -63,7 +67,7 @@ class NaiveConvolution const eT* kernelPtr = filter.memptr(); for (size_t kj = 0; kj < filter.n_cols; ++kj) { - const eT* inputPtr = input.colptr(kj + j) + i; + const eT* inputPtr = input.colptr(kj + j * dW) + i * dH; for (size_t ki = 0; ki < filter.n_rows; ++ki, ++kernelPtr, ++inputPtr) *outputPtr += *kernelPtr * (*inputPtr); } @@ -77,26 +81,30 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { - const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1); - const size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1); + const size_t outputRows = (input.n_rows + 2 * (filter.n_rows - 1)) * dW; + const size_t outputCols = (input.n_cols + 2 * (filter.n_cols - 1)) * dH; // Pad filter and input to the working output shape. arma::Mat inputPadded = arma::zeros >(outputRows, outputCols); inputPadded.submat(filter.n_rows - 1, filter.n_cols - 1, - filter.n_rows - 1 + input.n_rows - 1, - filter.n_cols - 1 + input.n_cols - 1) = input; + filter.n_rows - 1 + input.n_rows - 1, + filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, - output); + output, 1, 1); } /* @@ -105,15 +113,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input.slice(0), filter.slice(0), - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, input.n_slices); @@ -122,7 +134,7 @@ class NaiveConvolution for (size_t i = 1; i < input.n_slices; i++) { NaiveConvolution::Convolution(input.slice(i), filter.slice(i), - output.slice(i)); + output.slice(i), dW, dH); } } @@ -133,15 +145,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input, filter.slice(0), - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, filter.n_slices); @@ -150,7 +166,7 @@ class NaiveConvolution for (size_t i = 1; i < filter.n_slices; i++) { NaiveConvolution::Convolution(input, filter.slice(i), - output.slice(i)); + output.slice(i), dW, dH); } } @@ -161,15 +177,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input.slice(0), filter, - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, input.n_slices); @@ -178,7 +198,7 @@ class NaiveConvolution for (size_t i = 1; i < input.n_slices; i++) { NaiveConvolution::Convolution(input.slice(i), filter, - output.slice(i)); + output.slice(i), dW, dH); } } diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp index a0b317ebb0c..5206ec1996f 100644 --- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp @@ -49,6 +49,8 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, @@ -113,6 +115,8 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, @@ -142,6 +146,8 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, @@ -170,6 +176,8 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Cube& input, diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index f9bc4d53a8b..5e4b30fa4dc 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -14,10 +14,10 @@ #include -#include -#include +#include +#include +#include #include -#include #include namespace mlpack { @@ -26,25 +26,18 @@ namespace ann /** Artificial Neural Network. */ { /** * Implementation of a standard feed forward network. * - * @tparam LayerTypes Contains all layer modules used to construct the network. * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam PerformanceFunction Performance strategy used to calculate the error. */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> +template< + typename OutputLayerType = NegativeLogLikelihood<>, + typename InitializationRuleType = RandomInitialization > class FFN { public: //! Convenience typedef for the internal model construction. - using NetworkType = FFN; + using NetworkType = FFN; /** * Create the FFN object with the given predictors and responses set (this is @@ -52,71 +45,36 @@ class FFN * Optionally, specify which initialize rule and performance function should * be used. * - * @param network Network modules used to construct the network. * @param outputLayer Output layer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template class OptimizerType> - FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + FFN(OutputLayerType&& outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); /** * Create the FFN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. + * the set that is used to train the network) and the given optimizer. + * Optionally, specify which initialize rule and performance function should + * be used. * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. + * @param responses Outputs results from input training variables. + * @param outputLayer Output layer used to evaluate the network. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. 
*/ - template - FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, + FFN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + OutputLayerType&& outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); - /** - * Create the FNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. - */ - template - FFN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + //! Destructor to release allocated memory. + ~FFN(); /** - * Train the feedforward network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::SGD). + * Train the feedforward network on the given input data using the given + * optimizer. * * This will use the existing model parameters as a starting point for the * optimization. If this is not what you want, then you should access the @@ -125,31 +83,19 @@ class FFN * @tparam OptimizerType Type of optimizer to use to train the model. * @param predictors Input training variables. * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::mat& predictors, const arma::mat& responses); - - /** - * Train the feedforward network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * * @param optimizer Instantiated optimizer used to train the model. */ template< template class OptimizerType = mlpack::optimization::RMSprop > - void Train(OptimizerType& optimizer); + void Train(const arma::mat& predictors, + const arma::mat& responses, + OptimizerType& optimizer); /** - * Train the feedforward network on the given input data using the given - * optimizer. + * Train the feedforward network on the given input data. By default, the + * RMSprop optimization algorithm is used, but others can be specified + * (such as mlpack::optimization::SGD). * * This will use the existing model parameters as a starting point for the * optimization. If this is not what you want, then you should access the @@ -158,19 +104,16 @@ class FFN * @tparam OptimizerType Type of optimizer to use to train the model. * @param predictors Input training variables. * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. 
*/ template< template class OptimizerType = mlpack::optimization::RMSprop > - void Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer); + void Train(const arma::mat& predictors, const arma::mat& responses); /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the - * OutputClass() function. + * output layer function. * * @param predictors Input predictors. * @param responses Matrix to put output predictions of responses into. @@ -184,7 +127,7 @@ class FFN * @param parameters Matrix model parameters. * @param i Index of point to use for objective function evaluation. * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. + * layer act differently in training or testing mode. */ double Evaluate(const arma::mat& parameters, const size_t i, @@ -203,6 +146,21 @@ class FFN const size_t i, arma::mat& gradient); + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + //! Return the number of separable functions (the number of predictor points). size_t NumFunctions() const { return numFunctions; } @@ -216,214 +174,61 @@ class FFN void Serialize(Archive& ar, const unsigned int /* version */); private: + // Helper functions. /** - * Reset the network by zeroing the layer activations and by setting the - * layer status. + * The Forward algorithm (part of the Forward-Backward algorithm). Computes + * forward probabilities for each module. * - * enable_if (SFINAE) is used to iterate through the network. The general - * case peels off the first type and recurses, as usual with - * variadic function templates. + * @param input Data sequence to compute probabilities for. */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetParameter(network); - } + void Forward(arma::mat&& input); /** - * Reset the layer status by setting the current deterministic parameter - * through all layer that implement the Deterministic function. + * The Backward algorithm (part of the Forward-Backward algorithm). Computes + * backward pass for module. */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ } + void Backward(); /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. + * Iterate through all layer modules and update the the gradient using the + * layer defined optimizer. 
*/ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } + void Gradient(); /** - * Link the calculated activation with the connection layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /* - * Calculate the output error and update the overall error. + * Reset the module infomration (weights/parameters). */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Measures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } + void ResetParameters(); /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. + * Reset the module status by setting the current deterministic parameter + * for all modules that implement the Deterministic function. */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, - std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } + void ResetDeterministic(); /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. + * Reset the gradient for all modules that implement the Gradient function. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& /* unused */) { /* Nothing to do here */ } + void ResetGradients(arma::mat& gradient); - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... 
Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(T& layer, P& /* unused */, D& delta) - { - layer.Gradient(layer.InputParameter(), delta, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } - - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } - - //! Instantiated feedforward network. - LayerTypes network; - - //! The output layer used to evaluate the network + //! Instantiated outputlayer used to evaluate the network. OutputLayerType outputLayer; - //! Performance strategy used to calculate the error. - PerformanceFunction performanceFunc; + //! Instantiated InitializationRule object for initializing the network + //! parameter. + InitializationRuleType initializeRule; - //! The current evaluation mode (training or testing). - bool deterministic; + //! The input width. + size_t width; - //! Matrix of (trained) parameters. - arma::mat parameter; + //! The input height. + size_t height; + + //! Indicator if we already trained the model. + bool reset; + + //! Locally-stored model modules. + std::vector network; //! The matrix of data points (predictors). arma::mat predictors; @@ -431,11 +236,56 @@ class FFN //! The matrix of responses to the input data points. arma::mat responses; + //! Matrix of (trained) parameters. + arma::mat parameter; + //! The number of separable functions (the number of predictor points). size_t numFunctions; - //! Locally stored backward error. + //! The current error for the backward pass. arma::mat error; + + //! THe current input of the forward/backward pass. + arma::mat currentInput; + + //! THe current target of the forward/backward pass. + arma::mat currentTarget; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored output width visitor. + OutputWidthVisitor outputWidthVisitor; + + //! Locally-stored output height visitor. + OutputHeightVisitor outputHeightVisitor; + + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! The current evaluation mode (training or testing). + bool deterministic; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient parameter. + arma::mat gradient; }; // class FFN } // namespace ann diff --git a/src/mlpack/methods/ann/ffn_impl.hpp b/src/mlpack/methods/ann/ffn_impl.hpp index 5b1cc611871..6d8ee25e1a3 100644 --- a/src/mlpack/methods/ann/ffn_impl.hpp +++ b/src/mlpack/methods/ann/ffn_impl.hpp @@ -19,123 +19,70 @@ namespace mlpack { namespace ann /** Artificial Neural Network. 
*/ { -template -template class OptimizerType -> -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols) +template +FFN::FFN( + OutputLayerType&& outputLayer, InitializationRuleType initializeRule) : + outputLayer(std::move(outputLayer)), + initializeRule(initializeRule), + width(0), + height(0), + reset(false) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." << std::endl; + /* Nothing to do here */ } -template -template -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) +template +FFN::FFN( + const arma::mat& predictors, + const arma::mat& responses, + OutputLayerType&& outputLayer, + InitializationRuleType initializeRule) : + outputLayer(std::move(outputLayer)), + initializeRule(initializeRule), + width(0), + height(0), + reset(false) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); + numFunctions = responses.n_cols; - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + this->deterministic = true; + ResetDeterministic(); - Train(predictors, responses); + if (!reset) + { + ResetParameters(); + } } -template -template -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) +template +FFN::~FFN() { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deleteVisitor)); } -template +template template class OptimizerType> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, const arma::mat& responses) +void FFN::Train( + const arma::mat& predictors, + const arma::mat& 
responses, + OptimizerType& optimizer) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - OptimizerType optimizer(*this); + this->predictors = std::move(predictors); + this->responses = std::move(responses); + + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + } // Train the model. Timer::Start("ffn_optimization"); @@ -146,43 +93,26 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template +template template class OptimizerType> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) +void FFN::Train( + const arma::mat& predictors, const arma::mat& responses) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." << std::endl; -} + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + } + + OptimizerType optimizer(*this); -template -template< - template class OptimizerType -> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ // Train the model. Timer::Start("ffn_optimization"); const double out = optimizer.Optimize(parameter); @@ -192,101 +122,252 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." 
<< std::endl; } -template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Predict(arma::mat& predictors, arma::mat& responses) +template +void FFN::Predict( + arma::mat& predictors, arma::mat& responses) { - deterministic = true; + if (parameter.is_empty()) + { + ResetParameters(); + } + + if (!deterministic) + { + deterministic = true; + ResetDeterministic(); + } arma::mat responsesTemp; - ResetParameter(network); - Forward(arma::mat(predictors.colptr(0), predictors.n_rows, 1, false, true), - network); - OutputPrediction(responsesTemp, network); + Forward(std::move(arma::mat(predictors.colptr(0), + predictors.n_rows, 1, false, true))); + responsesTemp = boost::apply_visitor(outputParameterVisitor, + network.back()).col(0); responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); responses.col(0) = responsesTemp.col(0); for (size_t i = 1; i < predictors.n_cols; i++) { - Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), - network); + Forward(std::move(arma::mat(predictors.colptr(i), + predictors.n_rows, 1, false, true))); - responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true); - OutputPrediction(responsesTemp, network); + responsesTemp = boost::apply_visitor(outputParameterVisitor, + network.back()); responses.col(i) = responsesTemp.col(0); } } -template -double FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) +template +double FFN::Evaluate( + const arma::mat& /* parameters */, const size_t i, const bool deterministic) { - this->deterministic = deterministic; + if (parameter.is_empty()) + { + ResetParameters(); + } - ResetParameter(network); + if (deterministic != this->deterministic) + { + this->deterministic = deterministic; + ResetDeterministic(); + } + + currentInput = predictors.unsafe_col(i); + currentTarget = responses.unsafe_col(i); - Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), - network); + Forward(std::move(currentInput)); + double res = outputLayer.Forward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget)); - return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true), error, network); + return res; } -template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) +template +void FFN::Gradient( + const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { + if (parameter.is_empty()) + { + ResetParameters(); + } + gradient = arma::zeros(parameter.n_rows, parameter.n_cols); } + else + { + gradient.zeros(); + } + Evaluate(parameters, i, false); - Evaluate(parameter, i, false); + outputLayer.Backward(std::move(boost::apply_visitor(outputParameterVisitor, + network.back())), std::move(currentTarget), std::move(error)); - NetworkGradients(gradient, network); + Backward(); + ResetGradients(gradient); + Gradient(); +} + +template +void FFN::ResetParameters() +{ + size_t weights = 0; + for (size_t i = 0; i < network.size(); ++i) + { + weights += boost::apply_visitor(weightSizeVisitor, network[i]); + } + + parameter.set_size(weights, 1); + initializeRule.Initialize(parameter, parameter.n_elem, 1); + + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += 
boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), network[i]); + + boost::apply_visitor(resetVisitor, network[i]); + } +} + +template +void FFN::ResetDeterministic() +{ + DeterministicSetVisitor deterministicSetVisitor(deterministic); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deterministicSetVisitor)); +} + +template +void FFN::ResetGradients( + arma::mat& gradient) +{ + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(GradientSetVisitor(std::move(gradient), + offset), network[i]); + } +} + +template +void FFN::Forward(arma::mat&& input) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. 
+ if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + + if (!reset) + { + reset = true; + } +} + +template +void FFN::Backward() +{ + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(error), std::move( + boost::apply_visitor(deltaVisitor, network.back()))), network.back()); - Backward<>(error, network); - UpdateGradients<>(network); + for (size_t i = 2; i < network.size(); ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } } -template +template +void FFN::Gradient() +{ + boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( + boost::apply_visitor(deltaVisitor, network[1]))), network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } + + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - 2])), std::move(error)), + network[network.size() - 1]); +} + +template template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Serialize(Archive& ar, const unsigned int /* version */) +void FFN::Serialize( + Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); + ar & data::CreateNVP(width, "width"); + ar & data::CreateNVP(height, "height"); + ar & data::CreateNVP(currentInput, "currentInput"); + ar & data::CreateNVP(currentTarget, "currentTarget"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) { - NetworkWeights(parameter, network); + reset = false; + + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), network[i]); + + boost::apply_visitor(resetVisitor, network[i]); + } } } diff --git a/src/mlpack/methods/ann/layer/CMakeLists.txt b/src/mlpack/methods/ann/layer/CMakeLists.txt index b639cdad785..4211aeaaaa7 100644 --- a/src/mlpack/methods/ann/layer/CMakeLists.txt +++ b/src/mlpack/methods/ann/layer/CMakeLists.txt @@ -1,23 +1,68 @@ # Define the files we need to compile # Anything not in this list will not be compiled into mlpack. 
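The rewritten FFN above no longer walks a compile-time layer tuple; it keeps its modules in a std::vector of a boost::variant type and dispatches Forward, Backward and Gradient through visitors such as ForwardVisitor and outputParameterVisitor. Below is a minimal, self-contained sketch of that dispatch pattern; Scale, Shift, ToyLayerTypes and ToyForwardVisitor are illustrative stand-ins, not mlpack types.

#include <boost/variant.hpp>
#include <vector>
#include <armadillo>

// Two toy layers with no common base class, mirroring how the new FFN holds
// heterogeneous layer types in one container.
struct Scale
{
  double factor;
  void Forward(const arma::mat& input, arma::mat& output) { output = factor * input; }
};

struct Shift
{
  double offset;
  void Forward(const arma::mat& input, arma::mat& output) { output = input + offset; }
};

// Hypothetical stand-in for mlpack's LayerTypes variant.
using ToyLayerTypes = boost::variant<Scale*, Shift*>;

// Visitor that calls Forward() on whichever concrete layer the variant holds.
class ToyForwardVisitor : public boost::static_visitor<void>
{
 public:
  ToyForwardVisitor(const arma::mat& input, arma::mat& output) :
      input(input), output(output) { }

  template<typename LayerType>
  void operator()(LayerType* layer) const { layer->Forward(input, output); }

 private:
  const arma::mat& input;
  arma::mat& output;
};

int main()
{
  Scale scale{2.0};
  Shift shift{1.0};
  std::vector<ToyLayerTypes> network{&scale, &shift};

  arma::mat x = arma::ones<arma::mat>(3, 1), y;
  for (ToyLayerTypes& layer : network)
  {
    boost::apply_visitor(ToyForwardVisitor(x, y), layer);
    x = y;  // The output of one module becomes the input of the next.
  }

  x.print("chained output");  // Every element is 1 * 2 + 1 = 3.
  return 0;
}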
set(SOURCES - layer_traits.hpp - binary_classification_layer.hpp + add.hpp + add_impl.hpp + add_merge.hpp + add_merge_impl.hpp base_layer.hpp - empty_layer.hpp - bias_layer.hpp - dropout_layer.hpp - dropconnect_layer.hpp - hard_tanh_layer.hpp - leaky_relu_layer.hpp - linear_layer.hpp - conv_layer.hpp - pooling_layer.hpp - recurrent_layer.hpp - lstm_layer.hpp - sparse_bias_layer.hpp - sparse_input_layer.hpp - sparse_output_layer.hpp + concat.hpp + concat_impl.hpp + concat_performance.hpp + concat_performance_impl.hpp + constant.hpp + constant_impl.hpp + convolution.hpp + convolution_impl.hpp + dropconnect.hpp + dropconnect_impl.hpp + dropout.hpp + dropout_impl.hpp + glimpse.hpp + glimpse_impl.hpp + hard_tanh.hpp + hard_tanh_impl.hpp + join.hpp + join_impl.hpp + layer.hpp + layer_traits.hpp + layer_visitor.hpp + layer_visitor_impl.hpp + layer_types.hpp + leaky_relu.hpp + leaky_relu_impl.hpp + linear.hpp + linear_impl.hpp + linear_no_bias.hpp + linear_no_bias_impl.hpp + log_softmax.hpp + log_softmax_impl.hpp + lookup.hpp + lookup_impl.hpp + lstm.hpp + lstm_impl.hpp + max_pooling.hpp + max_pooling_impl.hpp + mean_pooling.hpp + mean_pooling_impl.hpp + mean_squared_error.hpp + mean_squared_error_impl.hpp + multiply_constant.hpp + multiply_constant_impl.hpp + negative_log_likelihood.hpp + negative_log_likelihood_impl.hpp + recurrent.hpp + recurrent_impl.hpp + recurrent_attention.hpp + recurrent_attention_impl.hpp + reinforce_normal.hpp + reinforce_normal_impl.hpp + select.hpp + select_impl.hpp + sequential.hpp + sequential_impl.hpp + vr_class_reward_impl.hpp + vr_class_reward_impl.hpp ) # Add directory name to sources. diff --git a/src/mlpack/methods/ann/layer/empty_layer.hpp b/src/mlpack/methods/ann/layer/add.hpp similarity index 60% rename from src/mlpack/methods/ann/layer/empty_layer.hpp rename to src/mlpack/methods/ann/layer/add.hpp index cf5a70e43ae..1afb1121300 100644 --- a/src/mlpack/methods/ann/layer/empty_layer.hpp +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -1,23 +1,26 @@ /** - * @file empty_layer.hpp - * @author Palash Ahuja + * @file add.hpp + * @author Marcus Edel * - * Definition of the EmptyLayer class, which is basically empty. + * Definition of the Add class that applies a bias term to the incoming data. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_HPP -namespace mlpack{ +#include +#include + +namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of the EmptyLayer class. The EmptyLayer class represents a - * single layer which is mainly used as placeholder. + * Implementation of the Add module class. The Add module applies a bias term + * to the incoming data. * * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). @@ -28,14 +31,15 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class EmptyLayer +class Add { - public: + public: /** - * Creates the empty layer object. All the methods are - * empty as well. + * Create the Add object using the specified number of output units. + * + * @param outSize The number of output units. 
*/ - EmptyLayer() { /* Nothing to do here. */ } + Add(const size_t outSize); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -44,11 +48,8 @@ class EmptyLayer * @param input Input data used for evaluating the specified function. * @param output Resulting output activation. */ - template - void Forward(const InputType& /* input */, OutputType& /* output */) - { - /* Nothing to do here. */ - } + template + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -59,58 +60,58 @@ class EmptyLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const InputType& /* input */, - const ErrorType& /* gy */, - GradientType& /* g */) - { - /* Nothing to do here. */ - } + template + void Backward(const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. * - * @param d The calculated error. - * @param g The calculated gradient. + * @param input The propagated input. + * @param error The calculated error. + * @param gradient The calculated gradient. */ - template - void Gradient(const InputType& /* input */, - const ErrorType& /* error */, - GradientType& /* gradient */) - { - /* Nothing to do here. */ - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } + template + void Gradient(const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient); - //! Modify the weights. - OutputDataType& Weights() { return weights; } + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. InputDataType& InputParameter() { return inputParameter; } //! Get the output parameter. OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. OutputDataType& OutputParameter() { return outputParameter; } //! Get the delta. OutputDataType const& Delta() const { return delta; } - //! Modify the delta. OutputDataType& Delta() { return delta; } //! Get the gradient. OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. OutputDataType& Gradient() { return gradient; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored number of output units. + size_t outSize; + //! Locally-stored weight object. OutputDataType weights; @@ -125,9 +126,12 @@ class EmptyLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class EmptyLayer +}; // class Add + +} // namespace ann +} // namespace mlpack -} //namespace ann -} //namespace mlpack +// Include implementation. +#include "add_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/add_impl.hpp b/src/mlpack/methods/ann/layer/add_impl.hpp new file mode 100644 index 00000000000..3ce562007a4 --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_impl.hpp @@ -0,0 +1,68 @@ +/** + * @file add_impl.hpp + * @author Marcus Edel + * + * Implementation of the Add class that applies a bias term to the incoming + * data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. 
You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_IMPL_HPP + +// In case it hasn't yet been included. +#include "add.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Add::Add(const size_t outSize) : + outSize(outSize) +{ + weights.set_size(outSize, 1); +} + +template +template +void Add::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = input + weights; +} + +template +template +void Add::Backward( + const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy; +} + +template +template +void Add::Gradient( + const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient = error; +} + +template +template +void Add::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp new file mode 100644 index 00000000000..222c3ef1a2f --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -0,0 +1,139 @@ +/** + * @file add_merge.hpp + * @author Marcus Edel + * + * Definition of the AddMerge module which accumulates the output of the given + * modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_MERGE_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_HPP + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the AddMerge module class. The AddMerge class accumulates + * the output of various modules. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class AddMerge +{ + public: + //! Create the AddMerge object. + AddMerge(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& /* input */, OutputType&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + /* + * Add a new module to the model. 
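As the add_impl.hpp hunk above shows, the new Add module adds a learned bias vector to its input, passes the backpropagated error through unchanged, and uses that error directly as the bias gradient. A small Armadillo-only sketch of those three steps; the 3-unit weights vector is made up for illustration.

#include <armadillo>

int main()
{
  // Hypothetical state of a 3-unit Add module: a learned bias column vector.
  arma::mat weights = {{0.5}, {-1.0}, {2.0}};

  // Forward: output = input + weights, as in Add::Forward above.
  arma::mat input = arma::ones<arma::mat>(3, 1);
  arma::mat output = input + weights;

  // Backward: a bias neither reshapes nor scales the signal, so the
  // backpropagated error gy passes through unchanged (g = gy).
  arma::mat gy = {{0.1}, {0.2}, {0.3}};
  arma::mat g = gy;

  // Gradient w.r.t. the bias: d(output)/d(weights) is the identity, so the
  // gradient is the error itself (gradient = error).
  arma::mat gradient = gy;

  output.print("output");      // 1.5, 0.0, 3.0
  gradient.print("gradient");  // 0.1, 0.2, 0.3
  return 0;
}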
+ * + * @param layer The Layer to be added to the model. + */ + template + void Add(const LayerType& layer) { network.push_back(new LayerType(layer)); } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + std::vector network; + + //! Locally-stored delete visitor module object. + DeleteVisitor deleteVisitor; + + //! Locally-stored output parameter visitor module object. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delta visitor module object. + DeltaVisitor deltaVisitor; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class AddMerge + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "add_merge_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/add_merge_impl.hpp b/src/mlpack/methods/ann/layer/add_merge_impl.hpp new file mode 100644 index 00000000000..0ef62c2d0ac --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_merge_impl.hpp @@ -0,0 +1,61 @@ +/** + * @file add_merge_impl.hpp + * @author Marcus Edel + * + * Definition of the AddMerge module which accumulates the output of the given + * modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP + +// In case it hasn't yet been included. +#include "add_merge_impl.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +AddMerge::AddMerge() +{ + // Nothing to do here. 
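AddMerge, declared above, simply accumulates the outputs of the modules it holds. A sketch of that accumulation, with plain Armadillo matrices standing in for the child modules' output parameters:

#include <armadillo>
#include <vector>

int main()
{
  // Plain matrices standing in for the output parameters of three child
  // modules held by an AddMerge container.
  std::vector<arma::mat> childOutputs;
  childOutputs.push_back(arma::ones<arma::mat>(2, 2));
  childOutputs.push_back(2 * arma::ones<arma::mat>(2, 2));
  childOutputs.push_back(3 * arma::ones<arma::mat>(2, 2));

  // AddMerge::Forward accumulates the child outputs elementwise.
  arma::mat output = childOutputs.front();
  for (size_t i = 1; i < childOutputs.size(); ++i)
    output += childOutputs[i];

  output.print("merged output");  // Every element is 1 + 2 + 3 = 6.
  return 0;
}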
+} + +template +template +void AddMerge::Forward( + const InputType&& /* input */, OutputType&& output) +{ + output = boost::apply_visitor(outputParameterVisitor, network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + output += boost::apply_visitor(outputParameterVisitor, network[i]); + } +} + +template +template +void AddMerge::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = gy; +} + + +template +template +void AddMerge::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(network, "network"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/base_layer.hpp b/src/mlpack/methods/ann/layer/base_layer.hpp index 2b915a116d5..68afe1cefeb 100644 --- a/src/mlpack/methods/ann/layer/base_layer.hpp +++ b/src/mlpack/methods/ann/layer/base_layer.hpp @@ -32,7 +32,6 @@ namespace ann /** Artificial Neural Network. */ { * - IdentityLayer * - ReLULayer * - TanHLayer - * - BaseLayer2D * * @tparam ActivationFunction Activation function used for the embedding layer. * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, @@ -64,33 +63,14 @@ class BaseLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { ActivationFunction::fn(input, output); } /** * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - ActivationFunction::deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed + * f(x) by propagating x backwards trough f. Using the results from the feed * forward pass. * * @param input The propagated input activation. @@ -98,30 +78,13 @@ class BaseLayer * @param g The calculated gradient. */ template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) + void Backward(const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; + arma::Mat derivative; ActivationFunction::deriv(input, derivative); - g = mappedError % derivative; + g = gy % derivative; } //! Get the input parameter. @@ -205,18 +168,6 @@ template < using TanHLayer = BaseLayer< ActivationFunction, InputDataType, OutputDataType>; -/** - * Standard Base-Layer2D using the logistic activation function. 
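The trimmed BaseLayer::Backward above reduces to the elementwise chain rule g = gy % f'(.), with the derivative computed from the value the activation function produced in the forward pass. A generic sketch using tanh, whose derivative can be written in terms of the activation y as 1 - y^2; the numbers are arbitrary.

#include <armadillo>

int main()
{
  // Activation y = tanh(x) that the layer produced during the forward pass.
  arma::mat x = {{-1.0, 0.0, 1.0}};
  arma::mat y = arma::tanh(x);

  // Elementwise chain rule from BaseLayer::Backward: g = gy % f'(.),
  // with tanh'(x) written in terms of the activation as 1 - y^2.
  arma::mat gy = {{0.5, 0.5, 0.5}};
  arma::mat derivative = 1.0 - arma::square(y);
  arma::mat g = gy % derivative;

  g.print("backpropagated error");
  return 0;
}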
- */ -template < - class ActivationFunction = LogisticFunction, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -using BaseLayer2D = BaseLayer< - ActivationFunction, InputDataType, OutputDataType>; - - } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp deleted file mode 100644 index 0be535dec7b..00000000000 --- a/src/mlpack/methods/ann/layer/bias_layer.hpp +++ /dev/null @@ -1,208 +0,0 @@ -/** - * @file bias_layer.hpp - * @author Marcus Edel - * - * Definition of the BiasLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a standard bias layer. The BiasLayer class represents a - * single layer of a neural network. - * - * A convenient typedef is given: - * - * - 2DBiasLayer - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class BiasLayer -{ - public: - /** - * Create the BiasLayer object using the specified number of units and bias - * parameter. - * - * @param outSize The number of output units. - * @param bias The bias value. - */ - BiasLayer(const size_t outSize, const double bias = 1) : - outSize(outSize), - bias(bias) - { - weights.set_size(outSize, 1); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + (weights * bias); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = input; - for (size_t s = 0; s < input.n_slices; s++) - { - output.slice(s) += weights(s) * bias; - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const ErrorType& gy, - ErrorType& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the bias. - * - * @param input The propagated input. - * @param error The calculated error. - * @param gradient The calculated gradient. 
- */ - template - void Gradient(const arma::Mat& /* input */, - const ErrorType& error, - GradientType& gradient) - { - gradient = error * bias; - } - - //! Get the weights. - InputDataType const& Weights() const { return weights; } - //! Modify the weights. - InputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - InputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - InputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(bias, "bias"); - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored bias value. - double bias; - - //! Locally-stored weight object. - InputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - InputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class BiasLayer - -//! Layer traits for the bias layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = true; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -/** - * Standard 2D-Bias-Layer. - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::cube -> -using BiasLayer2D = BiasLayer; - -/** - * Standard 2D-Bias-Layer. - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -using AdditionLayer = BiasLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp b/src/mlpack/methods/ann/layer/binary_classification_layer.hpp deleted file mode 100644 index 1b3d6172a74..00000000000 --- a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp +++ /dev/null @@ -1,106 +0,0 @@ -/** - * @file binary_classification_layer.hpp - * @author Marcus Edel - * - * Definition of the BinaryClassificationLayer class, which implements a - * binary class classification layer that can be used as output layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a binary classification layer that can be used as - * output layer. 
- */ -class BinaryClassificationLayer -{ - public: - /** - * Create the BinaryClassificationLayer object. - * - * @param confidence The confidence used for the output class transformation. - */ - BinaryClassificationLayer(const double confidence = 0.5) : - confidence(confidence) - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - - for (size_t i = 0; i < output.n_elem; i++) - output(i) = output(i) > confidence ? 1 : 0; - } - - //! Get the confidence parameter. - double const& Confidence() const { return confidence; } - //! Modify the confidence parameter. - double& Confidence() { return confidence; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(confidence, "confidence"); - } - - private: - double confidence; - -}; // class BinaryClassificationLayer - -//! Layer traits for the binary class classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = true; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp new file mode 100644 index 00000000000..808309f7122 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -0,0 +1,186 @@ +/** + * @file concat.hpp + * @author Marcus Edel + * + * Definition of the Concat class, which acts as a concatenation contain. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Concat class. The Concat class works as a + * feed-forward fully connected network container which plugs various layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Concat +{ + public: + /** + * Create the Concat object using the specified parameters. + * + * @param model Expose all network modules. + * @param same Merge the error in the backward pass. + */ + Concat(const bool model = true, const bool same = true); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& /* gradient */); + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + //! Return the model modules. + std::vector& Model() + { + if (model) + { + return network; + } + + return empty; + } + + //! Return the initial point for the optimization. + const arma::mat& Parameters() const { return parameters; } + //! Modify the initial point for the optimization. + arma::mat& Parameters() { return parameters; } + + arma::mat const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + arma::mat& InputParameter() { return inputParameter; } + + //! Get the output parameter. + arma::mat const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + arma::mat& OutputParameter() { return outputParameter; } + + //! Get the delta.e + arma::mat const& Delta() const { return delta; } + //! Modify the delta. + arma::mat& Delta() { return delta; } + + //! Get the gradient. + arma::mat const& Gradient() const { return gradient; } + //! Modify the gradient. + arma::mat& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + + private: + //! Parameter which indicates if the modules should be exposed. + bool model; + + //! If true merge the error in the backward pass. + bool same; + + //! Locally-stored network modules. + std::vector network; + + //! Locally-stored model parameters. + arma::mat parameters; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! Locally-stored empty list of modules. + std::vector empty; + + //! 
Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient object. + arma::mat gradient; +}; // class Concat + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_impl.hpp b/src/mlpack/methods/ann/layer/concat_impl.hpp new file mode 100644 index 00000000000..44efe499231 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_impl.hpp @@ -0,0 +1,159 @@ +/** + * @file concat_impl.hpp + * @author Marcus Edel + * + * Implementation of the Concat class, which acts as a concatenation contain. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_IMPL_HPP + +// In case it hasn't yet been included. +#include "concat.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Concat::Concat( + const bool model, const bool same) : model(model), same(same) +{ + parameters.set_size(0, 0); +} + +template +template +void Concat::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + size_t outSize = 0; + + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (boost::apply_visitor( + outputParameterVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + } + } + + output = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + if (elements < outSize) + { + output.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(outputParameterVisitor, network[i])); + } + else + { + output.col(i) = arma::vectorise(boost::apply_visitor( + outputParameterVisitor, network[i])); + } + } +} + +template +template +void Concat::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + size_t outSize = 0; + size_t elements = 0; + + for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) + { + elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + arma::mat delta; + if (gy.n_cols == 1) + { + delta = gy.submat(j, 0, j + elements - 1, 0); + } + else + { + delta = gy.submat(0, i, elements - 1, i); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(delta), std::move( + boost::apply_visitor(deltaVisitor, network[i]))), network[i]); + + if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + } + + if (same) + { + if (i == 0) + { + g = std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g += std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + + if (!same) + { + g = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = 
boost::apply_visitor(deltaVisitor, network[i]).n_elem; + if (elements < outSize) + { + g.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g.col(i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + } + } +} + +template +template +void Concat::Gradient( + arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(error)), network[i]); + } +} + +template +template +void Concat::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp new file mode 100644 index 00000000000..39b7e10f126 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -0,0 +1,120 @@ +/** + * @file concat_performance.hpp + * @author Marcus Edel + * + * Definition of the ConcatPerformance class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the concat performance class. The class works as a + * feed-forward fully connected network container which plugs performance layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename OutputLayerType = NegativeLogLikelihood<>, + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class ConcatPerformance +{ + public: + /** + * Create the ConcatPerformance object. + * + * @param inSize The number of inputs. + * @param outputLayer Output layer used to evaluate the network. + */ + ConcatPerformance(const size_t inSize, + OutputLayerType&& outputLayer = OutputLayerType()); + + /* + * Computes the Negative log likelihood. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + double Forward(const arma::Mat&& input, arma::Mat&& target); + /** + * Ordinary feed backward pass of a neural network. The negative log + * likelihood layer expectes that the input contains log-probabilities for + * each class. The layer also expects a class index, in the range between 1 + * and the number of classes, as target when calling the Forward function. + * + * @param input The propagated input activation. + * @param target The target vector, that contains the class index in the range + * between 1 and the number of classes. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output); + + //! Get the input parameter. 
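Concat::Forward, shown in the concat_impl.hpp hunk above, runs every child module on the same input and writes each vectorised output into its own column of a zero-padded matrix whose height is the largest child output. A sketch of that layout step, with fixed matrices standing in for the child outputs:

#include <algorithm>
#include <armadillo>
#include <vector>

int main()
{
  // Fixed matrices standing in for the outputs of two child modules that
  // were run on the same input.
  std::vector<arma::mat> childOutputs;
  childOutputs.push_back(arma::mat("1.0; 2.0"));            // 2 elements.
  childOutputs.push_back(arma::mat("3.0; 4.0; 5.0; 6.0"));  // 4 elements.

  // The concatenated result is as tall as the largest child output.
  size_t outSize = 0;
  for (const arma::mat& out : childOutputs)
    outSize = std::max(outSize, (size_t) out.n_elem);

  // One column per child module, zero-padded to outSize rows.
  arma::mat output = arma::zeros<arma::mat>(outSize, childOutputs.size());
  for (size_t i = 0; i < childOutputs.size(); ++i)
    output.submat(0, i, childOutputs[i].n_elem - 1, i) =
        arma::vectorise(childOutputs[i]);

  output.print("concatenated");  // Column 0 is zero in rows 2 and 3.
  return 0;
}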
+ InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + + private: + //! Locally-stored number of inputs. + size_t inSize; + + //! Instantiated outputlayer used to evaluate the network. + OutputLayerType outputLayer; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class ConcatPerformance + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_performance_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance_impl.hpp b/src/mlpack/methods/ann/layer/concat_performance_impl.hpp new file mode 100644 index 00000000000..f014ac33f01 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_performance_impl.hpp @@ -0,0 +1,118 @@ +/** + * @file concat_performance_impl.hpp + * @author Marcus Edel + * + * Implementation of the ConcatPerformance class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_IMPL_HPP + +// In case it hasn't yet been included. +#include "concat_performance.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::ConcatPerformance(const size_t inSize, OutputLayerType&& outputLayer) : + inSize(inSize), + outputLayer(std::move(outputLayer)) +{ + // Nothing to do here. 
+} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +double ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Forward(const arma::Mat&& input, arma::Mat&& target) +{ + const size_t elements = input.n_elem / inSize; + + double output = 0; + for (size_t i = 0; i < input.n_elem; i+= elements) + { + arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); + output += outputLayer.Forward(std::move(subInput), std::move(target)); + } + + return output; +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +void ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + const size_t elements = input.n_elem / inSize; + + arma::mat subInput = input.submat(0, 0, elements - 1, 0); + arma::mat subOutput; + + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output = arma::zeros(subOutput.n_elem, inSize); + output.col(0) = subOutput; + + for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) + { + subInput = input.submat(i, 0, i + elements - 1, 0); + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output.col(j) = subOutput; + } +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +void ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Serialize(Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_performance_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/constant_layer.hpp b/src/mlpack/methods/ann/layer/constant.hpp similarity index 71% rename from src/mlpack/methods/ann/layer/constant_layer.hpp rename to src/mlpack/methods/ann/layer/constant.hpp index 31da87e7d9d..b24b44aa802 100644 --- a/src/mlpack/methods/ann/layer/constant_layer.hpp +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -1,8 +1,8 @@ /** - * @file constant_layer.hpp + * @file constant.hpp * @author Marcus Edel * - * Definition of the ConstantLayer class, which outputs a constant value given + * Definition of the Constant class, which outputs a constant value given * any input. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -10,8 +10,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_HPP +#define MLPACK_METHODS_ANN_LAYER_CONSTANT_HPP #include @@ -31,21 +31,17 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class ConstantLayer +class Constant { public: /** - * Create the ConstantLayer object that outputs a given constant scalar value + * Create the Constant object that outputs a given constant scalar value * given any input value. * * @param outSize The number of output units. * @param scalar The constant value used to create the constant output. 
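ConcatPerformance::Forward above splits the concatenated input into inSize equally sized blocks and sums the wrapped output layer's loss over them. A sketch using a hypothetical sum-of-squared-errors loss (ToyLoss) in place of the wrapped NegativeLogLikelihood layer:

#include <armadillo>
#include <iostream>

// Hypothetical stand-in for the wrapped output layer: a sum-of-squared-errors
// loss between one block and the target.
static double ToyLoss(const arma::mat& input, const arma::mat& target)
{
  return arma::accu(arma::square(input - target));
}

int main()
{
  const size_t inSize = 2;                         // Number of concatenated blocks.
  arma::mat input = {{1.0}, {2.0}, {3.0}, {4.0}};  // Two stacked 2-element blocks.
  arma::mat target = {{1.0}, {1.0}};

  // Mirror of ConcatPerformance::Forward: walk the input in blocks of
  // input.n_elem / inSize elements and accumulate the loss per block.
  const size_t elements = input.n_elem / inSize;
  double output = 0;
  for (size_t i = 0; i < input.n_elem; i += elements)
  {
    arma::mat block = input.submat(i, 0, i + elements - 1, 0);
    output += ToyLoss(block, target);
  }

  // Block one: (1-1)^2 + (2-1)^2 = 1; block two: (3-1)^2 + (4-1)^2 = 13.
  std::cout << "total loss: " << output << std::endl;  // 14
  return 0;
}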
*/ - ConstantLayer(const size_t outSize, const double scalar) - { - constantOutput = OutputDataType(outSize, 1); - constantOutput.fill(scalar); - } + Constant(const size_t outSize, const double scalar); /** * Ordinary feed forward pass of a neural network. The forward pass fills the @@ -54,11 +50,8 @@ class ConstantLayer * @param input Input data used for evaluating the specified function. * @param output Resulting output activation. */ - template - void Forward(const arma::Mat& /* input */, arma::Mat& output) - { - output = constantOutput; - } + template + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network. The backward pass of the @@ -68,14 +61,10 @@ class ConstantLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const arma::Mat& /* input */, - const arma::Mat& /* gy */, - arma::Mat& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols); - } + template + void Backward(const DataType&& /* input */, + DataType&& /* gy */, + DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -96,12 +85,15 @@ class ConstantLayer * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(constantOutput, "constantOutput"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + //! Locally-stored constant output matrix. OutputDataType constantOutput; @@ -115,7 +107,10 @@ class ConstantLayer OutputDataType outputParameter; }; // class ConstantLayer -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "constant_impl.hpp" -#endif +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/constant_impl.hpp b/src/mlpack/methods/ann/layer/constant_impl.hpp new file mode 100644 index 00000000000..09e0624c6e0 --- /dev/null +++ b/src/mlpack/methods/ann/layer/constant_impl.hpp @@ -0,0 +1,65 @@ +/** + * @file constant_impl.hpp + * @author Marcus Edel + * + * Implementation of the Constant class, which outputs a constant value given + * any input. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONSTANT_IMPL_HPP + +// In case it hasn't yet been included. +#include "constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +Constant::Constant( + const size_t outSize, + const double scalar) : + inSize(0), + outSize(outSize) +{ + constantOutput = OutputDataType(outSize, 1); + constantOutput.fill(scalar); +} + +template +template +void Constant::Forward( + const InputType&& input, OutputType&& output) +{ + if (inSize == 0) + { + inSize = input.n_elem; + } + + output = constantOutput; +} + +template +template +void Constant::Backward( + const DataType&& /* input */, DataType&& /* gy */, DataType&& g) +{ + g = arma::zeros(inSize, 1); +} + +template +template +void Constant::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(constantOutput, "constantOutput"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp deleted file mode 100644 index bbb918c8a49..00000000000 --- a/src/mlpack/methods/ann/layer/conv_layer.hpp +++ /dev/null @@ -1,324 +0,0 @@ -/** - * @file conv_layer.hpp - * @author Marcus Edel - * - * Definition of the ConvLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP - -#include -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the ConvLayer class. The ConvLayer class represents a - * single layer of a neural network. - * - * @tparam ForwardConvolutionRule Convolution to perform forward process. - * @tparam BackwardConvolutionRule Convolution to perform backward process. - * @tparam GradientConvolutionRule Convolution to calculate gradient. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename ForwardConvolutionRule = NaiveConvolution, - typename BackwardConvolutionRule = NaiveConvolution, - typename GradientConvolutionRule = NaiveConvolution, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class ConvLayer -{ - public: - /** - * Create the ConvLayer object using the specified number of input maps, - * output maps, filter size, stride and padding parameter. - * - * @param inMaps The number of input maps. - * @param outMaps The number of output maps. - * @param wfilter Width of the filter/kernel. - * @param wfilter Height of the filter/kernel. - * @param xStride Stride of filter application in the x direction. - * @param yStride Stride of filter application in the y direction. - * @param wPad Spatial padding width of the input. - * @param hPad Spatial padding height of the input. 
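The Constant module implemented above emits the same stored vector for any input and, since that output does not depend on the input, backpropagates a zero gradient. A short illustration; outSize = 3 and scalar = 7 are arbitrary.

#include <armadillo>

int main()
{
  // State of a hypothetical Constant module with outSize = 3 and scalar = 7.
  arma::mat constantOutput(3, 1);
  constantOutput.fill(7.0);

  // Forward ignores the input and emits the stored constant vector.
  arma::mat input = arma::randu<arma::mat>(5, 1);
  arma::mat output = constantOutput;

  // Backward: the output does not depend on the input, so the gradient
  // passed back to the previous module is a zero vector of the input's size.
  arma::mat g = arma::zeros<arma::mat>(input.n_elem, 1);

  output.print("constant output");  // 7, 7, 7
  return 0;
}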
- */ - ConvLayer(const size_t inMaps, - const size_t outMaps, - const size_t wfilter, - const size_t hfilter, - const size_t xStride = 1, - const size_t yStride = 1, - const size_t wPad = 0, - const size_t hPad = 0) : - wfilter(wfilter), - hfilter(hfilter), - inMaps(inMaps), - outMaps(outMaps), - xStride(xStride), - yStride(yStride), - wPad(wPad), - hPad(hPad) - { - weights.set_size(wfilter, hfilter, inMaps * outMaps); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - const size_t wConv = ConvOutSize(input.n_rows, wfilter, xStride, wPad); - const size_t hConv = ConvOutSize(input.n_cols, hfilter, yStride, hPad); - - output = arma::zeros >(wConv, hConv, outMaps); - for (size_t outMap = 0, outMapIdx = 0; outMap < outMaps; outMap++) - { - for (size_t inMap = 0; inMap < inMaps; inMap++, outMapIdx++) - { - arma::Mat convOutput; - ForwardConvolutionRule::Convolution(input.slice(inMap), - weights.slice(outMap), convOutput); - - output.slice(outMap) += convOutput; - } - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Cube& gy, - arma::Cube& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols, - inputParameter.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < inMaps; outMap++) - { - for (size_t inMap = 0; inMap < outMaps; inMap++, outMapIdx++) - { - arma::Mat rotatedFilter; - Rotate180(weights.slice(outMap * outMaps + inMap), rotatedFilter); - - arma::Mat output; - BackwardConvolutionRule::Convolution(gy.slice(inMap), rotatedFilter, - output); - - g.slice(outMap) += output; - } - } - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The input parameter used for calculating the gradient. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Cube& d, - arma::Cube& g) - { - g = arma::zeros >(weights.n_rows, weights.n_cols, - weights.n_slices); - - for (size_t outMap = 0; outMap < outMaps; outMap++) - { - for (size_t inMap = 0, s = outMap; inMap < inMaps; inMap++, s += outMaps) - { - arma::Cube inputSlices = input.slices(inMap, inMap); - arma::Cube deltaSlices = d.slices(outMap, outMap); - - arma::Cube output; - GradientConvolutionRule::Convolution(inputSlices, deltaSlices, output); - - for (size_t i = 0; i < output.n_slices; i++) - g.slice(s) += output.slice(i); - } - } - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. 
- OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(wfilter, "wfilter"); - ar & data::CreateNVP(hfilter, "hfilter"); - ar & data::CreateNVP(inMaps, "inMaps"); - ar & data::CreateNVP(outMaps, "outMaps"); - ar & data::CreateNVP(xStride, "xStride"); - ar & data::CreateNVP(yStride, "yStride"); - ar & data::CreateNVP(wPad, "wPad"); - ar & data::CreateNVP(hPad, "hPad"); - } - - private: - /* - * Rotates a 3rd-order tesor counterclockwise by 180 degrees. - * - * @param input The input data to be rotated. - * @param output The rotated output. - */ - template - void Rotate180(const arma::Cube& input, arma::Cube& output) - { - output = arma::Cube(input.n_rows, input.n_cols, input.n_slices); - - // * left-right flip, up-down flip */ - for (size_t s = 0; s < output.n_slices; s++) - output.slice(s) = arma::fliplr(arma::flipud(input.slice(s))); - } - - /* - * Rotates a dense matrix counterclockwise by 180 degrees. - * - * @param input The input data to be rotated. - * @param output The rotated output. - */ - template - void Rotate180(const arma::Mat& input, arma::Mat& output) - { - // * left-right flip, up-down flip */ - output = arma::fliplr(arma::flipud(input)); - } - - /* - * Return the convolution output size. - * - * @param size The size of the input (row or column). - * @param k The size of the filter (width or height). - * @param s The stride size (x or y direction). - * @param p The size of the padding (width or height). - * @return The convolution output size. - */ - size_t ConvOutSize(const size_t size, - const size_t k, - const size_t s, - const size_t p) - { - return std::floor(size + p * 2 - k) / s + 1; - } - - //! Locally-stored filter/kernel width. - size_t wfilter; - - //! Locally-stored filter/kernel height. - size_t hfilter; - - //! Locally-stored number of input maps. - size_t inMaps; - - //! Locally-stored number of output maps. - size_t outMaps; - - //! Locally-stored stride of the filter in x-direction. - size_t xStride; - - //! Locally-stored stride of the filter in y-direction. - size_t yStride; - - //! Locally-stored padding width. - size_t wPad; - - //! Locally-stored padding height. - size_t hPad; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class ConvLayer - -//! Layer traits for the convolution layer. 
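Both the removed ConvLayer and the new Convolution module size their output maps with ConvOutSize, i.e. floor((n + 2p - k) / s) + 1, and fill them by sliding the kernel over the input. A rough valid-mode sliding-window sketch (stride 1, no padding, kernel flipping omitted), with ConvOutSize reproduced as a free function for illustration:

#include <armadillo>
#include <iostream>

// Output size rule used by ConvOutSize above: floor((n + 2p - k) / s) + 1.
static size_t ConvOutSize(const size_t n, const size_t k,
                          const size_t s, const size_t p)
{
  return (n + 2 * p - k) / s + 1;
}

int main()
{
  // Valid-mode sliding-window sketch: stride 1, no padding, kernel flipping
  // omitted for brevity.
  arma::mat input = arma::randu<arma::mat>(28, 28);
  arma::mat kernel = arma::randu<arma::mat>(5, 5);

  const size_t outRows = ConvOutSize(input.n_rows, kernel.n_rows, 1, 0);  // 24
  const size_t outCols = ConvOutSize(input.n_cols, kernel.n_cols, 1, 0);  // 24

  arma::mat output(outRows, outCols);
  for (size_t i = 0; i < outRows; ++i)
  {
    for (size_t j = 0; j < outCols; ++j)
    {
      output(i, j) = arma::accu(kernel %
          input.submat(i, j, i + kernel.n_rows - 1, j + kernel.n_cols - 1));
    }
  }

  std::cout << "feature map: " << output.n_rows << " x " << output.n_cols
      << std::endl;  // 24 x 24
  return 0;
}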
-template< - typename ForwardConvolutionRule, - typename BackwardConvolutionRule, - typename GradientConvolutionRule, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp new file mode 100644 index 00000000000..a7f647ed226 --- /dev/null +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -0,0 +1,344 @@ +/** + * @file convolution.hpp + * @author Marcus Edel + * + * Definition of the Convolution module class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONVOLUTION_HPP +#define MLPACK_METHODS_ANN_LAYER_CONVOLUTION_HPP + +#include + +#include +#include +#include +#include + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Convolution class. The Convolution class represents a + * single layer of a neural network. + * + * @tparam ForwardConvolutionRule Convolution to perform forward process. + * @tparam BackwardConvolutionRule Convolution to perform backward process. + * @tparam GradientConvolutionRule Convolution to calculate gradient. + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename ForwardConvolutionRule = NaiveConvolution, + typename BackwardConvolutionRule = NaiveConvolution, + typename GradientConvolutionRule = NaiveConvolution, + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Convolution +{ +public: + //! Create the Convolution object. + Convolution(); + + /** + * Create the Convolution object using the specified number of input maps, + * output maps, filter size, stride and padding parameter. + * + * @param inSize The number of input maps. + * @param outSize The number of output maps. + * @param kW Width of the filter/kernel. + * @param kH Height of the filter/kernel. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. + * @param padW Padding width of the input. + * @param padH Padding height of the input. + * @param inputWidth The widht of the input data. + * @param inputHeight The height of the input data. + */ + Convolution(const size_t inSize, + const size_t outSize, + const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const size_t padW = 0, + const size_t padH = 0, + const size_t inputWidth = 0, + const size_t inputHeight = 0); + + /* + * Set the weight and bias term. + */ + void Reset(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
+ */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient); + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! Get the input width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify input the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the input height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the input height. + size_t& InputHeight() { return inputHeight; } + + //! Get the output width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the output width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the output height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the output height. + size_t& OutputHeight() { return outputHeight; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + /* + * Return the convolution output size. + * + * @param size The size of the input (row or column). + * @param k The size of the filter (width or height). + * @param s The stride size (x or y direction). + * @param p The size of the padding (width or height). + * @return The convolution output size. + */ + size_t ConvOutSize(const size_t size, + const size_t k, + const size_t s, + const size_t p) + { + return std::floor(size + p * 2 - k) / s + 1; + } + + /* + * Rotates a 3rd-order tensor counterclockwise by 180 degrees. + * + * @param input The input data to be rotated. + * @param output The rotated output. + */ + template + void Rotate180(const arma::Cube& input, arma::Cube& output) + { + output = arma::Cube(input.n_rows, input.n_cols, input.n_slices); + + // * left-right flip, up-down flip */ + for (size_t s = 0; s < output.n_slices; s++) + output.slice(s) = arma::fliplr(arma::flipud(input.slice(s))); + } + + /* + * Rotates a dense matrix counterclockwise by 180 degrees. 
+ * + * @param input The input data to be rotated. + * @param output The rotated output. + */ + template + void Rotate180(const arma::Mat& input, arma::Mat& output) + { + // * left-right flip, up-down flip */ + output = arma::fliplr(arma::flipud(input)); + } + + /* + * Pad the given input data. + * + * @param input The input to be padded. + * @param wPad Padding width of the input. + * @param hPad Padding height of the input. + * @param output The padded output data. + */ + template + void Pad(const arma::Mat& input, + size_t wPad, + size_t hPad, + arma::Mat& output) + { + if (output.n_rows != input.n_rows + wPad * 2 || + output.n_cols != input.n_cols + hPad * 2) + { + output = arma::zeros(input.n_rows + wPad * 2, input.n_cols + hPad * 2); + } + + output.submat(wPad, hPad, wPad + input.n_rows - 1, + hPad + input.n_cols - 1) = input; + } + + /* + * Pad the given input data. + * + * @param input The input to be padded. + * @param wPad Padding width of the input. + * @param hPad Padding height of the input. + * @param output The padded output data. + */ + template + void Pad(const arma::Cube& input, + size_t wPad, + size_t hPad, + arma::Cube& output) + { + output = arma::zeros(input.n_rows + wPad * 2, + input.n_cols + hPad * 2, input.n_slices); + + for (size_t i = 0; i < input.n_slices; ++i) + { + Pad(input.slice(i), wPad, hPad, output.slice(i)); + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored filter/kernel width. + size_t kW; + + //! Locally-stored filter/kernel height. + size_t kH; + + //! Locally-stored stride of the filter in x-direction. + size_t dW; + + //! Locally-stored stride of the filter in y-direction. + size_t dH; + + //! Locally-stored padding width. + size_t padW; + + //! Locally-stored padding height. + size_t padH; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight object. + arma::cube weight; + + //! Locally-stored bias term object. + arma::mat bias; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored transformed output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed padded input parameter. + arma::cube inputPaddedTemp; + + //! Locally-stored transformed error parameter. + arma::cube gTemp; + + //! Locally-stored transformed gradient parameter. + arma::cube gradientTemp; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Convolution + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "convolution_impl.hpp" + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution_impl.hpp b/src/mlpack/methods/ann/layer/convolution_impl.hpp new file mode 100644 index 00000000000..422c9947095 --- /dev/null +++ b/src/mlpack/methods/ann/layer/convolution_impl.hpp @@ -0,0 +1,333 @@ +/** + * @file convolution_impl.hpp + * @author Marcus Edel + * + * Implementation of the Convolution module class. 
+ * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONVOLUTION_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONVOLUTION_IMPL_HPP + +// In case it hasn't yet been included. +#include "convolution.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Convolution() +{ + // Nothing to do here. +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Convolution( + const size_t inSize, + const size_t outSize, + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const size_t padW, + const size_t padH, + const size_t inputWidth, + const size_t inputHeight) : + inSize(inSize), + outSize(outSize), + kW(kW), + kH(kH), + dW(dW), + dH(dH), + padW(padW), + padH(padH), + inputWidth(inputWidth), + inputHeight(inputHeight), + outputWidth(0), + outputHeight(0) +{ + weights.set_size((outSize * inSize * kW * kH) + outSize, 1); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Reset() +{ + weight = arma::cube(weights.memptr(), kW, kH, + outSize * inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Forward(const arma::Mat&& input, arma::Mat&& output) +{ + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); + + if (padW != 0 || padH != 0) + { + Pad(inputTemp, padW, padH, inputPaddedTemp); + } + + size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); + size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); + + outputTemp = arma::zeros >(wConv, hConv, outSize); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat convOutput; + + if (padW != 0 || padH != 0) + { + ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + else + { + ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + + outputTemp.slice(outMap) += convOutput; + } + + outputTemp.slice(outMap) += bias(outMap); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = 
outputTemp.n_rows; + outputHeight = outputTemp.n_cols; +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), + outputWidth, outputHeight, outSize); + gTemp = arma::zeros >(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat rotatedFilter; + Rotate180(weight.slice(outMapIdx), rotatedFilter); + + arma::Mat output; + BackwardConvolutionRule::Convolution(mappedError.slice(outMap), + rotatedFilter, output, dW, dH); + + if (padW != 0 || padH != 0) + { + gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, + rotatedFilter.n_cols / 2, + rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, + rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); + } + else + { + gTemp.slice(inMap) += output; + } + } + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Gradient( + const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) +{ + arma::cube mappedError; + if (padW != 0 && padH != 0) + { + mappedError = arma::cube(error.memptr(), outputWidth / padW, + outputHeight / padH, outSize); + } + else + { + mappedError = arma::cube(error.memptr(), outputWidth, + outputHeight, outSize); + } + + gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, + weight.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, + s += outSize) + { + arma::Cube inputSlices; + if (padW != 0 || padH != 0) + { + inputSlices = inputPaddedTemp.slices(inMap, inMap); + } + else + { + inputSlices = inputTemp.slices(inMap, inMap); + } + + arma::Cube deltaSlices = mappedError.slices(outMap, outMap); + + arma::Cube output; + GradientConvolutionRule::Convolution(inputSlices, deltaSlices, + output, dW, dH); + + if ((padW != 0 || padH != 0) && + (gradientTemp.n_rows < output.n_rows && + gradientTemp.n_cols < output.n_cols)) + { + for (size_t i = 0; i < output.n_slices; i++) + { + arma::mat subOutput = output.slice(i); + + gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, + subOutput.n_cols / 2, + subOutput.n_rows / 2 + gradientTemp.n_rows - 1, + subOutput.n_cols / 2 + gradientTemp.n_cols - 1); + } + } + else + { + for (size_t i = 0; i < output.n_slices; i++) + { + gradientTemp.slice(s) += output.slice(i); + } + } + } + + gradient.submat(weight.n_elem + outMap, 0, + weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( + outMap, outMap)); + } + + // gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::Mat( + gradientTemp.memptr(), gradientTemp.n_elem, 1, false, false); + + + // arma::vectorise(gradientTemp); +} + +template< + typename ForwardConvolutionRule, + 
typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + ar & data::CreateNVP(padW, "padW"); + ar & data::CreateNVP(padH, "padH"); + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(inputHeight, "inputHeight"); + ar & data::CreateNVP(outputWidth, "outputWidth"); + ar & data::CreateNVP(outputHeight, "outputHeight"); +} + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp new file mode 100644 index 00000000000..f74c8e6aaac --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -0,0 +1,209 @@ +/** + * @file dropconnect.hpp + * @author Palash Ahuja + * @author Marcus Edel + * + * Definition of the DropConnect class, which implements a regularizer + * that randomly sets connections to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_HPP + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "linear.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The DropConnect layer is a regularizer that randomly with probability + * ratio sets the connection values to zero and scales the remaining + * elements by factor 1 /(1 - ratio). The output is scaled with 1 / (1 - p) + * when deterministic is false. In the deterministic mode(during testing), + * the layer just computes the output. The output is computed according + * to the input layer. If no input layer is given, it will take a linear layer + * as default. + * + * Note: + * During training you should set deterministic to false and during testing + * you should set deterministic to true. + * + * For more information, see the following. + * + * @code + * @inproceedings{WanICML2013, + * title={Regularization of Neural Networks using DropConnect}, + * booktitle = {Proceedings of the 30th International Conference on Machine + * Learning(ICML - 13)}, + * author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and + * Rob Fergus}, + * year = {2013} + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class DropConnect +{ + public: + //! Create the DropConnect object. 
+ DropConnect(); + + /** + * Creates the DropConnect Layer as a Linear Object that takes input size, + * output size and ratio as parameter. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + * @param ratio The probability of setting a value to zero. + */ + DropConnect(const size_t inSize, + const size_t outSize, + const double ratio = 0.5); + + ~DropConnect(); + + /** + * Ordinary feed forward pass of the DropConnect layer. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of the DropConnect layer. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g); + + /** + * Calculate the gradient using the output delta and the input activation. + * + * @param input The propagated input. + * @param d The calculated error. + * @param g The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */); + + //! Get the model modules. + std::vector& Model() { return network; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + + //! Modify the value of the deterministic parameter. + bool &Deterministic() { return deterministic; } + + //! The probability of setting a value to zero. + double Ratio() const { return ratio; } + + //! Modify the probability of setting a value to zero. + void Ratio(const double r) + { + ratio = r; + scale = 1.0 / (1.0 - ratio); + } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + +private: + //! The probability of setting a value to zero. + double ratio; + + //! The scale fraction. + double scale; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored mask object. + OutputDataType mask; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Denoise mask for the weights. + OutputDataType denoise; + + //! Locally-stored layer module. + LayerTypes baseLayer; + + //! 
Locally-stored network modules. + std::vector network; +}; // class DropConnect. + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "dropconnect_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/dropconnect_impl.hpp b/src/mlpack/methods/ann/layer/dropconnect_impl.hpp new file mode 100644 index 00000000000..294952b9bcf --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropconnect_impl.hpp @@ -0,0 +1,118 @@ +/** + * @file dropconnect_impl.hpp + * @author Palash Ahuja + * @author Marcus Edel + * + * Implementation of the DropConnect class, which implements a regularizer + * that randomly sets connections to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_IMPL_HPP + +// In case it hasn't yet been included. +#include "dropconnect.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +DropConnect::DropConnect() +{ + // Nothing to do here. +} + +template +DropConnect::DropConnect( + const size_t inSize, + const size_t outSize, + const double ratio) : + ratio(ratio), + scale(1.0 / (1 - ratio)), + baseLayer(new Linear(inSize, outSize)) +{ + network.push_back(baseLayer); +} + +template +DropConnect::~DropConnect() +{ + boost::apply_visitor(DeleteVisitor(), baseLayer); +} + +template +template +void DropConnect::Forward( + arma::Mat&& input, + arma::Mat&& output) +{ + // The DropConnect mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + baseLayer); + } + else + { + // Save weights for denoising. + boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); + + // Scale with input / (1 - ratio) and set values to zero with + // probability ratio. + mask = arma::randu >(denoise.n_rows, denoise.n_cols); + mask.transform([&](double val) { return (val > ratio); }); + + boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), + baseLayer); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + baseLayer); + + output = output * scale; + } +} + +template +template +void DropConnect::Backward( + arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) +{ + boost::apply_visitor(BackwardVisitor(std::move(input), std::move(gy), + std::move(g)), baseLayer); +} + +template +template +void DropConnect::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + baseLayer); + + // Denoise the weights. 
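Forward() and Gradient() above temporarily swap the base layer's weights for a masked copy and restore ("denoise") them once the gradient has been computed. A standalone Armadillo sketch of that mask, rescale, and restore cycle, without the visitor machinery (the weight shape, seed, and ratio are illustrative):

#include <armadillo>

int main()
{
  arma::arma_rng::set_seed(42);

  const double ratio = 0.5;                  // probability of zeroing a connection
  const double scale = 1.0 / (1.0 - ratio);  // rescale the surviving connections

  arma::mat weights(3, 4, arma::fill::randn);
  arma::mat input(4, 1, arma::fill::randu);

  // Save the clean weights so they can be restored after the backward pass.
  arma::mat denoise = weights;

  // Keep each connection with probability (1 - ratio), as in Forward().
  arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
  mask.transform([&](double val) { return (val > ratio); });

  weights = weights % mask;                  // drop connections
  arma::mat output = scale * (weights * input);

  // ... forward/backward with the masked weights ...

  weights = denoise;                         // restore the clean weights
  output.print("output");
}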
+ boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); +} + +template +template +void DropConnect::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(scale, "scale"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp deleted file mode 100644 index fdb14cbf11c..00000000000 --- a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp +++ /dev/null @@ -1,361 +0,0 @@ -/** - * @file dropconnect_layer.hpp - * @author Palash Ahuja - * - * Definition of the DropConnectLayer class, which implements a regularizer - * that randomly sets connections to zero. Preventing units from co-adapting. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP - -#include - -#include "empty_layer.hpp" -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The DropConnect layer is a regularizer that randomly with probability - * ratio sets the connection values to zero and scales the remaining - * elements by factor 1 /(1 - ratio). The output is scaled with 1 / (1 - p) - * when deterministic is false. In the deterministic mode(during testing), - * the layer just computes the output. The output is computed according - * to the input layer. If no input layer is given, it will take a linear layer - * as default. - * - * Note: - * During training you should set deterministic to false and during testing - * you should set deterministic to true. - * - * For more information, see the following. - * - * @code - * @inproceedings{WanICML2013, - * title={Regularization of Neural Networks using DropConnect}, - * booktitle = {Proceedings of the 30th International Conference on Machine - * Learning(ICML - 13)}, - * author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and - * Rob Fergus}, - * year = {2013} - * } - * @endcode - * - * @tparam InputLayer Layer used instead of the internal linear layer. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template< - typename InputLayer = EmptyLayer, - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class DropConnectLayer -{ - public: - /** - * Creates the DropConnect Layer as a Linear Object that takes input size, - * output size and ratio as parameter. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - * @param ratio The probability of setting a value to zero. - */ - DropConnectLayer (const size_t inSize, - const size_t outSize, - const double ratio = 0.5) : - inSize(inSize), - outSize(outSize), - ratio(ratio), - scale(1.0 / (1 - ratio)), - uselayer(false) - { - weights.set_size(outSize, inSize); - } - - /** - * Create the DropConnectLayer object using the specified ratio and rescale - * parameter. This takes the - * - * @param ratio The probability of setting a connection to zero. 
- * @param inputLayer the layer object that the dropconnect connection would take. - */ - template - DropConnectLayer(InputLayerType &&inputLayer, - const double ratio = 0.5) : - baseLayer(std::forward(inputLayer)), - ratio(ratio), - scale(1.0 / (1 - ratio)), - uselayer(true) - { - static_assert(std::is_same::type, - InputLayer>::value, - "The type of the inputLayer must be InputLayerType"); - } - /** - * Ordinary feed forward pass of the DropConnect layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat &input, arma::Mat &output) - { - // The DropConnect mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (uselayer) - { - baseLayer.Forward(input, output); - } - else - { - output = weights * input; - } - } - else - { - if (uselayer) - { - // Scale with input / (1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(baseLayer.Weights().n_rows, - baseLayer.Weights().n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - // Save weights for denoising. - denoise = baseLayer.Weights(); - - baseLayer.Weights() = baseLayer.Weights() % mask; - - baseLayer.Forward(input, output); - } - else - { - // Scale the input / ( 1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(weights.n_rows, weights.n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - // Save weights for denoising. - denoise = weights; - - weights = weights % mask; - output = weights * input; - } - - output = output * scale; - } - } - - /** - * Ordinary feed backward pass of the DropConnect layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, const DataType& gy, DataType& g) - { - if (uselayer) - { - baseLayer.Backward(input, gy, g); - } - else - { - g = weights.t() * gy; - } - } - - /** - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Mat& d, - GradientDataType& g) - { - if (uselayer) - { - baseLayer.Gradient(input, d, g); - - // Denoise the weights. - baseLayer.Weights() = denoise; - } - else - { - g = d * input.t(); - - // Denoise the weights. - weights = denoise; - } - } - - //! Get the weights. - OutputDataType const& Weights() const - { - if (uselayer) - return baseLayer.Weights(); - - return weights; - } - - //! Modify the weights. - OutputDataType& Weights() - { - if (uselayer) - return baseLayer.Weights(); - - return weights; - } - - //! Get the input parameter. - InputDataType &InputParameter() const - { - if (uselayer) - return baseLayer.InputParameter(); - - return inputParameter; - } - - //! Modify the input parameter. - InputDataType &InputParameter() - { - if (uselayer) - return baseLayer.InputParameter(); - - return inputParameter; - } - - //! Get the output parameter. - OutputDataType &OutputParameter() const - { - if (uselayer) - return baseLayer.OutputParameter(); - - return outputParameter; - } - - //! Modify the output parameter. - OutputDataType &OutputParameter() - { - if (uselayer) - return baseLayer.OutputParameter(); - - return outputParameter; - } - - //! Get the delta. 
- OutputDataType const& Delta() const - { - if (uselayer) - return baseLayer.Delta(); - - return delta; - } - - //! Modify the delta. - OutputDataType& Delta() - { - if (uselayer) - return baseLayer.Delta(); - - return delta; - } - - //! Get the gradient. - OutputDataType const& Gradient() const - { - if (uselayer) - return baseLayer.Gradient(); - - return gradient; - } - - //! Modify the gradient. - OutputDataType& Gradient() - { - if (uselayer) - return baseLayer.Gradient(); - - return gradient; - } - - //! The value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - - //! Modify the value of the deterministic parameter. - bool &Deterministic() { return deterministic; } - - //! The probability of setting a value to zero. - double Ratio() const { return ratio; } - - //! Modify the probability of setting a value to zero. - void Ratio(const double r) - { - ratio = r; - scale = 1.0 / (1.0 - ratio); - } - -private: - //! Locally-stored layer object. - InputLayer baseLayer; - - //! Locally stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! The probability of setting a value to zero. - double ratio; - - //! The scale fraction. - double scale; - - //! If true the default layer is used otherwise a new layer will be created. - bool uselayer; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored mast object. - OutputDataType mask; - - //! If true dropout and scaling is disabled, see notes above. - bool deterministic; - - //! Denoise mask for the weights. - OutputDataType denoise; -}; // class DropConnectLayer. - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/dropout_layer.hpp b/src/mlpack/methods/ann/layer/dropout.hpp similarity index 59% rename from src/mlpack/methods/ann/layer/dropout_layer.hpp rename to src/mlpack/methods/ann/layer/dropout.hpp index 3ed0bd62a60..4371f21dad3 100644 --- a/src/mlpack/methods/ann/layer/dropout_layer.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -1,8 +1,8 @@ /** - * @file dropout_layer.hpp + * @file dropout.hpp * @author Marcus Edel * - * Definition of the DropoutLayer class, which implements a regularizer that + * Definition of the Dropout class, which implements a regularizer that * randomly sets units to zero. Preventing units from co-adapting. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -10,8 +10,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPOUT_HPP #include @@ -51,57 +51,17 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class DropoutLayer +class Dropout { public: - /** - * Create the DropoutLayer object using the specified ratio and rescale + * Create the Dropout object using the specified ratio and rescale * parameter. * * @param ratio The probability of setting a value to zero. 
* @param rescale If true the input is rescaled when deterministic is False. */ - DropoutLayer(const double ratio = 0.5, - const bool rescale = true) : - ratio(ratio), - scale(1.0 / (1.0 - ratio)), - rescale(rescale) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of the dropout layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } + Dropout(const double ratio = 0.5, const bool rescale = true); /** * Ordinary feed forward pass of the dropout layer. @@ -110,31 +70,7 @@ class DropoutLayer * @param output Resulting output activation. */ template - void Forward(const arma::Cube& input, arma::Cube& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols, - input.n_slices); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the dropout layer. @@ -143,13 +79,10 @@ class DropoutLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const DataType& /* unused */, - const DataType& gy, - DataType& g) - { - g = gy % mask * scale; - } + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -190,11 +123,7 @@ class DropoutLayer * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(ratio, "ratio"); - ar & data::CreateNVP(rescale, "rescale"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored delta object. @@ -220,33 +149,12 @@ class DropoutLayer //! If true the input is rescaled when deterministic is False. bool rescale; -}; // class DropoutLayer - -//! Layer traits for the bias layer. -template < - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -/** - * Standard Dropout-Layer2D. - */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -using DropoutLayer2D = DropoutLayer; +}; // class Dropout } // namespace ann } // namespace mlpack +// Include implementation. 
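In training mode, Dropout::Forward() masks units with probability ratio and rescales the survivors by 1 / (1 - ratio), so the expected activation is preserved. A standalone sketch of just that step (the vector size and seed are arbitrary):

#include <armadillo>
#include <iostream>

int main()
{
  arma::arma_rng::set_seed(7);

  const double ratio = 0.3;                  // probability of zeroing a unit
  const double scale = 1.0 / (1.0 - ratio);

  arma::vec input(10000, arma::fill::ones);

  // Bernoulli(1 - ratio) mask, built the same way as in Dropout::Forward().
  arma::vec mask = arma::randu<arma::vec>(input.n_elem);
  mask.transform([&](double val) { return (val > ratio); });

  arma::vec output = input % mask * scale;

  // The surviving units are scaled up by 1 / (1 - ratio), so the mean stays
  // close to the input mean of 1.0.
  std::cout << arma::mean(output) << std::endl;  // approximately 1.0
}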
+#include "dropout_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/dropout_impl.hpp b/src/mlpack/methods/ann/layer/dropout_impl.hpp new file mode 100644 index 00000000000..b805a94d464 --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropout_impl.hpp @@ -0,0 +1,84 @@ +/** + * @file dropout_impl.hpp + * @author Marcus Edel + * + * Implementation of the Dropout class, which implements a regularizer that + * randomly sets units to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPOUT_IMPL_HPP + +// In case it hasn't yet been included. +#include "dropout.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Dropout::Dropout( + const double ratio, const bool rescale) : + ratio(ratio), + scale(1.0 / (1.0 - ratio)), + rescale(rescale) +{ + // Nothing to do here. +} + +template +template +void Dropout::Forward( + const arma::Mat&& input, + arma::Mat&& output) +{ + // The dropout mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + if (!rescale) + { + output = input; + } + else + { + output = input * scale; + } + } + else + { + // Scale with input / (1 - ratio) and set values to zero with probability + // ratio. + mask = arma::randu >(input.n_rows, input.n_cols); + mask.transform( [&](double val) { return (val > ratio); } ); + output = input % mask * scale; + } +} + +template +template +void Dropout::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy % mask * scale; +} + +template +template +void Dropout::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(rescale, "rescale"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp similarity index 59% rename from src/mlpack/methods/ann/layer/glimpse_layer.hpp rename to src/mlpack/methods/ann/layer/glimpse.hpp index 3f1e9dffabb..e007fae30b9 100644 --- a/src/mlpack/methods/ann/layer/glimpse_layer.hpp +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -1,5 +1,5 @@ /** - * @file glimpse_layer.hpp + * @file glimpse.hpp * @author Marcus Edel * * Definition of the GlimpseLayer class, which takes an input image and a @@ -17,22 +17,54 @@ * year = {2014}, * } * @endcode - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_HPP +#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_HPP #include -#include + +#include "layer_types.hpp" #include namespace mlpack { namespace ann /** Artificial Neural Network. */ { + +/* + * The mean pooling rule for convolution neural networks. Average all values + * within the receptive block. 
+ */ +class MeanPoolingRule +{ + public: + /* + * Return the average value within the receptive block. + * + * @param input Input used to perform the pooling operation. + */ + template + double Pooling(const MatType& input) + { + return arma::mean(arma::mean(input)); + } + + /* + * Set the average value within the receptive block. + * + * @param input Input used to perform the pooling operation. + * @param value The unpooled value. + * @param output The unpooled output data. + */ + template + void Unpooling(const MatType& input, const double value, MatType& output) + { + output = arma::zeros(input.n_rows, input.n_cols); + const double mean = arma::mean(arma::mean(input)); + + output.elem(arma::find(mean == input, 1)).fill(value); + } +}; + /** * The glimpse layer returns a retina-like representation * (down-scaled cropped images) of increasing scale around a given location in a @@ -44,10 +76,10 @@ namespace ann /** Artificial Neural Network. */ { * arma::sp_mat or arma::cube). */ template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat > -class GlimpseLayer +class Glimpse { public: @@ -60,18 +92,15 @@ class GlimpseLayer * @param depth The number of patches to crop per glimpse. * @param scale The scaling factor used to create the increasing retina-like * representation. + * @param inputWidth The input width of the given input data. + * @param inputHeight The input height of the given input data. */ - GlimpseLayer(const size_t inSize, - const size_t size, - const size_t depth = 3, - const size_t scale = 2) : - inSize(inSize), - size(size), - depth(depth), - scale(scale) - { - // Nothing to do here. - } + Glimpse(const size_t inSize, + const size_t size, + const size_t depth = 3, + const size_t scale = 2, + const size_t inputWidth = 0, + const size_t inputHeight = 0); /** * Ordinary feed forward pass of the glimpse layer. @@ -80,68 +109,7 @@ class GlimpseLayer * @param output Resulting output activation. 
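MeanPoolingRule above reduces a receptive block to its average and, on the backward path, writes the unpooled value back at a position matching that average. A standalone usage sketch (the 2x2 block and the unpooled value are illustrative):

#include <armadillo>
#include <iostream>

int main()
{
  arma::mat block = { { 2.0, 4.0 },
                      { 4.0, 6.0 } };

  // Forward: average over the receptive block (MeanPoolingRule::Pooling()).
  const double pooled = arma::mean(arma::mean(block));
  std::cout << "pooled = " << pooled << std::endl;  // 4

  // Backward: write the unpooled value at the first entry equal to the block
  // mean (MeanPoolingRule::Unpooling()).
  arma::mat unpooled = arma::zeros(block.n_rows, block.n_cols);
  unpooled.elem(arma::find(block == pooled, 1)).fill(0.25);
  unpooled.print("unpooled");  // 0.25 at the first matching entry (column-major), zeros elsewhere
}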
*/ template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = arma::Cube(size, size, depth * input.n_slices); - - inputDepth = input.n_slices / inSize; - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - input.n_rows + padSize * 2, input.n_cols + padSize * 2, - input.n_slices / inSize); - - inputPadded.tube(padSize, padSize, padSize + input.n_rows - 1, - padSize + input.n_cols - 1) = input.subcube(0, 0, - inputIdx * inputDepth, input.n_rows - 1, input.n_cols - 1, - (inputIdx + 1) * inputDepth - 1); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < output.n_slices; j += (inSize * depth), paddedSlice++) - { - output.slice(j) = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < output.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingInput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Pooling(glimpseSize / size, poolingInput, output.slice(j)); - } - else - { - ReSampling(poolingInput, output.slice(j)); - } - } - } - } - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the glimpse layer. @@ -150,93 +118,10 @@ class GlimpseLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const InputType& input, - const ErrorType& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. 
- arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - g = arma::zeros(inputParameter.n_rows, inputParameter.n_cols, - inputParameter.n_slices); - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputParameter.n_rows + padSize * 2, inputParameter.n_cols + - padSize * 2, inputParameter.n_slices / inSize); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = mappedError.slice(j); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingOutput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Unpooling(inputParameter.slice(paddedSlice), mappedError.slice(j), - poolingOutput); - } - else - { - DownwardReSampling(inputParameter.slice(paddedSlice), - mappedError.slice(j), poolingOutput); - } - - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = poolingOutput; - } - } - - g += inputPadded.tube(padSize, padSize, padSize + - inputParameter.n_rows - 1, padSize + inputParameter.n_cols - 1); - } - } - - Transform(g); - } + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -260,6 +145,37 @@ class GlimpseLayer this->location = location; } + //! Get the input width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify input the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the input height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the input height. + size_t& InputHeight() { return inputHeight; } + + //! Get the output width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the output width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the output height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the output height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer. 
+ */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: /* * Transform the given input by changing rows to columns. @@ -447,6 +363,30 @@ class GlimpseLayer } } + //! The size of the input units. + size_t inSize; + + //! The used glimpse size (height = width). + size_t size; + + //! The number of patches to crop per glimpse. + size_t depth; + + //! The scale fraction. + size_t scale; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + //! Locally-stored delta object. OutputDataType delta; @@ -459,26 +399,32 @@ class GlimpseLayer //! Locally-stored depth of the input. size_t inputDepth; - //! The size of the input units. - size_t inSize; - - //! The used glimpse size (height = width). - size_t size; - - //! The number of patches to crop per glimpse. - size_t depth; + //! Locally-stored transformed input parameter. + arma::cube inputTemp; - //! The scale fraction. - size_t scale; + //! Locally-stored transformed output parameter. + arma::cube outputTemp; //! The x and y coordinate of the center of the output glimpse. arma::mat location; //! Locally-stored object to perform the mean pooling operation. - MeanPooling pooling; + MeanPoolingRule pooling; + + //! Location-stored module location parameter. + std::vector locationParameter; + + //! Location-stored transformed gradient paramter. + arma::cube gTemp; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; }; // class GlimpseLayer -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "glimpse_impl.hpp" -#endif +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/glimpse_impl.hpp b/src/mlpack/methods/ann/layer/glimpse_impl.hpp new file mode 100644 index 00000000000..40ec37c932d --- /dev/null +++ b/src/mlpack/methods/ann/layer/glimpse_impl.hpp @@ -0,0 +1,224 @@ +/** + * @file glimpse_impl.hpp + * @author Marcus Edel + * + * Implementation of the GlimpseLayer class, which takes an input image and a + * location to extract a retina-like representation of the input image at + * different increasing scales. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_IMPL_HPP + +// In case it hasn't yet been included. +#include "glimpse.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Glimpse::Glimpse( + const size_t inSize, + const size_t size, + const size_t depth, + const size_t scale, + const size_t inputWidth, + const size_t inputHeight) : + inSize(inSize), + size(size), + depth(depth), + scale(scale), + inputWidth(inputWidth), + inputHeight(inputHeight) +{ + // Nothing to do here. 
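The glimpse extraction (GlimpseLayer::Forward() above, Glimpse::Forward() below) maps a normalized location in [-1, 1]^2 onto pixel coordinates of the padded input before cropping. A standalone sketch of that coordinate mapping (all sizes and the location are illustrative):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>

int main()
{
  // A 28x28 input, glimpse size 8, and a normalized location (lx, ly).
  const std::size_t rows = 28, cols = 28, glimpseSize = 8;
  const double lx = 0.5, ly = -1.0;

  const std::size_t padSize = std::floor((glimpseSize - 1) / 2);   // 3
  const std::size_t h = (rows + 2 * padSize) - glimpseSize;        // 26
  const std::size_t w = (cols + 2 * padSize) - glimpseSize;        // 26

  // (-1, -1) maps to the top-left corner, (1, 1) to the bottom-right corner,
  // mirroring the x/y computation in Glimpse::Forward().
  const std::size_t x = std::min(h, (std::size_t) std::max(0.0, (lx + 1) / 2.0 * h));
  const std::size_t y = std::min(w, (std::size_t) std::max(0.0, (ly + 1) / 2.0 * w));

  std::cout << "crop starts at (" << x << ", " << y << ")" << std::endl;  // (19, 0)
}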
+} + +template +template +void Glimpse::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); + outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); + + location = input.submat(0, 1, 1, 1); + + if (!deterministic) + { + locationParameter.push_back(location); + } + + inputDepth = inputTemp.n_slices / inSize; + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, + inputTemp.n_slices / inSize); + + inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, + padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, + inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, + (inputIdx + 1) * inputDepth - 1); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + outputTemp.slice(j) = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingInput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); + } + else + { + ReSampling(poolingInput, outputTemp.slice(j)); + } + } + } + } + } + + for (size_t i = 0; i < outputTemp.n_slices; ++i) + { + outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; +} + +template +template +void Glimpse::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + // Generate a cube using the backpropagated error matrix. 
+ arma::Cube mappedError = arma::zeros(outputWidth, + outputHeight, 1); + + location = locationParameter.back(); + locationParameter.pop_back(); + + for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + { + for (size_t i = 0; i < gy.n_cols; i++) + { + mappedError.slice(s + i) = arma::Mat(gy.memptr(), + outputWidth, outputHeight); + } + } + + gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, + inputTemp.n_slices); + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + + padSize * 2, inputTemp.n_slices / inSize); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = mappedError.slice(j); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingOutput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), + poolingOutput); + } + else + { + DownwardReSampling(inputTemp.slice(paddedSlice), + mappedError.slice(j), poolingOutput); + } + + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = poolingOutput; + } + } + + gTemp += inputPadded.tube(padSize, padSize, padSize + + inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); + } + } + + Transform(gTemp); + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void Glimpse::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(size, "size"); + ar & data::CreateNVP(depth, "depth"); + ar & data::CreateNVP(scale, "scale"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(location, "location"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp new file mode 100644 index 00000000000..88c8ad2d853 --- /dev/null +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -0,0 +1,140 @@ +/** + * @file hard_tanh.hpp + * @author Dhawal Arora + * + * Definition and implementation of the HardTanH layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +/** + * The Hard Tanh activation function, defined by + * + * @f{eqnarray*}{ + * f(x) &=& \left\{ + * \begin{array}{lr} + * max & : x > maxValue \\ + * min & : x \le minValue \\ + * x & : otherwise + * \end{array} + * \right. \\ + * f'(x) &=& \left\{ + * \begin{array}{lr} + * 0 & : x > maxValue \\ + * 0 & : x \le minValue \\ + * 1 & : otherwise + * \end{array} + * \right. + * @f} + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class HardTanH +{ + public: + /** + * Create the HardTanH object using the specified parameters. The range + * of the linear region can be adjusted by specifying the maxValue and + * minValue. Default (maxValue = 1, minValue = -1). + * + * @param maxValue Range of the linear region maximum value. + * @param minValue Range of the linear region minimum value. + */ + HardTanH(const double maxValue = 1, const double minValue = -1); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& input, + DataType&& gy, + DataType&& g); + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the maximum value. + double const& MaxValue() const { return maxValue; } + //! Modify the maximum value. + double& MaxValue() { return maxValue; } + + //! Get the minimum value. + double const& MinValue() const { return minValue; } + //! Modify the minimum value. + double& MinValue() { return minValue; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Maximum value for the HardTanH function. + double maxValue; + + //! Minimum value for the HardTanH function. + double minValue; +}; // class HardTanH + +} // namespace ann +} // namespace mlpack + +// Include implementation. 
+#include "hard_tanh_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp b/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp new file mode 100644 index 00000000000..55b92812072 --- /dev/null +++ b/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp @@ -0,0 +1,72 @@ +/** + * @file hard_tanh_impl.hpp + * @author Dhawal Arora + * + * Implementation and implementation of the HardTanH layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_IMPL_HPP + +// In case it hasn't yet been included. +#include "hard_tanh.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +HardTanH::HardTanH( + const double maxValue, + const double minValue) : + maxValue(maxValue), + minValue(minValue) +{ + // Nothing to do here. +} + +template +template +void HardTanH::Forward( + const InputType&& input, OutputType&& output) +{ + output = input; + for (size_t i = 0; i < input.n_elem; i++) + { + output(i) = (output(i) > maxValue ? maxValue : + (output(i) < minValue ? minValue : output(i))); + } +} + +template +template +void HardTanH::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + g = gy; + for (size_t i = 0; i < input.n_elem; i++) + { + if (input(i) < minValue || input(i) > maxValue) + { + g(i) = 0; + } + } +} + +template +template +void HardTanH::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(maxValue, "maxValue"); + ar & data::CreateNVP(minValue, "minValue"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp b/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp deleted file mode 100644 index c707017fcbf..00000000000 --- a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp +++ /dev/null @@ -1,259 +0,0 @@ -/** - * @file hard_tanh_layer.hpp - * @author Dhawal Arora - * - * Definition and implementation of the HardTanHLayer layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The Hard Tanh activation function, defined by - * - * @f{eqnarray*}{ - * f(x) &=& \left\{ - * \begin{array}{lr} - * max & : x > maxValue \\ - * min & : x \le minValue \\ - * x & : otherwise - * \end{array} - * \right. \\ - * f'(x) &=& \left\{ - * \begin{array}{lr} - * 0 & : x > maxValue \\ - * 0 & : x \le minValue \\ - * 1 & : otherwise - * \end{array} - * \right. - * @f} - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). 
- */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class HardTanHLayer -{ - public: - /** - * Create the HardTanHLayer object using the specified parameters. The range - * of the linear region can be adjusted by specifying the maxValue and - * minValue. Default (maxValue = 1, minValue = -1). - * - * @param maxValue Range of the linear region maximum value. - * @param minValue Range of the linear region minimum value. - */ - HardTanHLayer(const double maxValue = 1, const double minValue = -1) : - maxValue(maxValue), minValue(minValue) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const InputType& input, OutputType& output) - { - Fn(input, output); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the maximum value. - double const& MaxValue() const { return maxValue; } - //! Modify the maximum value. - double& MaxValue() { return maxValue; } - - //! Get the minimum value. - double const& MinValue() const { return minValue; } - //! Modify the minimum value. - double& MinValue() { return minValue; } - - /** - * Serialize the layer. 
- */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(maxValue, "maxValue"); - ar & data::CreateNVP(minValue, "minValue"); - } - - private: - /** - * Computes the HardTanH function. - * - * @param x Input data. - * @return f(x). - */ - double Fn(const double x) - { - if (x > maxValue) - return maxValue; - else if (x < minValue) - return minValue; - return x; - } - - /** - * Computes the HardTanH function using a dense matrix as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - - template - void Fn(const arma::Mat& x, arma::Mat& y) - { - y = x; - y.transform( [&](eT val) { return std::min( - std::max( val, minValue ), maxValue ); } ); - } - - /** - * Computes the HardTanH function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - Fn(x.slice(s), y.slice(s)); - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param x Input data. - * @return f'(x) - */ - double Deriv(const double x) - { - return (x > maxValue || x < minValue) ? 0 : 1; - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param y Input activations. - * @param x The resulting derivatives. - */ - template - void Deriv(const InputType& x, OutputType& y) - { - y = x; - - for (size_t i = 0; i < x.n_elem; i++) - y(i) = Deriv(x(i)); - } - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Maximum value for the HardTanH function. - double maxValue; - - //! Minimum value for the HardTanH function. - double minValue; -}; // class HardTanHLayer - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/softmax_layer.hpp b/src/mlpack/methods/ann/layer/join.hpp similarity index 67% rename from src/mlpack/methods/ann/layer/softmax_layer.hpp rename to src/mlpack/methods/ann/layer/join.hpp index a2d3323eed8..bda31d787ab 100644 --- a/src/mlpack/methods/ann/layer/softmax_layer.hpp +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -1,16 +1,16 @@ /** - * @file softmax_layer.hpp + * @file join.hpp * @author Marcus Edel * - * Definition of the SoftmaxLayer class. + * Definition of the Join module. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_JOIN_HPP +#define MLPACK_METHODS_ANN_LAYER_JOIN_HPP #include @@ -18,28 +18,23 @@ namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of the softmax layer. The softmax loss layer computes the - * multinomial logistic loss of the softmax of its inputs. + * Implementation of the Join module class. The Join class accumulates + * the output of various modules. * * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). 
*/ -template < +template< typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class SoftmaxLayer +class Join { public: - /** - * Create the SoftmaxLayer object. - */ - SoftmaxLayer() - { - // Nothing to do here. - } + //! Create the Join object. + Join(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -48,13 +43,8 @@ class SoftmaxLayer * @param input Input data used for evaluating the specified function. * @param output Resulting output activation. */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = arma::trunc_exp(input - - arma::repmat(arma::max(input), input.n_rows, 1)); - output /= arma::accu(output); - } + template + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -66,12 +56,9 @@ class SoftmaxLayer * @param g The calculated gradient. */ template - void Backward(const arma::Mat& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy; - } + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -84,20 +71,23 @@ class SoftmaxLayer OutputDataType& OutputParameter() { return outputParameter; } //! Get the delta. - InputDataType const& Delta() const { return delta; } + OutputDataType const& Delta() const { return delta; } //! Modify the delta. - InputDataType& Delta() { return delta; } + OutputDataType& Delta() { return delta; } /** * Serialize the layer. */ template - void Serialize(Archive& /* ar */, const unsigned int /* version */) - { - /* Nothing to do here */ - } + void Serialize(Archive& ar, const unsigned int /* version */); private: + //! Locally-stored number of input rows. + size_t inSizeRows; + + //! Locally-stored number of input cols. + size_t inSizeCols; + //! Locally-stored delta object. OutputDataType delta; @@ -106,9 +96,12 @@ class SoftmaxLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class SoftmaxLayer +}; // class Join } // namespace ann } // namespace mlpack +// Include implementation. +#include "join_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/join_impl.hpp b/src/mlpack/methods/ann/layer/join_impl.hpp new file mode 100644 index 00000000000..47584324871 --- /dev/null +++ b/src/mlpack/methods/ann/layer/join_impl.hpp @@ -0,0 +1,60 @@ +/** + * @file join_impl.hpp + * @author Marcus Edel + * + * Implementation of the Join module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_JOIN_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_JOIN_IMPL_HPP + +// In case it hasn't yet been included. +#include "join.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Join::Join() +{ + // Nothing to do here. 
+} + +template +template +void Join::Forward( + const InputType&& input, OutputType&& output) +{ + inSizeRows = input.n_rows; + inSizeCols = input.n_cols; + output = arma::vectorise(input); +} + +template +template +void Join::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); +} + +template +template +void Join::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(inSizeRows, "inSizeRows"); + ar & data::CreateNVP(inSizeCols, "inSizeCols"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/layer.hpp b/src/mlpack/methods/ann/layer/layer.hpp new file mode 100644 index 00000000000..c1de77741e9 --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer.hpp @@ -0,0 +1,30 @@ +/** + * @file layer.hpp + * @author Marcus Edel + * + * This includes various layers to construct a model. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_HPP + +#include "add_merge.hpp" +#include "concat_performance.hpp" +#include "convolution.hpp" +#include "dropconnect.hpp" +#include "glimpse.hpp" +#include "layer_types.hpp" +#include "linear.hpp" +#include "linear_no_bias.hpp" +#include "lstm.hpp" +#include "recurrent.hpp" +#include "recurrent_attention.hpp" +#include "sequential.hpp" +#include "concat.hpp" +#include "vr_class_reward.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_traits.hpp b/src/mlpack/methods/ann/layer/layer_traits.hpp index a8671d62f5d..ff4fbf2d387 100644 --- a/src/mlpack/methods/ann/layer/layer_traits.hpp +++ b/src/mlpack/methods/ann/layer/layer_traits.hpp @@ -64,27 +64,42 @@ HAS_MEM_FUNC(Gradient, HasGradientCheck); // function. HAS_MEM_FUNC(Deterministic, HasDeterministicCheck); -// This gives us a HasRecurrentParameterCheck type (where U is a function -// pointer) we can use with SFINAE to catch when a type has a -// RecurrentParameter() function. -HAS_MEM_FUNC(RecurrentParameter, HasRecurrentParameterCheck); +// This gives us a HasParametersCheck type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Weights() function. +HAS_MEM_FUNC(Parameters, HasParametersCheck); -// This gives us a HasSeqLenCheck type (where U is a function pointer) we -// can use with SFINAE to catch when a type has a SeqLen() function. -HAS_MEM_FUNC(SeqLen, HasSeqLenCheck); +// This gives us a HasAddCheck type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Weights() function. +HAS_MEM_FUNC(Add, HasAddCheck); -// This gives us a HasWeightsCheck type (where U is a function pointer) we +// This gives us a HasModelCheck type (where U is a function pointer) we // can use with SFINAE to catch when a type has a Weights() function. -HAS_MEM_FUNC(Weights, HasWeightsCheck); +HAS_MEM_FUNC(Model, HasModelCheck); // This gives us a HasLocationCheck type (where U is a function pointer) // we can use with SFINAE to catch when a type has a Location() function. HAS_MEM_FUNC(Location, HasLocationCheck); +// This gives us a HasResetCheck type (where U is a function pointer) +// we can use with SFINAE to catch when a type has a Location() function. 
+HAS_MEM_FUNC(Reset, HasResetCheck); + // This gives us a HasRewardCheck type (where U is a function pointer) we // can use with SFINAE to catch when a type has a Reward() function. HAS_MEM_FUNC(Reward, HasRewardCheck); +// This gives us a HasInputWidth type (where U is a function pointer) we +// can use with SFINAE to catch when a type has an InputWidth() function. +HAS_MEM_FUNC(InputWidth, HasInputWidth); + +// This gives us a HasInputHeight type (where U is a function pointer) we +// can use with SFINAE to catch when a type has an InputHeight() function. +HAS_MEM_FUNC(InputHeight, HasInputHeight); + +// This gives us a HasRho type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Rho() function. +HAS_MEM_FUNC(Rho, HasRho); + } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/layer_types.hpp b/src/mlpack/methods/ann/layer/layer_types.hpp new file mode 100644 index 00000000000..156616864db --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_types.hpp @@ -0,0 +1,117 @@ +/** + * @file layer_types.hpp + * @author Marcus Edel + * + * This provides a list of all modules that can be used to construct a model. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_TYPES_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_TYPES_HPP + +#include + +// Layer modules. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Convolution modules.
+#include +#include +#include + +namespace mlpack { +namespace ann { + +template class AddMerge; +template class Concat; +template class DropConnect; +template class Glimpse; +template class Linear; +template class LinearNoBias; +template class LSTM; +template class Recurrent; +template class Sequential; +template class VRClassReward; + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +class ConcatPerformance; + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +class Convolution; + +template< + typename InputDataType, + typename OutputDataType +> +class RecurrentAttention; + +using LayerTypes = boost::variant< + Add*, + AddMerge*, + BaseLayer*, + BaseLayer*, + BaseLayer*, + BaseLayer*, + Concat*, + ConcatPerformance, + arma::mat, arma::mat>*, + Constant*, + Convolution, + NaiveConvolution, + NaiveConvolution, arma::mat, arma::mat>*, + DropConnect*, + Dropout*, + Glimpse*, + HardTanH*, + Join*, + LeakyReLU*, + Linear*, + LinearNoBias*, + LogSoftMax*, + Lookup*, + LSTM*, + MaxPooling*, + MeanPooling*, + MeanSquaredError*, + MultiplyConstant*, + NegativeLogLikelihood*, + Recurrent*, + RecurrentAttention*, + ReinforceNormal*, + Select*, + Sequential*, + VRClassReward* +>; + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_visitor.hpp b/src/mlpack/methods/ann/layer/layer_visitor.hpp new file mode 100644 index 00000000000..a311b0addbc --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_visitor.hpp @@ -0,0 +1,1034 @@ +/** + * @file layer_visitor.hpp + * @author Marcus Edel + * + * This file provides an easy way to serialize a layer, abstracts away the + * different types of layers, and also automatically directs any function to the + * right layer type. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_HPP + +#include +#include + +#include + +namespace mlpack { +namespace ann { + +/** + * LoadOutputParameterVisitor restores the output parameter using the given + * parameter set. + */ +class LoadOutputParameterVisitor : public boost::static_visitor +{ + public: + //! Restore the output parameter given a parameter set. + LoadOutputParameterVisitor(std::vector&& parameter); + + //! Restore the output parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The parameter set. + std::vector&& parameter; + + //! Restore the output parameter for a module which doesn't implement the + //! Model() function. + template + typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; + + //! Restore the output parameter for a module which implements the Model() + //! function. + template + typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; +}; + +/** + * SaveOutputParameterVisitor saves the output parameter into the given + * parameter set. + */ +class SaveOutputParameterVisitor : public boost::static_visitor +{ + public: + //! Save the output parameter into the given parameter set. 
+ SaveOutputParameterVisitor(std::vector&& parameter); + + //! Save the output parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The parameter set. + std::vector&& parameter; + + //! Save the output parameter for a module which doesn't implement the + //! Model() function. + template + typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; + + //! Save the output parameter for a module which implements the Model() + //! function. + template + typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; +}; + +/** + * DeleteVisitor executes the destructor of the instantiated object. + */ +class DeleteVisitor : public boost::static_visitor +{ + public: + //! Execute the destructor. + template + void operator()(LayerType* layer) const; +}; + +/** + * ForwardOutputVisitor executes the Forward() function given the input and + * output parameter. + */ +class ForwardOutputVisitor : public boost::static_visitor +{ + public: + //! Execute the Forward() function given the input and output parameter. + ForwardOutputVisitor(arma::mat&& input, arma::mat&& output); + + //! Execute the Forward() function. + template + double operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The output parameter set. + arma::mat&& output; +}; + +/** + * ForwardVisitor executes the Forward() function given the input and output + * parameter. + */ +class ForwardVisitor : public boost::static_visitor +{ + public: + //! Execute the Forward() function given the input and output parameter. + ForwardVisitor(arma::mat&& input, arma::mat&& output); + + //! Execute the Forward() function. + template + void operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The output parameter set. + arma::mat&& output; +}; + +/** + * BackwardVisitor executes the Backward() function given the input, error and + * delta parameter. + */ +class BackwardVisitor : public boost::static_visitor +{ + public: + //! Execute the Backward() function given the input, error and delta + //! parameter. + BackwardVisitor(arma::mat&& input, arma::mat&& error, arma::mat&& delta); + + //! Execute the Backward() function. + template + void operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The error parameter. + arma::mat&& error; + + //! The delta parameter. + arma::mat&& delta; +}; + +/** + * ResetVisitor executes the Reset() function. + */ +class ResetVisitor : public boost::static_visitor +{ + public: + //! Execute the Reset() function. + template + void operator()(LayerType* layer) const; + + private: + //! Execute the Reset() function for a module which implements the Reset() + //! function. + template + typename std::enable_if< + HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Execute the Reset() function for a module which implements the Model() + //! function. + template + typename std::enable_if< + !HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Execute the Reset() function for a module which implements the Reset() + //! and Model() function. + template + typename std::enable_if< + HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //!
Do not execute the Reset() function for a module which doesn't implement + // the Reset() or Model() function. + template + typename std::enable_if< + !HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; +}; + +/** + * InputParameterVisitor exposes the input parameter of the given module. + */ +class InputParameterVisitor : public boost::static_visitor +{ + public: + //! Return the input parameter set. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * OutputParameterVisitor exposes the output parameter of the given module. + */ +class OutputParameterVisitor : public boost::static_visitor +{ + public: + //! Return the output parameter set. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * DeltaVisitor exposes the delta parameter of the given module. + */ +class DeltaVisitor : public boost::static_visitor +{ + public: + //! Return the delta parameter. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * ParametersVisitor exposes the parameters set of the given module and stores + * the parameters set into the given matrix. + */ +class ParametersVisitor : public boost::static_visitor +{ + public: + //! Store the parameters set into the given parameters matrix. + ParametersVisitor(arma::mat&& parameters); + + //! Set the parameters set. + template + void operator()(LayerType* layer) const; + + private: + //! The parameters set. + arma::mat&& parameters; + + //! Do not set the parameters set if the module doesn't implement the + //! Parameters() function. + template + typename std::enable_if< + !HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; + + //! Set the parameters set if the module implements the Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; +}; + +/** + * ParametersSetVisitor update the parameters set using the given matrix. + */ +class ParametersSetVisitor : public boost::static_visitor +{ + public: + //! Update the parameters set given the parameters matrix. + ParametersSetVisitor(arma::mat&& parameters); + + //! Update the parameters set. + template + void operator()(LayerType *layer) const; + + private: + //! The parameters set. + arma::mat&& parameters; + + //! Do not update the parameters set if the module doesn't implement the + //! Parameters() function. + template + typename std::enable_if< + !HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; + + //! Update the parameters set if the module implements the Parameters() + //! function. + template + typename std::enable_if< + HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; +}; + +/** + * WeightSizeVisitor returns the number of weights of the given module. + */ +class WeightSizeVisitor : public boost::static_visitor +{ + public: + //! Return the number of weights. + template + size_t operator()(LayerType* layer) const; + + private: + //! If the module doesn't implement the Parameters() or Model() function + //! return 0. + template + typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the number of parameters if the module implements the Model() + //! function. 
+ template + typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the number of parameters if the module implements the Parameters() + //! function. + template + typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the accumulated number of parameters if the module implements the + //! Parameters() and Model() function. + template + typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; +}; + +/** + * SetInputWidthVisitor updates the input width parameter with the given input + * width. + */ +class SetInputWidthVisitor : public boost::static_visitor +{ + public: + //! Update the input width parameter with the given input width. + SetInputWidthVisitor(const size_t inputWidth = 0, const bool reset = false); + + //! Update the input width parameter. + template + bool operator()(LayerType* layer) const; + + private: + //! The input width parameter. + size_t inputWidth; + + //! If set reset the height parameter if already set. + bool reset; + + //! Do nothing if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the InputWidth() or + //! Model() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; +}; + +/** + * SetInputHeightVisitor updates the input height parameter with the given input + * height. + */ +class SetInputHeightVisitor : public boost::static_visitor +{ + public: + //! Update the input height parameter with the given input height. + SetInputHeightVisitor(const size_t inputHeight = 0, const bool reset = false); + + //! Update the input height parameter. + template + bool operator()(LayerType* layer) const; + + private: + //! The input height parameter. + size_t inputHeight; + + //! If set reset the height parameter if already set. + bool reset; + + //! Do nothing if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! 
Update the input height if the module implements the InputHeight() or + //! Model() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; +}; + +/** + * OutputWidthVisitor exposes the OutputWidth() method of the given module. + */ +class OutputWidthVisitor : public boost::static_visitor +{ + public: + //! Return the output width. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the InputWidth() + //! function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() or + //! InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; +}; + +/** + * OutputWidthVisitor exposes the OutputHeight() method of the given module. + */ +class OutputHeightVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; +}; + +/** + * LastOutputWidthVisitor exposes the OutputWidth() method of the given module. + */ +class LastOutputWidthVisitor : public boost::static_visitor +{ + public: + //! Return the output width. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the InputWidth() + //! function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() function. 
+ template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() or + //! InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; +}; + +/** + * LastOutputHeightVisitor exposes the OutputHeight() method of the given module. + */ +class LastOutputHeightVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; +}; + +/** + * WeightSetVisitor update the module parameters given the parameters set. + */ +class WeightSetVisitor : public boost::static_visitor +{ + public: + //! Update the parameters given the parameters set and offset. + WeightSetVisitor(arma::mat&& weight, const size_t offset = 0); + + //! Update the parameters set. + template + size_t operator()(LayerType* layer) const; + + private: + //! The parameters set. + arma::mat&& weight; + + //! The parameters offset. + const size_t offset; + + //! Do not update the parameters if the module doesn't implement the + //! Parameters() or Model() function. + template + typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Model() function. + template + typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Model() and + //! Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; +}; + +/** + * RhoVisitor exposes the Rho() method of the given module. + */ +class RhoVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. 
+ template + typename std::enable_if< + !HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; +}; + +/** + * DeterministicSetVisitor set the deterministic parameter given the + * deterministic value. + */ +class DeterministicSetVisitor : public boost::static_visitor +{ + public: + //! Set the deterministic parameter given the current deterministic value. + DeterministicSetVisitor(const bool deterministic = true); + + //! Set the deterministic parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The deterministic parameter. + const bool deterministic; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() and Model() function. + template + typename std::enable_if< + HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Model() function. + template + typename std::enable_if< + !HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() function. + template + typename std::enable_if< + HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Do not set the deterministic parameter if the module doesn't implement the + //! Deterministic() or Model() function. + template + typename std::enable_if< + !HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; +}; + +/** + * AddVisitor exposes the Add() method of the given module. + */ +class AddVisitor : public boost::static_visitor +{ + public: + //! Exposes the Add() method of the given module. + template + AddVisitor(T newLayer); + + //! Exposes the Add() method. + template + void operator()(LayerType* layer) const; + + private: + //! The layer that should be added. + LayerTypes newLayer; + + //! Only add the layer if the module implements the Add() function. + template + typename std::enable_if< + HasAddCheck::value, void>::type + LayerAdd(T* layer) const; + + //! Do not add the layer if the module doesn't implement the Add() function. + template + typename std::enable_if< + !HasAddCheck::value, void>::type + LayerAdd(T* layer) const; +}; + +/** + * GradientSetVisitor update the gradient parameter given the gradient set. + */ +class GradientSetVisitor : public boost::static_visitor +{ + public: + //! Update the gradient parameter given the gradient set. + GradientSetVisitor(arma::mat&& gradient, size_t offset = 0); + + //! Update the gradient parameter. + template + size_t operator()(LayerType* layer) const; + + private: + //! The gradient set. 
+ arma::mat&& gradient; + + //! The gradient offset. + size_t offset; + + //! Update the gradient if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Gradient() and Model() + //! function. + template + typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not update the gradient parameter if the module doesn't implement the + //! Gradient() or Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, P& input) const; +}; + + +/** + * GradientUpdateVisitor update the gradient parameter given the gradient set. + */ +class GradientUpdateVisitor : public boost::static_visitor +{ + public: + //! Update the gradient parameter given the gradient set. + GradientUpdateVisitor(arma::mat&& gradient, size_t offset = 0); + + //! Update the gradient parameter. + template + size_t operator()(LayerType* layer) const; + + private: + //! The gradient set. + arma::mat&& gradient; + + //! The gradient offset. + size_t offset; + + //! Update the gradient if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Gradient() and Model() + //! function. + template + typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not update the gradient parameter if the module doesn't implement the + //! Gradient() or Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, P& input) const; +}; + +/* + * GradientZeroVisitor set the gradient to zero for the given module. + */ +class GradientZeroVisitor : public boost::static_visitor +{ + public: + //! Set the gradient to zero for the given module. + GradientZeroVisitor(); + + //! Set the gradient to zero. + template + void operator()(LayerType* layer) const; + + private: + //! Set the gradient to zero if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value, void>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not set the gradient to zero if the module doesn't implement the + //! Gradient() function. + template + typename std::enable_if< + !HasGradientCheck::value, void>::type + LayerGradients(T* layer, P& input) const; +}; + +/** + * SearchModeVisitor executes the Gradient() method of the given module using + * the input and delta parameter. 
+ */ +class GradientVisitor : public boost::static_visitor +{ + public: + //! Executes the Gradient() method of the given module using the input and + //! delta parameter. + GradientVisitor(arma::mat&& input, arma::mat&& delta); + + //! Executes the Gradient() method. + template + void operator()(LayerType* layer) const; + + private: + //! The input set. + arma::mat&& input; + + //! The delta parameter. + arma::mat&& delta; + + //! Execute the Gradient() function if the module implements the Gradient() + //! function. + template + typename std::enable_if< + HasGradientCheck::value, void>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not execute the Gradient() function if the module doesn't implement + //! the Gradient() function. + template + typename std::enable_if< + !HasGradientCheck::value, void>::type + LayerGradients(T* layer, P& input) const; +}; + +/** + * RewardSetVisitor set the reward parameter given the reward value. + */ +class RewardSetVisitor : public boost::static_visitor +{ + public: + //! Set the reward parameter given the reward value. + RewardSetVisitor(const double reward); + + //! Set the reward parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The reward value. + const double reward; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() and Model() function. + template + typename std::enable_if< + HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Model() function. + template + typename std::enable_if< + !HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() function. + template + typename std::enable_if< + HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Do not set the deterministic parameter if the module doesn't implement the + //! Deterministic() or Model() function. + template + typename std::enable_if< + !HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; +}; + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "layer_visitor_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp b/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp new file mode 100644 index 00000000000..482b8e10ad3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp @@ -0,0 +1,1241 @@ +/** + * @file layer_visitor_impl.hpp + * @author Marcus Edel + * + * This file provides an easy way to serialize a layer, abstracts away the + * different types of layers, and also automatically directs any function to the + * right layer type. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_IMPL_HPP + +// In case it hasn't been included yet. +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann { + +//! LoadOutputParameterVisitor visitor class. 
+inline LoadOutputParameterVisitor::LoadOutputParameterVisitor( + std::vector&& parameter) : parameter(std::move(parameter)) +{ + /* Nothing to do here. */ +} + +template +inline void LoadOutputParameterVisitor::operator()(LayerType* layer) const +{ + OutputParameter(layer); +} + +template +inline typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type +LoadOutputParameterVisitor::OutputParameter(T* layer) const +{ + layer->OutputParameter() = parameter.back(); + parameter.pop_back(); +} + +template +inline typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type +LoadOutputParameterVisitor::OutputParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(LoadOutputParameterVisitor(std::move(parameter)), + layer->Model()[layer->Model().size() - i - 1]); + } + + layer->OutputParameter() = parameter.back(); + parameter.pop_back(); +} + +//! SaveOutputParameterVisitor visitor class. +inline SaveOutputParameterVisitor::SaveOutputParameterVisitor( + std::vector&& parameter) : parameter(std::move(parameter)) +{ + /* Nothing to do here. */ +} + +template +inline void SaveOutputParameterVisitor::operator()(LayerType* layer) const +{ + OutputParameter(layer); +} + +template +inline typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type +SaveOutputParameterVisitor::OutputParameter(T* layer) const +{ + parameter.push_back(layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type +SaveOutputParameterVisitor::OutputParameter(T* layer) const +{ + parameter.push_back(layer->OutputParameter()); + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SaveOutputParameterVisitor(std::move(parameter)), + layer->Model()[i]); + } +} + +//! DeleteVisitor visitor class. +template +inline void DeleteVisitor::operator()(LayerType* layer) const +{ + if (layer) + delete layer; +} + +//! ForwardOutputVisitor visitor class. +inline ForwardOutputVisitor::ForwardOutputVisitor(arma::mat&& input, + arma::mat&& output) : + input(std::move(input)), + output(std::move(output)) +{ + /* Nothing to do here. */ +} + +template +inline double ForwardOutputVisitor::operator()(LayerType* layer) const +{ + return layer->Forward(std::move(input), std::move(output)); +} + +//! ForwardVisitor visitor class. +inline ForwardVisitor::ForwardVisitor(arma::mat&& input, arma::mat&& output) : + input(std::move(input)), + output(std::move(output)) +{ + /* Nothing to do here. */ +} + +template +inline void ForwardVisitor::operator()(LayerType* layer) const +{ + layer->Forward(std::move(input), std::move(output)); +} + +//! BackwardVisitor visitor class. +inline BackwardVisitor::BackwardVisitor(arma::mat&& input, + arma::mat&& error, + arma::mat&& delta) : + input(std::move(input)), + error(std::move(error)), + delta(std::move(delta)) +{ + /* Nothing to do here. */ +} + +template +inline void BackwardVisitor::operator()(LayerType* layer) const +{ + layer->Backward(std::move(input), std::move(error), std::move(delta)); +} + +//! ResetVisitor visitor class. 
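Every visitor in this file relies on the same compile-time dispatch trick: a trait in the HasModelCheck/HasResetCheck family detects whether the layer type exposes a given member, and std::enable_if selects the matching overload. The standalone sketch below uses a simplified detection trait and two toy layer types to show the mechanism in isolation; it is an illustration of the technique, not mlpack's actual HasMethodForm machinery.

// Self-contained illustration of enable_if-based visitor dispatch.
#include <iostream>
#include <type_traits>
#include <utility>
#include <vector>

// Simplified stand-in for HasModelCheck: true if T has a Model() member.
template<typename T, typename = void>
struct HasModel : std::false_type { };

template<typename T>
struct HasModel<T, decltype(void(std::declval<T&>().Model()))>
    : std::true_type { };

struct Plain { };                        // No sub-model.
struct Container                         // Owns sub-modules.
{
  std::vector<int>& Model() { return modules; }
  std::vector<int> modules;
};

// Overload chosen for layers that expose Model(): recurse into sub-modules.
template<typename T>
typename std::enable_if<HasModel<T>::value, void>::type
Visit(T& layer)
{
  std::cout << "container with " << layer.Model().size() << " modules\n";
}

// Overload chosen for all other layers: nothing to recurse into.
template<typename T>
typename std::enable_if<!HasModel<T>::value, void>::type
Visit(T& /* layer */)
{
  std::cout << "plain layer\n";
}

int main()
{
  Plain p;
  Container c;
  Visit(p);   // prints "plain layer"
  Visit(c);   // prints "container with 0 modules"
  return 0;
}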
+template +inline void ResetVisitor::operator()(LayerType* layer) const +{ + ResetParameter(layer); +} + +template +inline typename std::enable_if< + HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + layer->Reset(); +} + +template +inline typename std::enable_if< + !HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(ResetVisitor(), layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(ResetVisitor(), layer->Model()[i]); + } + + layer->Reset(); +} + +template +inline typename std::enable_if< + !HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* /* layer */) const +{ + /* Nothing to do here. */ +} + +//! InputParameterVisitor visitor class. +template +inline arma::mat& InputParameterVisitor::operator()(LayerType *layer) const +{ + return layer->InputParameter(); +} + +//! OutputParameterVisitor visitor class. +template +inline arma::mat& OutputParameterVisitor::operator()(LayerType *layer) const +{ + return layer->OutputParameter(); +} + +//! DeltaVisitor visitor class. +template +inline arma::mat& DeltaVisitor::operator()(LayerType *layer) const +{ + return layer->Delta(); +} + +//! ParametersVisitor visitor class. +inline ParametersVisitor::ParametersVisitor(arma::mat&& parameters) : + parameters(std::move(parameters)) +{ + /* Nothing to do here. */ +} + +template +inline void ParametersVisitor::operator()(LayerType *layer) const +{ + LayerParameters(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value, void>::type +ParametersVisitor::LayerParameters(T* /* layer */, P& /* output */) const +{ + /* Nothing to do here. */ +} + +template +inline typename std::enable_if< + HasParametersCheck::value, void>::type +ParametersVisitor::LayerParameters(T* layer, P& /* output */) const +{ + parameters = layer->Parameters(); +} + +//! ParametersSetVisitor visitor class. +inline ParametersSetVisitor::ParametersSetVisitor(arma::mat&& parameters) : + parameters(std::move(parameters)) +{ + /* Nothing to do here. */ +} + +template +inline void ParametersSetVisitor::operator()(LayerType *layer) const +{ + LayerParameters(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value, void>::type +ParametersSetVisitor::LayerParameters(T* /* layer */, P& /* output */) const +{ + /* Nothing to do here. */ +} + +template +inline typename std::enable_if< + HasParametersCheck::value, void>::type +ParametersSetVisitor::LayerParameters(T* layer, P& /* output */) const +{ + layer->Parameters() = parameters; +} + +//! WeightSizeVisitor visitor class. 
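WeightSizeVisitor, implemented next, returns the number of trainable parameters of a module and recurses into Model(), so container layers report the total of their children. The network classes can then allocate one flat parameter matrix up front; WeightSetVisitor and GradientSetVisitor later walk the same offsets to alias each layer's Parameters() and Gradient() into that matrix. A usage sketch follows; the surrounding setup is assumed, only the visitor and layer names come from this patch.

// Assumed setup: LayerTypes is the variant of layer pointers from
// layer_types.hpp; Linear and LeakyReLU are the classes added in this patch.
std::vector<LayerTypes> network;
network.push_back(new Linear<>(10, 5));
network.push_back(new LeakyReLU<>(0.03));
network.push_back(new Linear<>(5, 2));

size_t weightSize = 0;
for (size_t i = 0; i < network.size(); ++i)
  weightSize += boost::apply_visitor(WeightSizeVisitor(), network[i]);

// Linear(10, 5) stores 10 * 5 + 5 = 55 elements, LeakyReLU stores none, and
// Linear(5, 2) stores 5 * 2 + 2 = 12, so weightSize is 67 here.
arma::mat parameter(weightSize, 1);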
+template +inline size_t WeightSizeVisitor::operator()(LayerType* layer) const +{ + return LayerSize(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* /* layer */, P& /* output */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + size_t weights = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + weights += boost::apply_visitor(WeightSizeVisitor(), layer->Model()[i]); + } + + return weights; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + size_t weights = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + weights += boost::apply_visitor(WeightSizeVisitor(), layer->Model()[i]); + } + + return weights; +} + +//! SetInputWidthVisitor visitor class. +inline SetInputWidthVisitor::SetInputWidthVisitor(const size_t inputWidth, + const bool reset) : + inputWidth(inputWidth), + reset(reset) +{ + /* Nothing to do here. */ +} + +template +inline bool SetInputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerInputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* /* layer */) const +{ + return false; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + if (layer->InputWidth() == 0 || reset) + { + layer->InputWidth() = inputWidth; + } + + return true; +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputWidthVisitor(inputWidth, reset), + layer->Model()[i]); + } + + return true; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + if (layer->InputWidth() == 0 || reset) + { + layer->InputWidth() = inputWidth; + } + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputWidthVisitor(inputWidth, reset), + layer->Model()[i]); + } + + return true; +} + +//! SetInputHeightVisitor visitor class. +inline SetInputHeightVisitor::SetInputHeightVisitor(const size_t inputHeight, + const bool reset) : + inputHeight(inputHeight), + reset(reset) +{ + /* Nothing to do here. 
*/ +} + +template +inline bool SetInputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerInputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* /* layer */) const +{ + return false; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + if (layer->InputHeight() == 0 || reset) + { + layer->InputHeight() = inputHeight; + } + + return true; +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputHeightVisitor(inputHeight, reset), + layer->Model()[i]); + } + + return true; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + if (layer->InputHeight() == 0 || reset) + { + layer->InputHeight() = inputHeight; + } + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputHeightVisitor(inputHeight, reset), + layer->Model()[i]); + } + + return true; +} + +//! OutputWidthVisitor visitor class. +template +inline size_t OutputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerOutputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + return layer->OutputWidth(); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + size_t outputWidth = layer->OutputWidth(); + + if (outputWidth == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + } + + return outputWidth; +} + +//! OutputHeightVisitor visitor class. 
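The width/height visitors in this stretch let layers that care about spatial shape (convolution, pooling) be wired together without the user spelling out every intermediate size: the network queries a layer's output width/height and pushes the result into the next layer as its input width/height, where a return value of 0 means "no opinion, keep the previous value". The loop below is a hedged sketch of that propagation; the driver code and the network object are assumptions.

// Assumed driver loop propagating spatial dimensions through the layer list.
size_t width = 28, height = 28;  // e.g. a 28x28 input image
for (size_t i = 0; i < network.size(); ++i)
{
  // Only layers with InputWidth()/InputHeight() react to these.
  boost::apply_visitor(SetInputWidthVisitor(width, false), network[i]);
  boost::apply_visitor(SetInputHeightVisitor(height, false), network[i]);

  // 0 means the layer does not change (or does not know) the spatial size.
  const size_t w = boost::apply_visitor(OutputWidthVisitor(), network[i]);
  const size_t h = boost::apply_visitor(OutputHeightVisitor(), network[i]);
  if (w != 0) width = w;
  if (h != 0) height = h;
}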
+template +inline size_t OutputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerOutputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + return layer->OutputHeight(); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + size_t outputHeight = layer->OutputHeight(); + + if (outputHeight == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + } + + return outputHeight; +} + +//! LastOutputWidthVisitor visitor class. +template +inline size_t LastOutputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerOutputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + return layer->OutputWidth(); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputWidth = boost::apply_visitor(LastOutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + size_t outputWidth = layer->OutputWidth(); + + if (outputWidth == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + } + + return outputWidth; +} + +//! LastOutputHeightVisitor visitor class. 
+template +inline size_t LastOutputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerOutputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + return layer->OutputHeight(); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputHeight = boost::apply_visitor(LastOutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + size_t outputHeight = layer->OutputHeight(); + + if (outputHeight == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + } + + return outputHeight; +} + +//! WeightSetVisitor visitor class. +inline WeightSetVisitor::WeightSetVisitor(arma::mat&& weight, + const size_t offset) : + weight(std::move(weight)), + offset(offset) +{ + /* Nothing to do here. */ +} + +template +inline size_t WeightSetVisitor::operator()(LayerType* layer) const +{ + return LayerSize(layer, std::move(layer->OutputParameter())); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* /* layer */, P&& /*output */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /*output */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(WeightSetVisitor( + std::move(weight), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /* output */) const +{ + layer->Parameters() = arma::mat(weight.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /* output */) const +{ + layer->Parameters() = arma::mat(weight.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(WeightSetVisitor( + std::move(weight), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +//! RhoVisitor visitor class. 
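RhoVisitor, implemented next, reports a module's backpropagation-through-time horizon (Rho()), searching sub-models until some recurrent module answers with a non-zero value; this lets the network discover how many time steps it has to unroll. A short assumed-usage fragment:

// Assumed lookup: take the first non-zero rho reported by any module.
size_t rho = 0;
for (size_t i = 0; i < network.size(); ++i)
{
  rho = boost::apply_visitor(RhoVisitor(), network[i]);
  if (rho != 0)
    break;
}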
+template +inline size_t RhoVisitor::operator()(LayerType* layer) const +{ + return LayerRho(layer); +} + +template +inline typename std::enable_if< + !HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + size_t moduleRho = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + moduleRho = boost::apply_visitor(RhoVisitor(), layer->Model()[i]); + if (moduleRho != 0) + { + return moduleRho; + } + } + + return moduleRho; +} + +template +inline typename std::enable_if< + HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + return layer->Rho(); +} + +template +inline typename std::enable_if< + HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + return layer->Rho(); +} + +//! DeterministicSetVisitor visitor class. +inline DeterministicSetVisitor::DeterministicSetVisitor( + const bool deterministic) : deterministic(deterministic) +{ + /* Nothing to do here. */ +} + +template +inline void DeterministicSetVisitor::operator()(LayerType* layer) const +{ + LayerDeterministic(layer); +} + +template +inline typename std::enable_if< + HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + layer->Deterministic() = deterministic; + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(DeterministicSetVisitor(deterministic), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + !HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(DeterministicSetVisitor(deterministic), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + layer->Deterministic() = deterministic; +} + +template +inline typename std::enable_if< + !HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* /* input */) const +{ + /* Nothing to do here. */ +} + +//! AddVisitor visitor class. +template +inline AddVisitor::AddVisitor(T newLayer) : + newLayer(std::move(newLayer)) +{ + /* Nothing to do here. */ +} + +template +inline void AddVisitor::operator()(LayerType* layer) const +{ + LayerAdd(layer); +} + +template +inline typename std::enable_if< + HasAddCheck::value, void>::type +AddVisitor::LayerAdd(T* layer) const +{ + layer->Add(newLayer); +} + +template +inline typename std::enable_if< + !HasAddCheck::value, void>::type +AddVisitor::LayerAdd(T* /* layer */) const +{ + /* Nothing to do here. */ +} + +//! GradientSetVisitor visitor class. +inline GradientSetVisitor::GradientSetVisitor(arma::mat&& gradient, + size_t offset) : + gradient(std::move(gradient)), + offset(offset) +{ + /* Nothing to do here. 
*/ +} + +template +inline size_t GradientSetVisitor::operator()(LayerType* layer) const +{ + return LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient() = arma::mat(gradient.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientSetVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient() = arma::mat(gradient.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientSetVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + return 0; +} + +//! GradientUpdateVisitor visitor class. +inline GradientUpdateVisitor::GradientUpdateVisitor(arma::mat&& gradient, + size_t offset) : + gradient(std::move(gradient)), + offset(offset) +{ + /* Nothing to do here. 
*/ +} + +template +inline size_t GradientUpdateVisitor::operator()(LayerType* layer) const +{ + return LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + if (layer->Parameters().n_elem != 0) + { + layer->Gradient() = gradient.submat(offset, 0, + offset + layer->Parameters().n_elem - 1, 0);; + } + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientUpdateVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + if (layer->Parameters().n_elem != 0) + { + layer->Gradient() = gradient.submat(offset, 0, + offset + layer->Parameters().n_elem - 1, 0);; + } + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientUpdateVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + return 0; +} + +//! GradientZeroVisitor visitor class. +inline GradientZeroVisitor::GradientZeroVisitor() +{ + /* Nothing to do here. */ +} + +template +inline void GradientZeroVisitor::operator()(LayerType* layer) const +{ + LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value, void>::type +GradientZeroVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient().zeros(); +} + +template +inline typename std::enable_if< + !HasGradientCheck::value, void>::type +GradientZeroVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + /* Nothing to do here. */ +} + +//! GradientVisitor visitor class. +inline GradientVisitor::GradientVisitor(arma::mat&& input, arma::mat&& delta) : + input(std::move(input)), + delta(std::move(delta)) +{ + /* Nothing to do here. */ +} + +template +inline void GradientVisitor::operator()(LayerType* layer) const +{ + LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value, void>::type +GradientVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient(std::move(input), std::move(delta), + std::move(layer->Gradient())); +} + +template +inline typename std::enable_if< + !HasGradientCheck::value, void>::type +GradientVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + /* Nothing to do here. */ +} + +//! RewardSetVisitor visitor class. +inline RewardSetVisitor::RewardSetVisitor(const double reward) : reward(reward) +{ + /* Nothing to do here. 
*/ +} + +template +inline void RewardSetVisitor::operator()(LayerType* layer) const +{ + LayerReward(layer); +} + +template +inline typename std::enable_if< + HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + layer->Reward() = reward; + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(RewardSetVisitor(reward), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + !HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(RewardSetVisitor(reward), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + layer->Reward() = reward; +} + +template +inline typename std::enable_if< + !HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* /* input */) const +{ + /* Nothing to do here. */ +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp similarity index 63% rename from src/mlpack/methods/ann/layer/leaky_relu_layer.hpp rename to src/mlpack/methods/ann/layer/leaky_relu.hpp index a87792e4295..d8160f1a50b 100644 --- a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp +++ b/src/mlpack/methods/ann/layer/leaky_relu.hpp @@ -1,9 +1,9 @@ /** - * @file leaky_relu_layer.hpp + * @file leaky_relu.hpp * @author Dhawal Arora * - * Definition and implementation of LeakyReLULayer layer first introduced - * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, + * Definition of LeakyReLU layer first introduced in the acoustic model, + * Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 * * mlpack is free software; you may redistribute it and/or modify it under the @@ -11,8 +11,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP +#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP #include @@ -41,20 +41,17 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class LeakyReLULayer +class LeakyReLU { public: /** - * Create the LeakyReLULayer object using the specified parameters. + * Create the LeakyReLU object using the specified parameters. * The non zero gradient can be adjusted by specifying tha parameter * alpha in the range 0 to 1. Default (alpha = 0.03) * * @param alpha Non zero gradient */ - LeakyReLULayer(const double alpha = 0.03) : alpha(alpha) - { - // Nothing to do here. - } + LeakyReLU(const double alpha = 0.03); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -64,10 +61,7 @@ class LeakyReLULayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) - { - Fn(input, output); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -79,50 +73,7 @@ class LeakyReLULayer * @param g The calculated gradient. 
*/ template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; - } + void Backward(const DataType&& input, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -148,10 +99,7 @@ class LeakyReLULayer * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(alpha, "alpha"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: /** @@ -177,20 +125,6 @@ class LeakyReLULayer y = arma::max(x, alpha * x); } - /** - * Computes the LeakyReLU function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - fn(x.slice(s), y.slice(s)); - } - /** * Computes the first derivative of the LeakyReLU function. * @@ -215,11 +149,11 @@ class LeakyReLULayer y = x; for (size_t i = 0; i < x.n_elem; i++) + { y(i) = Deriv(x(i)); + } } - - //! Locally-stored delta object. OutputDataType delta; @@ -232,9 +166,12 @@ class LeakyReLULayer //! Leakyness Parameter in the range 0 +LeakyReLU::LeakyReLU( + const double alpha) : alpha(alpha) +{ + // Nothing to do here. +} + +template +template +void LeakyReLU::Forward( + const InputType&& input, OutputType&& output) +{ + Fn(input, output); +} + +template +template +void LeakyReLU::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + DataType derivative; + Deriv(input, derivative); + g = gy % derivative; +} + +template +template +void LeakyReLU::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(alpha, "alpha"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_layer.hpp b/src/mlpack/methods/ann/layer/linear.hpp similarity index 50% rename from src/mlpack/methods/ann/layer/recurrent_layer.hpp rename to src/mlpack/methods/ann/layer/linear.hpp index 5e231a7480b..0f6be78b647 100644 --- a/src/mlpack/methods/ann/layer/recurrent_layer.hpp +++ b/src/mlpack/methods/ann/layer/linear.hpp @@ -1,27 +1,28 @@ /** - * @file recurrent_layer.hpp + * @file linear.hpp * @author Marcus Edel * - * Definition of the RecurrentLayer class. + * Definition of the Linear layer class also known as fully-connected layer or + * affine transformation. 
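The LeakyReLU implementation above reduces to two element-wise rules: forward f(x) = max(x, alpha * x) and backward g = gy % f'(x) with f'(x) = 1 for x >= 0 and alpha otherwise. The small standalone Armadillo program below checks those rules on a few values; it is illustrative only and not part of the patch.

#include <armadillo>

int main()
{
  const double alpha = 0.03;
  arma::mat x = { { -2.0, -0.5, 0.0, 1.5 } };

  // Forward: max(x, alpha * x) -> [-0.06, -0.015, 0, 1.5].
  arma::mat y = arma::max(x, alpha * x);

  // Backward: incoming error times the derivative, which is 1 for x >= 0 and
  // alpha otherwise -> [0.03, 0.03, 1, 1] for a unit incoming error.
  arma::mat gy = arma::ones<arma::mat>(arma::size(x));
  arma::mat deriv = x;
  deriv.transform([alpha](double v) { return (v >= 0.0) ? 1.0 : alpha; });
  arma::mat g = gy % deriv;

  y.print("forward");
  g.print("backward");
  return 0;
}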
* * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_HPP #include -#include + +#include "layer_types.hpp" namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of the RecurrentLayer class. Recurrent layers can be used - * similarly to feed-forward layers except that the input isn't stored in the - * inputParameter, instead it's in stored in the recurrentParameter. + * Implementation of the Linear layer class. The Linear class represents a + * single layer of a neural network. * * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). @@ -32,35 +33,24 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class RecurrentLayer +class Linear { public: + //! Create the Linear object. + Linear(); + /** - * Create the RecurrentLayer object using the specified number of units. + * Create the Linear layer object using the specified number of units. * * @param inSize The number of input units. * @param outSize The number of output units. */ - RecurrentLayer(const size_t inSize, const size_t outSize) : - inSize(outSize), - outSize(outSize), - recurrentParameter(arma::zeros(inSize, 1)) - { - weights.set_size(outSize, inSize); - } + Linear(const size_t inSize, const size_t outSize);; - /** - * Create the RecurrentLayer object using the specified number of units. - * - * @param outSize The number of output units. + /* + * Reset the layer parameter. */ - RecurrentLayer(const size_t outSize) : - inSize(outSize), - outSize(outSize), - recurrentParameter(arma::zeros(outSize, 1)) - { - weights.set_size(outSize, inSize); - } + void Reset(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -70,10 +60,7 @@ class RecurrentLayer * @param output Resulting output activation. */ template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + weights * recurrentParameter; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -84,44 +71,33 @@ class RecurrentLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::mat& g) - { - g = (weights).t() * gy; - } + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. * - * @param input The propagated input activation. - * @param d The calculated error. - * @param g The calculated gradient. + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& d, - GradientDataType& g) - { - g = d * recurrentParameter.t(); - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. 
- OutputDataType& Weights() { return weights; } + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient); + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } //! Modify the input parameter. InputDataType& InputParameter() { return inputParameter; } - //! Get the input parameter. - InputDataType const& RecurrentParameter() const { return recurrentParameter; } - //! Modify the input parameter. - InputDataType& RecurrentParameter() { return recurrentParameter; } - //! Get the output parameter. OutputDataType const& OutputParameter() const { return outputParameter; } //! Modify the output parameter. @@ -138,14 +114,10 @@ class RecurrentLayer OutputDataType& Gradient() { return gradient; } /** - * Serialize the layer. + * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(recurrentParameter, "recurrentParameter"); - ar & data::CreateNVP(weights, "weights"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of input units. @@ -157,6 +129,12 @@ class RecurrentLayer //! Locally-stored weight object. OutputDataType weights; + //! Locally-stored weight paramters. + OutputDataType weight; + + //! Locally-stored bias term parameters. + OutputDataType bias; + //! Locally-stored delta object. OutputDataType delta; @@ -168,25 +146,12 @@ class RecurrentLayer //! Locally-stored output parameter object. OutputDataType outputParameter; - - //! Locally-stored recurrent parameter object. - InputDataType recurrentParameter; -}; // class RecurrentLayer - -//! Layer traits for the recurrent layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; +}; // class Linear } // namespace ann } // namespace mlpack +// Include implementation. +#include "linear_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/linear_impl.hpp b/src/mlpack/methods/ann/layer/linear_impl.hpp new file mode 100644 index 00000000000..8b726e5d255 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_impl.hpp @@ -0,0 +1,87 @@ +/** + * @file linear_impl.hpp + * @author Marcus Edel + * + * Implementation of the Linear layer class also known as fully-connected layer + * or affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Linear::Linear() +{ + // Nothing to do here. 
+} + +template +Linear::Linear( + const size_t inSize, + const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize * inSize + outSize, 1); +} + +template +void Linear::Reset() +{ + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); +} + +template +template +void Linear::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = (weight * input) + bias; +} + +template +template +void Linear::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = weight.t() * gy; +} + +template +template +void Linear::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; +} + +template +template +void Linear::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp deleted file mode 100644 index b3b3dbf0266..00000000000 --- a/src/mlpack/methods/ann/layer/linear_layer.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * @file linear_layer.hpp - * @author Marcus Edel - * - * Definition of the LinearLayer class also known as fully-connected layer or - * affine transformation. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the LinearLayer class. The LinearLayer class represents a - * single layer of a neural network. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LinearLayer -{ - public: - /** - * Create the LinearLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - LinearLayer(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. 
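To make the new Linear implementation concrete: all parameters live in one (outSize * inSize + outSize) x 1 column, Reset() aliases weight (outSize x inSize) and bias (outSize x 1) into that memory, and Gradient() fills the same flattened layout, weight part first, bias part last. The fragment below is a hedged usage sketch; the driver code is assumed, only the Linear API comes from this patch.

// Assumed driver code exercising Linear<> directly, outside the FFN class.
Linear<> layer(3, 2);                  // weights: 3 * 2 + 2 = 8 elements
layer.Parameters().randu();
layer.Reset();                         // weight is now 2x3, bias 2x1, both views

arma::mat input = arma::randu<arma::mat>(3, 1);
arma::mat output;
layer.Forward(std::move(input), std::move(output));  // weight * input + bias

// Gradient layout matches the parameter layout: 6 weight entries
// (vectorise(error * input.t())) followed by the 2 bias entries (error).
arma::mat error = arma::ones<arma::mat>(2, 1);
arma::mat gradient = arma::zeros<arma::mat>(8, 1);
layer.Gradient(std::move(input), std::move(error), std::move(gradient));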
- */ - template - void Forward(const arma::Cube& input, arma::Mat& output) - { - arma::Mat data(input.n_elem, 1); - - for (size_t s = 0, c = 0; s < input.n_slices / data.n_cols; s++) - { - for (size_t i = 0; i < data.n_cols; i++, c++) - { - data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) * - input.n_rows * input.n_cols - 1) = arma::trans(arma::vectorise( - input.slice(c), 1)); - } - } - - output = weights * data; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = weights.t() * gy; - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param error The calculated error. - * @param gradient The calculated gradient. - */ - template - void Gradient(const InputType& input, - const ErrorType& error, - GradientType& gradient) - { - GradientDelta(input, error, gradient); - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - } - - private: - /* - * Calculate the gradient using the output delta (3rd order tensor) and the - * input activation (3rd order tensor). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Cube& input, - const arma::Mat& d, - arma::Cube& g) - { - g = arma::Cube(weights.n_rows, weights.n_cols, 1); - arma::Mat data = arma::Mat(d.n_cols, - input.n_elem / d.n_cols); - - for (size_t s = 0, c = 0; s < input.n_slices / - data.n_rows; s++) - { - for (size_t i = 0; i < data.n_rows; i++, c++) - { - data.row(i).subvec(s * input.n_rows * - input.n_cols, (s + 1) * - input.n_rows * - input.n_cols - 1) = arma::vectorise( - input.slice(c), 1); - } - } - - g.slice(0) = d * data / d.n_cols; - } - - /* - * Calculate the gradient (3rd order tensor) using the output delta - * (dense matrix) and the input activation (dense matrix). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. 
- */ - template - void GradientDelta(const arma::Mat& input, - const arma::Mat& d, - arma::Cube& g) - { - g = arma::Cube(weights.n_rows, weights.n_cols, 1); - Gradient(input, d, g.slice(0)); - } - - /* - * Calculate the gradient (dense matrix) using the output delta - * (dense matrix) and the input activation (3rd order tensor). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Cube& input, - const arma::Mat& d, - arma::Mat& g) - { - arma::Cube grad = arma::Cube(weights.n_rows, weights.n_cols, 1); - Gradient(input, d, grad); - g = grad.slice(0); - } - - /* - * Calculate the gradient (dense matrix) using the output delta - * (dense matrix) and the input activation (dense matrix). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Mat& input, - const arma::Mat& d, - arma::Mat& g) - { - g = d * input.t(); - } - - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class LinearLayer - -/** - * Linear Mapping layer to map between 3rd order tensors and dense matrices. - */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::mat -> -using LinearMappingLayer = LinearLayer; - -//! Layer traits for the linear layer. -template< - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp new file mode 100644 index 00000000000..972f72db8ec --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -0,0 +1,154 @@ +/** + * @file linear.hpp + * @author Marcus Edel + * + * Definition of the LinearNoBias class also known as fully-connected layer or + * affine transformation without the bias term. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_HPP + +#include + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the LinearNoBias class. The LinearNoBias class represents a + * single layer of a neural network. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LinearNoBias +{ + public: + //! Create the LinearNoBias object. + LinearNoBias(); + /** + * Create the LinearNoBias object using the specified number of units. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + */ + LinearNoBias(const size_t inSize, const size_t outSize); + + /* + * Reset the layer parameter. + */ + void Reset(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient); + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight parameter. + OutputDataType weight; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LinearNoBias + +} // namespace ann +} // namespace mlpack + +// Include implementation. 
+#include "linear_no_bias_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp b/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp new file mode 100644 index 00000000000..15f96cdb537 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp @@ -0,0 +1,83 @@ +/** + * @file linear_no_bias_impl.hpp + * @author Marcus Edel + * + * Implementation of the LinearNoBias class also known as fully-connected layer + * or affine transformation without the bias term. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear_no_bias.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LinearNoBias::LinearNoBias() +{ + // Nothing to do here. +} + +template +LinearNoBias::LinearNoBias( + const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize * inSize, 1); +} + +template +void LinearNoBias::Reset() +{ + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); +} + +template +template +void LinearNoBias::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = weight * input; +} + +template +template +void LinearNoBias::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = weight.t() * gy; +} + +template +template +void LinearNoBias::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); +} + +template +template +void LinearNoBias::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp similarity index 62% rename from src/mlpack/methods/ann/layer/log_softmax_layer.hpp rename to src/mlpack/methods/ann/layer/log_softmax.hpp index 2b417e32b61..df9872d6c8a 100644 --- a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -1,16 +1,16 @@ /** - * @file log_softmax_layer.hpp + * @file log_softmax.hpp * @author Marcus Edel * - * Definition of the LogSoftmaxLayer class. + * Definition of the LogSoftmax class. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_HPP +#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_HPP #include @@ -33,13 +33,13 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class LogSoftmaxLayer +class LogSoftMax { public: /** - * Create the LogSoftmaxLayer object. + * Create the LogSoftmax object. 
*/ - LogSoftmaxLayer() { /* Nothing to do here. */ } + LogSoftMax(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -48,39 +48,8 @@ class LogSoftmaxLayer * @param input Input data used for evaluating the specified function. * @param output Resulting output activation. */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); - output = (maxInput - input); - - // Approximation of the hyperbolic tangent. The acuracy however is - // about 0.00001 lower as using tanh. Credits go to Leon Bottou. - output.transform( [](double x) - { - //! Fast approximation of exp(-x) for x positive. - static constexpr double A0 = 1.0; - static constexpr double A1 = 0.125; - static constexpr double A2 = 0.0078125; - static constexpr double A3 = 0.00032552083; - static constexpr double A4 = 1.0172526e-5; - - if (x < 13.0) - { - double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); - y *= y; - y *= y; - y *= y; - y = 1 / y; - - return y; - } - - return 0.0; - } ); - - output = input - (maxInput + std::log(arma::accu(output))); - } + template + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -92,12 +61,9 @@ class LogSoftmaxLayer * @param g The calculated gradient. */ template - void Backward(const arma::Mat& input, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy - arma::exp(input) * arma::accu(gy); - } + void Backward(const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -114,6 +80,12 @@ class LogSoftmaxLayer //! Modify the delta. InputDataType& Delta() { return delta; } + /** + * Serialize the layer. + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -123,9 +95,12 @@ class LogSoftmaxLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class LogSoftmaxLayer +}; // class LogSoftmax + +} // namespace ann +} // namespace mlpack -}; // namespace ann -}; // namespace mlpack +// Include implementation. +#include "log_softmax_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/log_softmax_impl.hpp b/src/mlpack/methods/ann/layer/log_softmax_impl.hpp new file mode 100644 index 00000000000..68fba8d8459 --- /dev/null +++ b/src/mlpack/methods/ann/layer/log_softmax_impl.hpp @@ -0,0 +1,85 @@ +/** + * @file log_softmax_impl.hpp + * @author Marcus Edel + * + * Implementation of the LogSoftmax class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_IMPL_HPP + +// In case it hasn't yet been included. +#include "log_softmax.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LogSoftMax::LogSoftMax() +{ + // Nothing to do here. +} + +template +template +void LogSoftMax::Forward( + const InputType&& input, OutputType&& output) +{ + arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); + output = (maxInput - input); + + // Approximation of the hyperbolic tangent. 
The acuracy however is + // about 0.00001 lower as using tanh. Credits go to Leon Bottou. + output.transform( [](double x) + { + //! Fast approximation of exp(-x) for x positive. + static constexpr double A0 = 1.0; + static constexpr double A1 = 0.125; + static constexpr double A2 = 0.0078125; + static constexpr double A3 = 0.00032552083; + static constexpr double A4 = 1.0172526e-5; + + if (x < 13.0) + { + double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); + y *= y; + y *= y; + y *= y; + y = 1 / y; + + return y; + } + + return 0.0; + } ); + + output = input - (maxInput + std::log(arma::accu(output))); +} + +template +template +void LogSoftMax::Backward( + const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy - arma::exp(input) * arma::accu(gy); +} + +template +template +void LogSoftMax::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp b/src/mlpack/methods/ann/layer/lookup.hpp similarity index 55% rename from src/mlpack/methods/ann/layer/sparse_input_layer.hpp rename to src/mlpack/methods/ann/layer/lookup.hpp index 6b1d9d118f9..080d4b5746e 100644 --- a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -1,29 +1,27 @@ /** - * @file sparse_input_layer.hpp - * @author Tham Ngap Wei + * @file lookup.hpp + * @author Marcus Edel * - * Definition of the sparse input class which serve as the first layer - * of the sparse autoencoder + * Definition of the Lookup class a particular convolution, where the width of + * the convolution is 1. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LOOKUP_HPP +#define MLPACK_METHODS_ANN_LAYER_LOOKUP_HPP #include #include -#include - namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of the SparseInputLayer. The SparseInputLayer class represents - * the first layer of sparse autoencoder + * Implementation of the Lookup class. The Lookup class is a particular + * convolution, where the width of the convolution is 1. * * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). @@ -33,26 +31,18 @@ namespace ann /** Artificial Neural Network. */ { template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat - > -class SparseInputLayer +> +class Lookup { public: /** - * Create the SparseInputLayer object using the specified number of units. + * Create the Lookup object using the specified number of input and output + * units. * * @param inSize The number of input units. * @param outSize The number of output units. - * @param lambda L2-regularization parameter. 
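A note on the LogSoftMax::Forward() implementation above: it is the standard log-sum-exp trick, log softmax(x)_i = x_i - (max(x) + log(sum_j exp(x_j - max(x)))), with exp(-(max(x) - x_j)) replaced by the cited polynomial approximation for speed. A minimal sketch of the underlying equivalence in plain Armadillo (small inputs, so approximation error is irrelevant here):

    arma::vec x = arma::randu<arma::vec>(5);

    arma::vec naive  = x - std::log(arma::accu(arma::exp(x)));
    arma::vec stable = x - (x.max() + std::log(arma::accu(arma::exp(x - x.max()))));

    // naive and stable agree to machine precision, but only the max-shifted
    // form avoids overflow when the entries of x are large.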
*/ - SparseInputLayer(const size_t inSize, - const size_t outSize, - const double lambda = 0.0001) : - inSize(inSize), - outSize(outSize), - lambda(lambda) - { - weights.set_size(outSize, inSize); - } + Lookup(const size_t inSize, const size_t outSize); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -62,10 +52,7 @@ class SparseInputLayer * @param output Resulting output activation. */ template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -76,34 +63,27 @@ class SparseInputLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy; - } + template + void Backward(const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. */ - template - void Gradient(const InputType& input, - const arma::Mat& d, - GradientDataType& g) - { - g = d * input.t() / static_cast( - input.n_cols) + lambda * weights; - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient); + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -126,25 +106,19 @@ class SparseInputLayer OutputDataType& Gradient() { return gradient; } /** - * Serialize the layer. + * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(lambda, "lambda"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: + //! Locally-stored number of input units. size_t inSize; //! Locally-stored number of output units. size_t outSize; - //! L2-regularization parameter. - double lambda; - //! Locally-stored weight object. OutputDataType weights; @@ -159,22 +133,12 @@ class SparseInputLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class SparseInputLayer - -//! Layer traits for the SparseInputLayer. -template -class LayerTraits > -{ -public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; +}; // class Lookup } // namespace ann } // namespace mlpack +// Include implementation. 
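The implementation that follows makes the Lookup layer an embedding table: Parameters() is an outSize x inSize matrix and Forward() selects the columns named by the (one-based) indices in the input. A small standalone sketch with hypothetical sizes:

    using namespace mlpack::ann;

    Lookup<> lookup(5, 3);          // Vocabulary of 5 symbols, 3-dimensional embeddings.
    lookup.Parameters().randu();    // 3x5 embedding table.

    arma::mat indices("2; 4");      // Look up symbols 2 and 4 (one-based).
    arma::mat output;
    lookup.Forward(std::move(indices), std::move(output));   // output is 3x2: columns 2 and 4.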
+#include "lookup_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/lookup_impl.hpp b/src/mlpack/methods/ann/layer/lookup_impl.hpp new file mode 100644 index 00000000000..cbe24468ed3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lookup_impl.hpp @@ -0,0 +1,74 @@ +/** + * @file lookup_impl.hpp + * @author Marcus Edel + * + * Implementation of the Lookup class a particular convolution, where the width + * of the convolution is 1. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOOKUP_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LOOKUP_IMPL_HPP + +// In case it hasn't yet been included. +#include "lookup.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Lookup::Lookup( + const size_t inSize, + const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize, inSize); +} + +template +template +void Lookup::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = weights.cols(arma::conv_to::from(input) - 1); +} + +template +template +void Lookup::Backward( + const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy; +} + +template +template +void Lookup::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient = arma::zeros >(weights.n_rows, weights.n_cols); + gradient.cols(arma::conv_to::from(input) - 1) = error; +} + +template +template +void Lookup::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp new file mode 100644 index 00000000000..cf70e344d94 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -0,0 +1,237 @@ +/** + * @file lstm.hpp + * @author Marcus Edel + * + * Definition of the LSTM class, which implements a lstm network + * layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_HPP +#define MLPACK_METHODS_ANN_LAYER_LSTM_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * An implementation of a lstm network layer. + * + * This class allows specification of the type of the activation functions used + * for the gates and cells and also of the type of the function used to + * initialize and update the peephole weights. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LSTM +{ + public: + //! Create the LSTM object. 
+ LSTM(); + + /** + * Create the LSTM layer object using the specified parameters. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + LSTM(const size_t inSize, const size_t outSize, const size_t rho); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */); + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the maximum number of steps to backpropagate through time (BPTT). + size_t Rho() const { return rho; } + //! Modify the maximum number of steps to backpropagate through time (BPTT). + size_t& Rho() { return rho; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! Get the model modules. + std::vector& Model() { return network; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored previous output. + arma::mat prevOutput; + + //! Locally-stored previous cell state. + arma::mat prevCell; + + //! Locally-stored input 2 gate module. + LayerTypes input2GateModule; + + //! Locally-stored output 2 gate module. + LayerTypes output2GateModule; + + //! Locally-stored input gate module. 
+ LayerTypes inputGateModule; + + //! Locally-stored hidden state module. + LayerTypes hiddenStateModule; + + //! Locally-stored forget gate module. + LayerTypes forgetGateModule; + + //! Locally-stored output gate module. + LayerTypes outputGateModule; + + //! Locally-stored cell module. + LayerTypes cellModule; + + //! Locally-stored cell activation module. + LayerTypes cellActivationModule; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored list of network modules. + std::vector network; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! Locally-stored number of gradient steps. + size_t gradientStep; + + //! Locally-stored cell parameters. + std::vector cellParameter; + + //! Locally-stored output parameters. + std::vector outParameter; + + //! Locally-stored previous error. + arma::mat prevError; + + //! Locally-stored cell activation error. + arma::mat cellActivationError; + + //! Locally-stored foget gate error. + arma::mat forgetGateError; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LSTM + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "lstm_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm_impl.hpp b/src/mlpack/methods/ann/layer/lstm_impl.hpp new file mode 100644 index 00000000000..6ebe89b2907 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lstm_impl.hpp @@ -0,0 +1,273 @@ +/** + * @file lstm_impl.hpp + * @author Marcus Edel + * + * Implementation of the LSTM class, which implements a lstm network + * layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LSTM_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LSTM::LSTM() +{ + // Nothing to do here. 
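For orientation while reading the constructor and Forward() below: the Linear (input2GateModule) and LinearNoBias (output2GateModule) modules jointly produce a single (4 * outSize) x 1 pre-activation from the current input and the previous output, and its four outSize-sized slices become the usual LSTM gates. A self-contained Armadillo sketch of one step under that reading (standalone code with made-up weights, not part of this patch):

    const size_t inSize = 8, outSize = 4;
    arma::mat Wx(4 * outSize, inSize,  arma::fill::randu);   // input2GateModule weight.
    arma::mat Wh(4 * outSize, outSize, arma::fill::randu);   // output2GateModule weight.
    arma::mat b(4 * outSize, 1, arma::fill::zeros);          // input2GateModule bias.

    arma::mat x(inSize, 1, arma::fill::randu);               // Current input.
    arma::mat hPrev(outSize, 1, arma::fill::zeros);          // Previous output.
    arma::mat cPrev(outSize, 1, arma::fill::zeros);          // Previous cell state.

    auto sigmoid = [](const arma::mat& a)
    { return arma::mat(arma::exp(a) / (1.0 + arma::exp(a))); };

    arma::mat z = Wx * x + Wh * hPrev + b;
    arma::mat i = sigmoid(z.rows(0, outSize - 1));                  // Input gate.
    arma::mat g = arma::tanh(z.rows(outSize, 2 * outSize - 1));     // Candidate (hidden state module).
    arma::mat f = sigmoid(z.rows(2 * outSize, 3 * outSize - 1));    // Forget gate.
    arma::mat o = sigmoid(z.rows(3 * outSize, 4 * outSize - 1));    // Output gate.

    arma::mat c = i % g + f % cPrev;    // nextCell in Forward().
    arma::mat h = o % arma::tanh(c);    // The layer output.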
+} + +template +LSTM::LSTM( + const size_t inSize, + const size_t outSize, + const size_t rho) : + inSize(inSize), + outSize(outSize), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) +{ + input2GateModule = new Linear<>(inSize, 4 * outSize); + output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); + + network.push_back(input2GateModule); + network.push_back(output2GateModule); + + inputGateModule = new SigmoidLayer<>(); + hiddenStateModule = new TanHLayer<>(); + forgetGateModule = new SigmoidLayer<>(); + outputGateModule = new SigmoidLayer<>(); + + network.push_back(inputGateModule); + network.push_back(hiddenStateModule); + network.push_back(forgetGateModule); + network.push_back(outputGateModule); + + cellModule = new IdentityLayer<>(); + cellActivationModule = new TanHLayer<>(); + + network.push_back(cellModule); + network.push_back(cellActivationModule); + + prevOutput = arma::zeros(outSize, 1); + prevCell = arma::zeros(outSize, 1); + prevError = arma::zeros(4 * outSize, 1); + cellActivationError = arma::zeros(outSize, 1); +} + +template +template +void LSTM::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + if (!deterministic) + { + cellParameter.push_back(prevCell); + outParameter.push_back(prevOutput); + } + + arma::mat output1; + arma::mat output2; + arma::mat output3; + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, input2GateModule))), + input2GateModule); + + boost::apply_visitor(ForwardVisitor(std::move(prevOutput), std::move( + boost::apply_visitor(outputParameterVisitor, output2GateModule))), + output2GateModule); + + output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + + boost::apply_visitor(outputParameterVisitor, output2GateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 0, 0, 1 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, inputGateModule))), inputGateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 1 * outSize, 0, 2 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, hiddenStateModule))), hiddenStateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 2 * outSize, 0, 3 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, forgetGateModule))), forgetGateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 3 * outSize, 0, 4 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, outputGateModule))), outputGateModule); + + arma::mat cell = prevCell; + + // Input gate * hidden state. + arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % boost::apply_visitor(outputParameterVisitor, + hiddenStateModule); + + // Forget gate * cell. 
+ arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cell; + + arma::mat nextCell = cmul1 + cmul2; + + boost::apply_visitor(ForwardVisitor(std::move(nextCell), std::move( + boost::apply_visitor(outputParameterVisitor, cellModule))), cellModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, cellModule)), std::move(boost::apply_visitor( + outputParameterVisitor, cellActivationModule))), cellActivationModule); + + output = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % boost::apply_visitor(outputParameterVisitor, + outputGateModule); + + prevCell = nextCell; + prevOutput = output; + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + prevOutput.zeros(); + prevCell.zeros(); + } +} + +template +template +void LSTM::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + if (backwardStep > 0) + { + gy += boost::apply_visitor(deltaVisitor, output2GateModule); + } + + arma::mat g1 = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % gy; + + arma::mat g2 = boost::apply_visitor(outputParameterVisitor, + outputGateModule) % gy; + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, cellActivationModule)), std::move(g2), + std::move(boost::apply_visitor(deltaVisitor, cellActivationModule))), + cellActivationModule); + + cellActivationError = boost::apply_visitor(deltaVisitor, + cellActivationModule); + + if (backwardStep > 0) + { + cellActivationError += forgetGateError; + } + + arma::mat g4 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % cellActivationError; + + arma::mat g5 = boost::apply_visitor(outputParameterVisitor, + hiddenStateModule) % cellActivationError; + + forgetGateError = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cellActivationError; + + arma::mat g7 = cellParameter[cellParameter.size() - + backwardStep - 1] % cellActivationError; + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputGateModule)), std::move(g5), + std::move(boost::apply_visitor(deltaVisitor, inputGateModule))), + inputGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, hiddenStateModule)), std::move(g4), + std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule))), + hiddenStateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, forgetGateModule)), std::move(g7), + std::move(boost::apply_visitor(deltaVisitor, forgetGateModule))), + forgetGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, outputGateModule)), std::move(g1), + std::move(boost::apply_visitor(deltaVisitor, outputGateModule))), + outputGateModule); + + prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, inputGateModule); + prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, hiddenStateModule); + prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, forgetGateModule); + prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, outputGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, input2GateModule)), std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, 
input2GateModule))), + input2GateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, output2GateModule)), std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, output2GateModule))), + output2GateModule); + + backwardStep++; + if (backwardStep == rho) + { + backwardStep = 0; + cellParameter.clear(); + } + + g = boost::apply_visitor(deltaVisitor, input2GateModule); +} + +template +template +void LSTM::Gradient( + arma::Mat&& input, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(prevError)), + input2GateModule); + + boost::apply_visitor(GradientVisitor( + std::move(outParameter[outParameter.size() - gradientStep - 1]), + std::move(prevError)), output2GateModule); + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + outParameter.clear(); + } +} + +template +template +void LSTM::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(rho, "rho"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp deleted file mode 100644 index 6ccd2fc65fd..00000000000 --- a/src/mlpack/methods/ann/layer/lstm_layer.hpp +++ /dev/null @@ -1,418 +0,0 @@ -/** - * @file lstm_layer.hpp - * @author Marcus Edel - * - * Definition of the LSTMLayer class, which implements a lstm network - * layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a lstm network layer. - * - * This class allows specification of the type of the activation functions used - * for the gates and cells and also of the type of the function used to - * initialize and update the peephole weights. - * - * @tparam GateActivationFunction Activation function used for the gates. - * @tparam StateActivationFunction Activation function used for the state. - * @tparam OutputActivationFunction Activation function used for the output. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - class GateActivationFunction = LogisticFunction, - class StateActivationFunction = TanhFunction, - class OutputActivationFunction = TanhFunction, - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LSTMLayer -{ - public: - /** - * Create the LSTMLayer object using the specified parameters. - * - * @param outSize The number of output units. - * @param peepholes The flag used to indicate if peephole connections should - * be used (Default: false). - * @param WeightInitRule The weight initialization rule used to initialize the - * weight matrix. 
- */ - LSTMLayer(const size_t outSize, const bool peepholes = false) : - outSize(outSize), - peepholes(peepholes), - seqLen(1), - offset(0) - { - if (peepholes) - { - peepholeWeights.set_size(outSize, 3); - peepholeDerivatives = arma::zeros(outSize, 3); - } - else - { - peepholeWeights.set_size(0, 0); - } - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - if (inGate.n_cols < seqLen) - { - inGate = arma::zeros(outSize, seqLen); - inGateAct = arma::zeros(outSize, seqLen); - inGateError = arma::zeros(outSize, seqLen); - outGate = arma::zeros(outSize, seqLen); - outGateAct = arma::zeros(outSize, seqLen); - outGateError = arma::zeros(outSize, seqLen); - forgetGate = arma::zeros(outSize, seqLen); - forgetGateAct = arma::zeros(outSize, seqLen); - forgetGateError = arma::zeros(outSize, seqLen); - state = arma::zeros(outSize, seqLen); - stateError = arma::zeros(outSize, seqLen); - cellAct = arma::zeros(outSize, seqLen); - } - - // Split up the inputactivation into the 3 parts (inGate, forgetGate, - // outGate). - inGate.col(offset) = input.submat(0, 0, outSize - 1, 0); - - forgetGate.col(offset) = input.submat(outSize, 0, (outSize * 2) - 1, 0); - outGate.col(offset) = input.submat(outSize * 3, 0, (outSize * 4) - 1, 0); - - if (peepholes && offset > 0) - { - inGate.col(offset) += peepholeWeights.col(0) % state.col(offset - 1); - forgetGate.col(offset) += peepholeWeights.col(1) % - state.col(offset - 1); - } - - arma::Col inGateActivation = inGateAct.unsafe_col(offset); - GateActivationFunction::fn(inGate.unsafe_col(offset), inGateActivation); - - arma::Col forgetGateActivation = forgetGateAct.unsafe_col(offset); - GateActivationFunction::fn(forgetGate.unsafe_col(offset), - forgetGateActivation); - - arma::Col cellActivation = cellAct.unsafe_col(offset); - StateActivationFunction::fn(input.submat(outSize * 2, 0, - (outSize * 3) - 1, 0), cellActivation); - - state.col(offset) = inGateAct.col(offset) % cellActivation; - - if (offset > 0) - state.col(offset) += forgetGateAct.col(offset) % state.col(offset - 1); - - if (peepholes) - outGate.col(offset) += peepholeWeights.col(2) % state.col(offset); - - arma::Col outGateActivation = outGateAct.unsafe_col(offset); - GateActivationFunction::fn(outGate.unsafe_col(offset), outGateActivation); - - OutputActivationFunction::fn(state.unsafe_col(offset), output); - output = outGateAct.col(offset) % output; - - offset = (offset + 1) % seqLen; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. 
- */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - queryOffset = seqLen - offset - 1; - - arma::Col outGateDerivative; - GateActivationFunction::deriv(outGateAct.unsafe_col(queryOffset), - outGateDerivative); - - arma::Col stateActivation; - StateActivationFunction::fn(state.unsafe_col(queryOffset), stateActivation); - - outGateError.col(queryOffset) = outGateDerivative % gy % stateActivation; - - arma::Col stateDerivative; - StateActivationFunction::deriv(stateActivation, stateDerivative); - - stateError.col(queryOffset) = gy % outGateAct.col(queryOffset) % - stateDerivative; - - if (queryOffset < (seqLen - 1)) - { - stateError.col(queryOffset) += stateError.col(queryOffset + 1) % - forgetGateAct.col(queryOffset + 1); - - if (peepholes) - { - stateError.col(queryOffset) += inGateError.col(queryOffset + 1) % - peepholeWeights.col(0); - stateError.col(queryOffset) += forgetGateError.col(queryOffset + 1) % - peepholeWeights.col(1); - } - } - - if (peepholes) - { - stateError.col(queryOffset) += outGateError.col(queryOffset) % - peepholeWeights.col(2); - } - - arma::Col cellDerivative; - StateActivationFunction::deriv(cellAct.col(queryOffset), cellDerivative); - - arma::Col cellError = inGateAct.col(queryOffset) % cellDerivative % - stateError.col(queryOffset); - - if (queryOffset > 0) - { - arma::Col forgetGateDerivative; - GateActivationFunction::deriv(forgetGateAct.col(queryOffset), - forgetGateDerivative); - - forgetGateError.col(queryOffset) = forgetGateDerivative % - stateError.col(queryOffset) % state.col(queryOffset - 1); - } - - arma::Col inGateDerivative; - GateActivationFunction::deriv(inGateAct.col(queryOffset), inGateDerivative); - - inGateError.col(queryOffset) = inGateDerivative % - stateError.col(queryOffset) % cellAct.col(queryOffset); - - if (peepholes) - { - peepholeDerivatives.col(2) += outGateError.col(queryOffset) % - state.col(queryOffset); - - if (queryOffset > 0) - { - peepholeDerivatives.col(0) += inGateError.col(queryOffset) % - state.col(queryOffset - 1); - peepholeDerivatives.col(1) += forgetGateError.col(queryOffset) % - state.col(queryOffset - 1); - } - } - - g = arma::zeros >(outSize * 4, 1); - g.submat(0, 0, outSize - 1, 0) = inGateError.col(queryOffset); - g.submat(outSize, 0, (outSize * 2) - 1, 0) = - forgetGateError.col(queryOffset); - g.submat(outSize * 2, 0, (outSize * 3) - 1, 0) = cellError; - g.submat(outSize * 3, 0, (outSize * 4) - 1, 0) = - outGateError.col(queryOffset); - - offset = (offset + 1) % seqLen; - } - - /** - * Ordinary feed backward pass of the lstm layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& /* gy */, - GradientDataType& /* g */) - { - if (peepholes && offset == 0) - { - peepholeGradient.col(0) = arma::trans((peepholeWeights.col(0).t() * - (inGateError.col(queryOffset) % peepholeDerivatives.col(0))) * - inGate.col(queryOffset).t()); - - peepholeGradient.col(1) = arma::trans((peepholeWeights.col(1).t() * - (forgetGateError.col(queryOffset) % peepholeDerivatives.col(1))) * - forgetGate.col(queryOffset).t()); - - peepholeGradient.col(2) = arma::trans((peepholeWeights.col(2).t() * - (outGateError.col(queryOffset) % peepholeDerivatives.col(2))) * - outGate.col(queryOffset).t()); - - peepholeDerivatives.zeros(); - } - } - - //! Get the peephole weights. 
- OutputDataType const& Weights() const { return peepholeWeights; } - //! Modify the peephole weights. - OutputDataType& Weights() { return peepholeWeights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the peephole gradient. - OutputDataType const& Gradient() const { return peepholeGradient; } - //! Modify the peephole gradient. - OutputDataType& Gradient() { return peepholeGradient; } - - //! Get the sequence length. - size_t SeqLen() const { return seqLen; } - //! Modify the sequence length. - size_t& SeqLen() { return seqLen; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(peepholes, "peepholes"); - - if (peepholes) - { - ar & data::CreateNVP(peepholeWeights, "peepholeWeights"); - - if (Archive::is_loading::value) - { - peepholeDerivatives = arma::zeros( - peepholeWeights.n_rows, 3); - } - } - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored peephole indication flag. - bool peepholes; - - //! Locally-stored length of the the input sequence. - size_t seqLen; - - //! Locally-stored sequence offset. - size_t offset; - - //! Locally-stored query offset. - size_t queryOffset; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored ingate object. - InputDataType inGate; - - //! Locally-stored ingate activation object. - InputDataType inGateAct; - - //! Locally-stored ingate error object. - InputDataType inGateError; - - //! Locally-stored outgate object. - InputDataType outGate; - - //! Locally-stored outgate activation object. - InputDataType outGateAct; - - //! Locally-stored outgate error object. - InputDataType outGateError; - - //! Locally-stored forget object. - InputDataType forgetGate; - - //! Locally-stored forget activation object. - InputDataType forgetGateAct; - - //! Locally-stored forget error object. - InputDataType forgetGateError; - - //! Locally-stored state object. - InputDataType state; - - //! Locally-stored state erro object. - InputDataType stateError; - - //! Locally-stored cell activation object. - InputDataType cellAct; - - //! Locally-stored peephole weight object. - OutputDataType peepholeWeights; - - //! Locally-stored derivatives object. - OutputDataType peepholeDerivatives; - - //! Locally-stored peephole gradient object. - OutputDataType peepholeGradient; -}; // class LSTMLayer - -//! Layer traits for the lstm layer. 
-template< - class GateActivationFunction, - class StateActivationFunction, - class OutputActivationFunction, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = true; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp new file mode 100644 index 00000000000..6bf179b7a5a --- /dev/null +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -0,0 +1,280 @@ +/** + * @file max_pooling.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Definition of the MaxPooling class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MAX_POOLING_HPP +#define MLPACK_METHODS_ANN_LAYER_MAX_POOLING_HPP + +#include +// #include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/* + * The max pooling rule for convolution neural networks. Take the maximum value + * within the receptive block. + */ +class MaxPoolingRule +{ + public: + /* + * Return the maximum value within the receptive block. + * + * @param input Input used to perform the pooling operation. + */ + template + size_t Pooling(const MatType& input) + { + return arma::as_scalar(arma::find(input.max() == input, 1)); + } +}; + +/** + * Implementation of the MaxPooling layer. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MaxPooling +{ +public: + //! Create the MaxPooling object. + MaxPooling(); + + /** + * Create the MaxPooling object using the specified number of units. + * + * @param kW Width of the pooling window. + * @param kH Height of the pooling window. + * @param dW Width of the stride operation. + * @param dH Width of the stride operation. + * @param floor Rounding operator (floor or ceil). + */ + MaxPooling(const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! 
Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the height. + size_t& InputHeight() { return inputHeight; } + + //! Get the width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + /** + * Apply pooling to the input and store the results. + * + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + * @param poolingIndices The pooled indices. + */ + template + void PoolingOperation(const arma::Mat& input, + arma::Mat& output, + arma::Mat& poolingIndices) + { + for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += dW) + { + for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += dH) + { + arma::mat subInput = input(arma::span(rowidx, rowidx + kW - 1 - offset), + arma::span(colidx, colidx + kH - 1 - offset)); + + const size_t idx = pooling.Pooling(subInput); + output(i, j) = subInput(idx); + + if (!deterministic) + { + arma::Mat subIndices = indices(arma::span(rowidx, + rowidx + kW - 1 - offset), + arma::span(colidx, colidx + kH - 1 - offset)); + + poolingIndices(i, j) = subIndices(idx); + } + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param error The backward error. + * @param output The pooled result. + * @param poolingIndices The pooled indices. + */ + template + void Unpooling(const arma::Mat& error, + arma::Mat& output, + arma::Mat& poolingIndices) + { + for (size_t i = 0; i < poolingIndices.n_elem; ++i) + { + output(poolingIndices(i)) += error(i); + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored width of the pooling window. + size_t kW; + + //! Locally-stored height of the pooling window. + size_t kH; + + //! Locally-stored width of the stride operation. + size_t dW; + + //! Locally-stored height of the stride operation. + size_t dH; + + //! Locally-stored reset parameter used to initialize the module once. + bool reset; + + //! Rounding operation used. + bool floor; + + //! Locally-stored stored rounding offset. + size_t offset; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! 
If true use maximum a posteriori during the forward pass. + bool deterministic; + + //! Locally-stored output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube gTemp; + + //! Locally-stored pooling strategy. + MaxPoolingRule pooling; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored indices matrix parameter. + arma::Mat indices; + + //! Locally-stored indices column parameter. + arma::Col indicesCol; + + //! Locally-stored pooling indicies. + std::vector poolingIndices; +}; // class MaxPooling + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "max_pooling_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling_impl.hpp b/src/mlpack/methods/ann/layer/max_pooling_impl.hpp new file mode 100644 index 00000000000..95aeea86049 --- /dev/null +++ b/src/mlpack/methods/ann/layer/max_pooling_impl.hpp @@ -0,0 +1,149 @@ +/** + * @file max_pooling_impl.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Implementation of the MaxPooling class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MAX_POOLING_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MAX_POOLING_IMPL_HPP + +// In case it hasn't yet been included. +#include "max_pooling.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MaxPooling::MaxPooling() +{ + // Nothing to do here. +} + +template +MaxPooling::MaxPooling( + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const bool floor) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + reset(false), + floor(floor), + offset(0), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + deterministic(false) +{ + // Nothing to do here. 
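For reference when reading Forward() below: with floor rounding the pooled width is floor((inputWidth - kW) / dW + 1) (and analogously for the height), while ceil rounding uses ceil() and sets offset = 1 so that pooling windows are clipped at the border. A standalone sketch with hypothetical sizes:

    using namespace mlpack::ann;

    MaxPooling<> pool(2, 2, 2, 2);   // 2x2 window, stride 2, floor rounding.
    pool.InputWidth() = 28;
    pool.InputHeight() = 28;

    arma::mat input = arma::randu<arma::mat>(28 * 28, 1);   // One 28x28 slice, flattened.
    arma::mat output;
    pool.Forward(std::move(input), std::move(output));
    // outputWidth = floor((28 - 2) / 2 + 1) = 14, so output has 14 * 14 = 196 rows.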
+} + +template +template +void MaxPooling::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + const size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + if (!deterministic) + { + poolingIndices.push_back(outputTemp); + } + + if (!reset) + { + size_t elements = inputWidth * inputHeight; + indicesCol = arma::linspace >(0, (elements - 1), + elements); + + indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); + + reset = true; + } + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + if (!deterministic) + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + poolingIndices.back().slice(s)); + } + else + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + inputTemp.slice(s)); + } + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; +} + +template +template +void MaxPooling::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(mappedError.slice(s), gTemp.slice(s), + poolingIndices.back().slice(s)); + } + + poolingIndices.pop_back(); + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void MaxPooling::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp new file mode 100644 index 00000000000..70e061c7e2b --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -0,0 +1,252 @@ +/** + * @file mean_pooling.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Definition of the MeanPooling layer class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the MeanPooling. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MeanPooling +{ +public: + //! Create the MeanPooling object. 
+ MeanPooling(); + + /** + * Create the MeanPooling object using the specified number of units. + * + * @param kW Width of the pooling window. + * @param kH Height of the pooling window. + * @param dW Width of the stride operation. + * @param dH Width of the stride operation. + */ + MeanPooling(const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the height. + size_t& InputHeight() { return inputHeight; } + + //! Get the width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + /** + * Apply pooling to the input and store the results. + * + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + */ + template + void Pooling(const arma::Mat& input, arma::Mat& output) + { + const size_t rStep = kW; + const size_t cStep = kH; + + for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += dH) + { + for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += dW) + { + arma::mat subInput = input( + arma::span(rowidx, rowidx + rStep - 1 - offset), + arma::span(colidx, colidx + cStep - 1 - offset)); + + output(i, j) = arma::mean(arma::mean(subInput)); + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param input The input to be apply the unpooling rule. + * @param output The pooled result. 
+ */ + template + void Unpooling(const arma::Mat& input, + const arma::Mat& error, + arma::Mat& output) + { + const size_t rStep = input.n_rows / error.n_rows - offset; + const size_t cStep = input.n_cols / error.n_cols - offset; + + arma::Mat unpooledError; + for (size_t j = 0; j < input.n_cols - cStep; j += cStep) + { + for (size_t i = 0; i < input.n_rows - rStep; i += rStep) + { + const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), + arma::span(j, j + cStep - 1)); + + unpooledError = arma::Mat(inputArea.n_rows, inputArea.n_cols); + unpooledError.fill(error(i / rStep, j / cStep) / inputArea.n_elem); + + output(arma::span(i, i + rStep - 1 - offset), + arma::span(j, j + cStep - 1 - offset)) += unpooledError; + } + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored width of the pooling window. + size_t kW; + + //! Locally-stored height of the pooling window. + size_t kH; + + //! Locally-stored width of the stride operation. + size_t dW; + + //! Locally-stored height of the stride operation. + size_t dH; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored reset parameter used to initialize the module once. + bool reset; + + //! Rounding operation used. + bool floor; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; + + //! Locally-stored stored rounding offset. + size_t offset; + + //! Locally-stored output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube gTemp; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MeanPooling + + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "mean_pooling_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp b/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp new file mode 100644 index 00000000000..5008763752b --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp @@ -0,0 +1,126 @@ +/** + * @file mean_pooling_impl.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Implementation of the MeanPooling layer class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_IMPL_HPP + +// In case it hasn't yet been included. +#include "mean_pooling.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MeanPooling::MeanPooling() +{ + // Nothing to do here. 
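A note on the gradient flow implemented by Unpooling() above and used by Backward() below: each pooled error value is spread uniformly over the input window it came from, scaled by the window size. A tiny sketch of that rule with hypothetical numbers:

    arma::mat window(2, 2, arma::fill::randu);   // Input region behind one output cell.
    double e = 0.8;                              // Backpropagated error for that cell.

    arma::mat spread(2, 2);
    spread.fill(e / window.n_elem);              // Every input cell receives 0.8 / 4 = 0.2.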
+} + +template +MeanPooling::MeanPooling( + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const bool floor) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + reset(false), + floor(floor), + deterministic(false), + offset(0) +{ + // Nothing to do here. +} + +template +template +void MeanPooling::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + + Pooling(inputTemp.slice(s), outputTemp.slice(s)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; +} + +template +template +void MeanPooling::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void MeanPooling::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp new file mode 100644 index 00000000000..280b6e79b11 --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -0,0 +1,100 @@ +/** + * @file mean_squared_error.hpp + * @author Marcus Edel + * + * Definition of the mean squared error performance function. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The mean squared error performance function measures the network's + * performance according to the mean of squared errors. + * + * @tparam ActivationFunction Activation function used for the embedding layer. + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
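+ *
+ * Concretely, Forward(input, target) returns mean((input - target)^2) taken
+ * over all elements, and Backward() returns the unscaled elementwise
+ * difference (input - target). An illustrative call (the values are chosen
+ * only for the example):
+ *
+ * @code
+ * arma::mat input = {1.0, 3.0};
+ * arma::mat target = {0.0, 1.0};
+ * MeanSquaredError<> mse;
+ * const double error = mse.Forward(std::move(input), std::move(target));
+ * // error == (1 + 4) / 2 == 2.5
+ * @endcode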
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MeanSquaredError +{ + public: + /** + * Create the MeanSquaredError object. + */ + MeanSquaredError(); + + /* + * Computes the mean squared error function. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + double Forward(const arma::Mat&& input, const arma::Mat&& target); + /** + * Ordinary feed backward pass of a neural network. + * + * @param input The propagated input activation. + * @param target The target vector. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output); + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MeanSquaredError + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "mean_squared_error_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp b/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp new file mode 100644 index 00000000000..037a9445295 --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp @@ -0,0 +1,57 @@ +/** + * @file mean_squared_error_impl.hpp + * @author Marcus Edel + * + * Implementation of the mean squared error performance function. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_IMPL_HPP + +// In case it hasn't yet been included. +#include "mean_squared_error.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MeanSquaredError::MeanSquaredError() +{ + // Nothing to do here. +} + +template +template +double MeanSquaredError::Forward( + const arma::Mat&& input, const arma::Mat&& target) +{ + return arma::mean(arma::mean(arma::square(input - target))); +} + +template +template +void MeanSquaredError::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = (input - target); +} + +template +template +void MeanSquaredError::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. 
+} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp b/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp deleted file mode 100644 index 7705b52205a..00000000000 --- a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/** - * @file multiclass_classification_layer.hpp - * @author Marcus Edel - * - * Definition of the MulticlassClassificationLayer class, which implements a - * multiclass classification layer that can be used as output layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a multiclass classification layer that can be used as - * output layer. - * - * A convenience typedef is given: - * - * - ClassificationLayer - */ -class MulticlassClassificationLayer -{ - public: - /** - * Create the MulticlassClassificationLayer object. - */ - MulticlassClassificationLayer() - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - } - - /** - * Serialize the layer - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - } -}; // class MulticlassClassificationLayer - -//! Layer traits for the multiclass classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsConnection = false; -}; - -/*** - * Alias ClassificationLayer. 
- */ -using ClassificationLayer = MulticlassClassificationLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp similarity index 69% rename from src/mlpack/methods/ann/layer/multiply_constant_layer.hpp rename to src/mlpack/methods/ann/layer/multiply_constant.hpp index afa0f42e63c..b2985b27b83 100644 --- a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -1,17 +1,12 @@ /** - * @file multiply_constant_layer.hpp + * @file multiply_constant.hpp * @author Marcus Edel * * Definition of the MultiplyConstantLayer class, which multiplies the input by * a (non-learnable) constant. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_HPP +#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_HPP #include @@ -31,16 +26,13 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class MultiplyConstantLayer +class MultiplyConstant { public: /** - * Create the BaseLayer object. + * Create the MultiplyConstant object. */ - MultiplyConstantLayer(const double scalar) : scalar(scalar) - { - // Nothing to do here. - } + MultiplyConstant(const double scalar); /** * Ordinary feed forward pass of a neural network. Multiply the input with the @@ -50,10 +42,7 @@ class MultiplyConstantLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) - { - output = input * scalar; - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network. The backward pass @@ -64,10 +53,7 @@ class MultiplyConstantLayer * @param g The calculated gradient. */ template - void Backward(const DataType& /* input */, const DataType& gy, DataType& g) - { - g = gy * scalar; - } + void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -88,10 +74,7 @@ class MultiplyConstantLayer * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(scalar, "scalar"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored constant scalar value. @@ -105,9 +88,12 @@ class MultiplyConstantLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class MultiplyConstantLayer +}; // class MultiplyConstant + +} // namespace ann +} // namespace mlpack -}; // namespace ann -}; // namespace mlpack +// Include implementation. 
+#include "multiply_constant_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp b/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp new file mode 100644 index 00000000000..07dc4799779 --- /dev/null +++ b/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp @@ -0,0 +1,51 @@ +/** + * @file multiply_constant_impl.hpp + * @author Marcus Edel + * + * Implementation of the MultiplyConstantLayer class, which multiplies the + * input by a (non-learnable) constant. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_IMPL_HPP + +// In case it hasn't yet been included. +#include "multiply_constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MultiplyConstant::MultiplyConstant( + const double scalar) : scalar(scalar) +{ + // Nothing to do here. +} + +template +template +void MultiplyConstant::Forward( + const InputType&& input, OutputType&& output) +{ + output = input * scalar; +} + +template +template +void MultiplyConstant::Backward( + const DataType&& /* input */, DataType&& gy, DataType&& g) +{ + g = gy * scalar; +} + +template +template +void MultiplyConstant::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(scalar, "scalar"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp similarity index 54% rename from src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp rename to src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 6c08698f790..84c6a2d7904 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -1,16 +1,16 @@ /** - * @file negative_log_likelihood_layer.hpp + * @file negative_log_likelihood.hpp * @author Marcus Edel * - * Definition of the NegativeLogLikelihoodLayer class. + * Definition of the NegativeLogLikelihood class. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP -#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_HPP +#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_HPP #include @@ -19,11 +19,10 @@ namespace ann /** Artificial Neural Network. */ { /** * Implementation of the negative log likelihood layer. The negative log - * likelihood layer expects that the input contains log-probabilities for each + * likelihood layer expectes that the input contains log-probabilities for each * class. The layer also expects a class index, in the range between 1 and the * number of classes, as target when calling the Forward function. * - * @tparam ActivationFunction Activation function used for the embedding layer. * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). 
* @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, @@ -33,44 +32,26 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class NegativeLogLikelihoodLayer +class NegativeLogLikelihood { public: /** * Create the NegativeLogLikelihoodLayer object. */ - NegativeLogLikelihoodLayer() { /* Nothing to do here. */ } + NegativeLogLikelihood(); - /** - * Ordinary feed forward pass of a neural network. The negative log - * likelihood layer expects that the input contains log-probabilities for - * each class. The layer also expects a class index, in the range between 1 - * and the number of classes, as target when calling the Forward function. + /* + * Computes the Negative log likelihood. * - * @param input Input data that contains the log-probabilities for each class. - * @param target The target vector, that contains the class index in the range - * between 1 and the number of classes. + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. */ template - double Forward(const arma::Mat& input, const arma::Mat& target) - { - double output = 0; - - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output -= input(currentTarget, i); - } - - return output; - } + double Forward(const arma::Mat&& input, arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. The negative log - * likelihood layer expects that the input contains log-probabilities for + * likelihood layer expectes that the input contains log-probabilities for * each class. The layer also expects a class index, in the range between 1 * and the number of classes, as target when calling the Forward function. * @@ -80,20 +61,9 @@ class NegativeLogLikelihoodLayer * @param output The calculated error. */ template - void Backward(const arma::Mat& input, - const arma::Mat& target, - arma::Mat& output) - { - output = arma::zeros >(input.n_rows, input.n_cols); - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output(currentTarget, i) = -1; - } - } + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -110,6 +80,12 @@ class NegativeLogLikelihoodLayer //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -119,9 +95,12 @@ class NegativeLogLikelihoodLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class NegativeLogLikelihoodLayer +}; // class NegativeLogLikelihood + +} // namespace ann +} // namespace mlpack -}; // namespace ann -}; // namespace mlpack +// Include implementation. 
+#include "negative_log_likelihood_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp new file mode 100644 index 00000000000..2ec7799efe0 --- /dev/null +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp @@ -0,0 +1,76 @@ +/** + * @file negative_log_likelihood_impl.hpp + * @author Marcus Edel + * + * Implementation of the NegativeLogLikelihood class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_IMPL_HPP + +// In case it hasn't yet been included. +#include "negative_log_likelihood.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +NegativeLogLikelihood::NegativeLogLikelihood() +{ + // Nothing to do here. +} + +template +template +double NegativeLogLikelihood::Forward( + const arma::Mat&& input, arma::Mat&& target) +{ + double output = 0; + + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + return output; +} + +template +template +void NegativeLogLikelihood::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } +} + +template +template +void NegativeLogLikelihood::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/one_hot_layer.hpp b/src/mlpack/methods/ann/layer/one_hot_layer.hpp deleted file mode 100644 index 63200b2c335..00000000000 --- a/src/mlpack/methods/ann/layer/one_hot_layer.hpp +++ /dev/null @@ -1,96 +0,0 @@ -/** - * @file one_hot_layer.hpp - * @author Shangtong Zhang - * - * Definition of the OneHotLayer class, which implements a standard network - * layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a one hot classification layer that can be used as - * output layer. - */ -class OneHotLayer -{ - public: - /** - * Create the OneHotLayer object. - */ - OneHotLayer() - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. 
- * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - output.zeros(); - - arma::uword maxIndex = 0; - inputActivations.max(maxIndex); - output(maxIndex) = 1; - } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& /* ar */, const unsigned int /* version */) - { - /* Nothing to do here */ - } -}; // class OneHotLayer - -//! Layer traits for the one-hot class classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = true; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - - -#endif diff --git a/src/mlpack/methods/ann/layer/pooling_layer.hpp b/src/mlpack/methods/ann/layer/pooling_layer.hpp deleted file mode 100644 index e8a205f44f8..00000000000 --- a/src/mlpack/methods/ann/layer/pooling_layer.hpp +++ /dev/null @@ -1,267 +0,0 @@ -/** - * @file pooling_layer.hpp - * @author Marcus Edel - * @author Nilay Jain - * - * Definition of the PoolingLayer class, which attaches various pooling - * functions to the embedding layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the pooling layer. The pooling layer works as a metaclass - * which attaches various functions to the embedding layer. - * - * @tparam PoolingRule Pooling function used for the embedding layer. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename PoolingRule = MeanPooling, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class PoolingLayer -{ - public: - /** - * Create the PoolingLayer object using the specified number of units. - * - * @param kSize Size of the pooling window. - * @param stride The stride of the convolution operation. - * @param pooling The pooling strategy. - */ - PoolingLayer(const size_t kSize, - const size_t stride = 1, - PoolingRule pooling = PoolingRule()) : - kSize(kSize), - stride(stride), - pooling(pooling) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. 
- */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - Pooling(input, output); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = arma::zeros >((input.n_rows - kSize) / stride + 1, - (input.n_cols - kSize) / stride + 1, input.n_slices); - - for (size_t s = 0; s < input.n_slices; s++) - Pooling(input.slice(s), output.slice(s)); - } - - /** - * Ordinary feed backward pass of a neural network, using 3rd-order tensors as - * input, calculating the function f(x) by propagating x backwards through f. - * Using the results from the feed forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Cube& gy, - arma::Cube& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols, inputParameter.n_slices); - - for (size_t s = 0; s < gy.n_slices; s++) - { - Unpooling(inputParameter.slice(s), gy.slice(s), g.slice(s)); - } - } - - /** - * Ordinary feed backward pass of a neural network, using 3rd-order tensors as - * input, calculating the function f(x) by propagating x backwards through f. - * Using the results from the feed forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube from the error matrix. - arma::Cube mappedError = arma::zeros(outputParameter.n_rows, - outputParameter.n_cols, outputParameter.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * outputParameter.n_rows * outputParameter.n_cols, - (j + 1) * outputParameter.n_rows * outputParameter.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - outputParameter.n_rows, outputParameter.n_cols); - } - } - - Backward(inputParameter, mappedError, g); - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - InputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - InputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kSize, "kSize"); - ar & data::CreateNVP(pooling, "pooling"); - ar & data::CreateNVP(stride, "stride"); - } - - private: - /** - * Apply pooling to the input and store the results. - * - * @param input The input to be apply the pooling rule. - * @param output The pooled result. 
- */ - template - void Pooling(const arma::Mat& input, arma::Mat& output) - { - const size_t rStep = kSize; - const size_t cStep = kSize; - - for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += stride) - { - for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += stride) - { - output(i, j) += pooling.Pooling(input( - arma::span(rowidx, rowidx + rStep - 1), - arma::span(colidx, colidx + cStep - 1))); - } - } - } - - /** - * Apply unpooling to the input and store the results. - * - * @param input The input to be apply the unpooling rule. - * @param output The pooled result. - */ - template - void Unpooling(const arma::Mat& input, - const arma::Mat& error, - arma::Mat& output) - { - const size_t rStep = input.n_rows / error.n_rows; - const size_t cStep = input.n_cols / error.n_cols; - - arma::Mat unpooledError; - for (size_t j = 0; j < input.n_cols; j += cStep) - { - for (size_t i = 0; i < input.n_rows; i += rStep) - { - const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)); - - pooling.Unpooling(inputArea, error(i / rStep, j / cStep), - unpooledError); - - output(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)) += unpooledError; - } - } - } - - //! Locally-stored size of the pooling window. - size_t kSize; - - //! Locally-stored stride value by which we move filter. - size_t stride; - - //! Locally-stored pooling strategy. - PoolingRule pooling; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class PoolingLayer - -//! Layer traits for the pooling layer. -template< - typename PoolingRule, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - - -} // namespace ann -} // namespace mlpack - -#endif - diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp new file mode 100644 index 00000000000..297127a6ae8 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -0,0 +1,213 @@ +/** + * @file recurrent.hpp + * @author Marcus Edel + * + * Definition of the LinearLayer class also known as fully-connected layer or + * affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_HPP + +#include +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the RecurrentLayer class. Recurrent layers can be used + * similarly to feed-forward layers. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
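+ *
+ * In terms of the modules given to the constructor, the forward recurrence
+ * sketched by the implementation in recurrent_impl.hpp is roughly:
+ *
+ * @code
+ * h(0) = transfer(start(input(x(0))))
+ * h(t) = transfer(input(x(t)) + feedback(h(t - 1)))
+ * @endcode
+ *
+ * where rho bounds the number of time steps that are unrolled for BPTT.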
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Recurrent +{ + public: + /** + * Create the Recurrent object using the specified modules. + * + * @param start The start module. + * @param input The input module. + * @param feedback The feedback module. + * @param transfer The transfer module. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + template + Recurrent(const StartModuleType& start, + const InputModuleType& input, + const FeedbackModuleType& feedback, + const TransferModuleType& transfer, + const size_t rho); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f, using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */); + + //! Get the model modules. + std::vector& Model() { return network; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored start module. + LayerTypes startModule; + + //! Locally-stored input module. + LayerTypes inputModule; + + //! Locally-stored feedback module. + LayerTypes feedbackModule; + + //! Locally-stored transfer module. + LayerTypes transferModule; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! Locally-stored number of gradient steps.
+ size_t gradientStep; + + //! If true, the deterministic (testing) mode is used. + bool deterministic; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored initial module. + LayerTypes initialModule; + + //! Locally-stored recurrent module. + LayerTypes recurrentModule; + + //! Locally-stored model modules. + std::vector network; + + //! Locally-stored merge module. + LayerTypes mergeModule; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored feedback output parameters. + std::vector feedbackOutputParameter; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored recurrent error parameter. + arma::mat recurrentError; +}; // class Recurrent + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "recurrent_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp new file mode 100644 index 00000000000..ffb7320b232 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -0,0 +1,263 @@ +/** + * @file recurrent_attention.hpp + * @author Marcus Edel + * + * Definition of the RecurrentAttention class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_HPP + +#include +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * This class implements the Recurrent Model for Visual Attention, using a + * variety of possible layer implementations. + * + * For more information, see the following paper. + * + * @code + * @article{MnihHGK14, + * title={Recurrent Models of Visual Attention}, + * author={Volodymyr Mnih and Nicolas Heess and Alex Graves and Koray Kavukcuoglu}, + * journal={CoRR}, + * volume={abs/1406.6247}, + * year={2014} + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class RecurrentAttention +{ + public: + /** + * Create the RecurrentAttention object using the specified modules. + * + * @param outSize The module output size. + * @param rnn The recurrent neural network module. + * @param action The action module. + * @param rho Maximum number of steps to backpropagate through time (BPTT).
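+ *
+ * A minimal construction sketch, where rnn is any module that maps a glimpse
+ * to a hidden state of size outSize and action is any module that maps that
+ * hidden state to the next attention location (both are hypothetical
+ * placeholders, not modules defined in this patch):
+ *
+ * @code
+ * RecurrentAttention<> ram(outSize, rnn, action, rho);
+ * @endcode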
+ */ + template + RecurrentAttention(const size_t outSize, + const RNNModuleType& rnn, + const ActionModuleType& action, + const size_t rho); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& /* input */, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */); + + //! Get the model modules. + std::vector& Model() { return network; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Calculate the gradient of the attention module. + void IntermediateGradient() + { + intermediateGradient.zeros(); + + // Gradient of the action module. + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(initialInput), + std::move(actionError)), actionModule); + } + else + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError)), + actionModule); + } + + // Gradient of the recurrent module. + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError)), + rnnModule); + + attentionGradient += intermediateGradient; + } + + //! Locally-stored module output size. + size_t outSize; + + //! Locally-stored start module. + LayerTypes rnnModule; + + //! Locally-stored input module. + LayerTypes actionModule; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! 
Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored initial module. + LayerTypes initialModule; + + //! Locally-stored recurrent module. + LayerTypes recurrentModule; + + //! Locally-stored model modules. + std::vector network; + + //! Locally-stored merge module. + LayerTypes mergeModule; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored feedback output parameters. + std::vector feedbackOutputParameter; + + //! List of all module parameters for the backward pass (BBTT). + std::vector moduleOutputParameter; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored recurrent error parameter. + arma::mat recurrentError; + + //! Locally-stored action error parameter. + arma::mat actionError; + + //! Locally-stored action delta. + arma::mat actionDelta; + + //! Locally-stored recurrent delta. + arma::mat rnnDelta; + + //! Locally-stored initial action input. + arma::mat initialInput; + + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; + + //! Locally-stored attention gradient. + arma::mat attentionGradient; + + //! Locally-stored intermediate gradient for the attention module. + arma::mat intermediateGradient; +}; // class RecurrentAttention + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "recurrent_attention_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp b/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp new file mode 100644 index 00000000000..6642894fa88 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp @@ -0,0 +1,204 @@ +/** + * @file recurrent_attention_impl.hpp + * @author Marcus Edel + * + * Implementation of the RecurrentAttention class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_IMPL_HPP + +// In case it hasn't yet been included. +#include "recurrent_attention.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +template +RecurrentAttention::RecurrentAttention( + const size_t outSize, + const RNNModuleType& rnn, + const ActionModuleType& action, + const size_t rho) : + outSize(outSize), + rnnModule(new RNNModuleType(rnn)), + actionModule(new ActionModuleType(action)), + rho(rho), + forwardStep(0), + backwardStep(0), + deterministic(false) +{ + network.push_back(rnnModule); + network.push_back(actionModule); +} + +template +template +void RecurrentAttention::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + // Initialize the action input. + if (initialInput.is_empty()) + { + initialInput = arma::zeros(outSize, input.n_cols); + } + + // Propagate through the action and recurrent module. + for (forwardStep = 0; forwardStep < rho; ++forwardStep) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( + boost::apply_visitor(outputParameterVisitor, actionModule))), + actionModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( + outputParameterVisitor, actionModule))), actionModule); + } + + // Initialize the glimpse input. + arma::mat glimpseInput = arma::zeros(input.n_elem, 2); + glimpseInput.col(0) = input; + glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, + actionModule).n_elem - 1, 1) = boost::apply_visitor( + outputParameterVisitor, actionModule); + + boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), + std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), + rnnModule); + + // Save the output parameter when training the module. + if (!deterministic) + { + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } + } + } + + output = boost::apply_visitor(outputParameterVisitor, rnnModule); + + forwardStep = 0; + backwardStep = 0; +} + +template +template +void RecurrentAttention::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + if (intermediateGradient.is_empty() && backwardStep == 0) + { + // Initialize the attention gradients. + size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + + boost::apply_visitor(weightSizeVisitor, actionModule); + + intermediateGradient = arma::zeros(weights, 1); + attentionGradient = arma::zeros(weights, 1); + + // Initialize the action error. + actionError = arma::zeros( + boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, + boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); + } + + // Propagate the attention gradients. + if (backwardStep == 0) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), rnnModule); + boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), actionModule); + + attentionGradient.zeros(); + } + + // Back-propagate through time. 
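+  // Each iteration of the loop below restores the module outputs that were
+  // saved during Forward(), routes the error for this time step through the
+  // action and recurrent modules, and accumulates their parameter gradients
+  // into attentionGradient via IntermediateGradient(). On the first iteration
+  // (backwardStep == 0) the error is the incoming gy; on later iterations it
+  // is the delta produced by the action module (actionDelta).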
+ for (; backwardStep < rho; backwardStep++) + { + if (backwardStep == 0) + { + recurrentError = gy; + } + else + { + recurrentError = actionDelta; + } + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError), + std::move(actionDelta)), actionModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(initialInput), + std::move(actionError), std::move(actionDelta)), actionModule); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError), + std::move(rnnDelta)), rnnModule); + + if (backwardStep == 0) + { + g = rnnDelta.col(1); + } + else + { + g += rnnDelta.col(1); + } + + IntermediateGradient(); + } +} + +template +template +void RecurrentAttention::Gradient( + arma::Mat&& /* input */, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) +{ + size_t offset = 0; + offset += boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), rnnModule); + boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), actionModule); +} + +template +template +void RecurrentAttention::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(forwardStep, "forwardStep"); + ar & data::CreateNVP(backwardStep, "backwardStep"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_impl.hpp b/src/mlpack/methods/ann/layer/recurrent_impl.hpp new file mode 100644 index 00000000000..8f1525c4551 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_impl.hpp @@ -0,0 +1,206 @@ +/** + * @file recurrent_impl.hpp + * @author Marcus Edel + * + * Implementation of the LinearLayer class also known as fully-connected layer + * or affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_IMPL_HPP + +// In case it hasn't yet been included. +#include "recurrent.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +template< + typename StartModuleType, + typename InputModuleType, + typename FeedbackModuleType, + typename TransferModuleType +> +Recurrent::Recurrent( + const StartModuleType& start, + const InputModuleType& input, + const FeedbackModuleType& feedback, + const TransferModuleType& transfer, + const size_t rho) : + startModule(new StartModuleType(start)), + inputModule(new InputModuleType(input)), + feedbackModule(new FeedbackModuleType(feedback)), + transferModule(new TransferModuleType(transfer)), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) +{ + initialModule = new Sequential<>(); + mergeModule = new AddMerge<>(); + recurrentModule = new Sequential<>(false); + + boost::apply_visitor(AddVisitor(inputModule), initialModule); + boost::apply_visitor(AddVisitor(startModule), initialModule); + boost::apply_visitor(AddVisitor(transferModule), initialModule); + + boost::apply_visitor(weightSizeVisitor, startModule); + boost::apply_visitor(weightSizeVisitor, inputModule); + boost::apply_visitor(weightSizeVisitor, feedbackModule); + boost::apply_visitor(weightSizeVisitor, transferModule); + + boost::apply_visitor(AddVisitor(inputModule), mergeModule); + boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); + boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); + boost::apply_visitor(AddVisitor(transferModule), recurrentModule); + + network.push_back(initialModule); + network.push_back(mergeModule); + network.push_back(feedbackModule); + network.push_back(recurrentModule); +} + +template +template +void Recurrent::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + initialModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, inputModule))), + inputModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, transferModule)), std::move( + boost::apply_visitor(outputParameterVisitor, feedbackModule))), + feedbackModule); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + recurrentModule); + } + + output = boost::apply_visitor(outputParameterVisitor, transferModule); + + // Save the feedback output parameter when training the module. 
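+  // The stored copies of the output are consumed later by Gradient(), which
+  // needs h(t - 1) as the input of the feedback module for each BPTT step; in
+  // deterministic (prediction) mode no gradients are computed, so nothing
+  // needs to be stored.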
+ if (!deterministic) + { + feedbackOutputParameter.push_back(output); + } + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + backwardStep = 0; + + if (!recurrentError.is_empty()) + { + recurrentError.zeros(); + } + } +} + +template +template +void Recurrent::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + if (!recurrentError.is_empty()) + { + recurrentError += gy; + } + else + { + recurrentError = gy; + } + + if (backwardStep < (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, recurrentModule)), std::move(recurrentError), + std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), + recurrentModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), + inputModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, feedbackModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( + boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, initialModule)), std::move(recurrentError), + std::move(g)), initialModule); + } + + recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); + backwardStep++; +} + +template +template +void Recurrent::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + if (gradientStep < (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + recurrentModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); + + boost::apply_visitor(GradientVisitor(std::move( + feedbackOutputParameter[feedbackOutputParameter.size() - 2 - + gradientStep]), std::move(boost::apply_visitor(deltaVisitor, + mergeModule))), feedbackModule); + } + else + { + boost::apply_visitor(GradientZeroVisitor(), recurrentModule); + boost::apply_visitor(GradientZeroVisitor(), inputModule); + boost::apply_visitor(GradientZeroVisitor(), feedbackModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, startModule))), initialModule); + } + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + feedbackOutputParameter.clear(); + } +} + +template +template +void Recurrent::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(rho, "rho"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp similarity index 69% rename from src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp rename to src/mlpack/methods/ann/layer/reinforce_normal.hpp index 655e443b1e5..fd192f0a1f2 100644 --- a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -1,17 +1,12 @@ /** - * @file reinforce_normal_layer.hpp + * @file reinforce_normal.hpp * @author Marcus Edel * * Definition of the ReinforceNormalLayer class, which implements the REINFORCE * algorithm for the normal distribution. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. 
You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_HPP +#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_HPP #include @@ -31,18 +26,15 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class ReinforceNormalLayer +class ReinforceNormal { public: /** - * Create the ReinforceNormalLayer object. + * Create the ReinforceNormal object. * * @param stdev Standard deviation used during the forward and backward pass. */ - ReinforceNormalLayer(const double stdev) : stdev(stdev) - { - // Nothing to do here. - } + ReinforceNormal(const double stdev); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -52,20 +44,7 @@ class ReinforceNormalLayer * @param output Resulting output activation. */ template - void Forward(const arma::Mat& input, arma::Mat& output) - { - if (!deterministic) - { - // Multiply by standard deviations and re-center the means to the mean. - output = arma::randn >(input.n_rows, input.n_cols) * - stdev + input; - } - else - { - // Use maximum a posteriori. - output = input; - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -77,16 +56,7 @@ class ReinforceNormalLayer * @param g The calculated gradient. */ template - void Backward(const DataType& input, - const DataType& /* gy */, - DataType& g) - { - g = (input - inputParameter) / std::pow(stdev, 2.0); - - // Multiply by reward and multiply by -1. - g *= -reward; - } - + void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -113,6 +83,12 @@ class ReinforceNormalLayer //! Modify the value of the deterministic parameter. double& Reward() { return reward; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Standard deviation used during the forward and backward pass. const double stdev; @@ -129,11 +105,17 @@ class ReinforceNormalLayer //! Locally-stored output parameter object. OutputDataType outputParameter; + //! Locally-stored output module parameter parameters. + std::vector moduleInputParameter; + //! If true use maximum a posteriori during the forward pass. bool deterministic; -}; // class ReinforceNormalLayer +}; // class ReinforceNormal + +} // namespace ann +} // namespace mlpack -}; // namespace ann -}; // namespace mlpack +// Include implementation. +#include "reinforce_normal_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp new file mode 100644 index 00000000000..1eaa25d8fae --- /dev/null +++ b/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp @@ -0,0 +1,69 @@ +/** + * @file reinforce_normal_impl.hpp + * @author Marcus Edel + * + * Implementation of the ReinforceNormalLayer class, which implements the + * REINFORCE algorithm for the normal distribution. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_IMPL_HPP + +// In case it hasn't yet been included. 
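// To make the behaviour implemented below concrete: during training,
// Forward() returns a sample drawn around the input mean,
//   output = input + stdev * randn(size(input)),
// and remembers that mean; Backward() then returns the REINFORCE estimate
//   g = -reward * (sample - mean) / stdev^2,
// i.e. the gradient of the log-density of the drawn sample with respect to
// the mean, scaled by the negated reward set through Reward().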
+#include "reinforce_normal.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +ReinforceNormal::ReinforceNormal( + const double stdev) : stdev(stdev) +{ + // Nothing to do here. +} + +template +template +void ReinforceNormal::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + if (!deterministic) + { + // Multiply by standard deviations and re-center the means to the mean. + output = arma::randn >(input.n_rows, input.n_cols) * + stdev + input; + + moduleInputParameter.push_back(input); + } + else + { + // Use maximum a posteriori. + output = input; + } +} + +template +template +void ReinforceNormal::Backward( + const DataType&& input, DataType&& /* gy */, DataType&& g) +{ + g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); + + // Multiply by reward and multiply by -1. + g *= reward; + g *= -1; + + moduleInputParameter.pop_back(); +} + +template +template +void ReinforceNormal::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp new file mode 100644 index 00000000000..d3c42a008c9 --- /dev/null +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -0,0 +1,111 @@ +/** + * @file select.hpp + * @author Marcus Edel + * + * Definition of the Select module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SELECT_HPP +#define MLPACK_METHODS_ANN_LAYER_SELECT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The select module selects the specified column from a given input matrix. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Select +{ + public: + /** + * Create the Select object. + * + * @param index The column which should be extracted from the given input. + * @param elements The number of elements that should be used. + */ + Select(const size_t index, const size_t elements = 0); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. 
+ OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored column index. + size_t index; + + //! Locally-stored number of elements selected. + size_t elements; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Select + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "select_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/select_impl.hpp b/src/mlpack/methods/ann/layer/select_impl.hpp new file mode 100644 index 00000000000..a40cb968461 --- /dev/null +++ b/src/mlpack/methods/ann/layer/select_impl.hpp @@ -0,0 +1,75 @@ +/** + * @file select_impl.hpp + * @author Marcus Edel + * + * Implementation of the Select module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SELECT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_SELECT_IMPL_HPP + +// In case it hasn't yet been included. +#include "constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Select::Select( + const size_t index, + const size_t elements) : + index(index), + elements(elements) + { + // Nothing to do here. + } + +template +template +void Select::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + if (elements == 0) + { + output = input.col(index); + } + else + { + output = input.submat(0, index, elements - 1, index); + } +} + +template +template +void Select::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + if (elements == 0) + { + g = gy; + } + else + { + g = gy.submat(0, 0, elements - 1, 0); + } +} + +template +template +void Select::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(index, "index"); + ar & data::CreateNVP(elements, "elements"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp new file mode 100644 index 00000000000..ca729c9da13 --- /dev/null +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -0,0 +1,203 @@ +/** + * @file sequential.hpp + * @author Marcus Edel + * + * Definition of the Sequential class, which acts as a feed-forward fully + * connected network container. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_HPP +#define MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Sequential class. The sequential class works as a + * feed-forward fully connected network container which plugs various layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Sequential +{ + public: + + /** + * Create the Sequential object using the specified parameters. + * + * @param model Expose the all network modules. + */ + Sequential(const bool model = true); + + //! Destroy the Sequential object. + ~Sequential(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */); + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + //! Return the model modules. + std::vector& Model() + { + if (model) + { + return network; + } + + return empty; + } + + //! Return the initial point for the optimization. + const arma::mat& Parameters() const { return parameters; } + //! Modify the initial point for the optimization. + arma::mat& Parameters() { return parameters; } + + arma::mat const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + arma::mat& InputParameter() { return inputParameter; } + + //! Get the output parameter. + arma::mat const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + arma::mat& OutputParameter() { return outputParameter; } + + //! Get the delta.e + arma::mat const& Delta() const { return delta; } + //! Modify the delta. + arma::mat& Delta() { return delta; } + + //! Get the gradient. + arma::mat const& Gradient() const { return gradient; } + //! Modify the gradient. 
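  // Typical construction (an illustrative sketch, not taken from this patch):
  // a Sequential built with model == false owns its modules and deletes them
  // in its destructor, e.g.
  //
  //   Sequential<> net(false);
  //   net.Add<Select<> >(0);   // construct and append a new module
  //   net.Add(layer);          // or append an existing LayerTypes instance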
+ arma::mat& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + + private: + //! Parameter which indicates if the modules should be exposed. + bool model; + + //! Indicator if we already initialized the model. + bool reset; + + //! Locally-stored network modules. + std::vector network; + + //! Locally-stored model parameters. + arma::mat parameters; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! Locally-stored empty list of modules. + std::vector empty; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient object. + arma::mat gradient; + + //! Locally-stored output width visitor. + OutputWidthVisitor outputWidthVisitor; + + //! Locally-stored output height visitor. + OutputHeightVisitor outputHeightVisitor; + + //! The input width. + size_t width; + + //! The input height. + size_t height; +}; // class Sequential + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "sequential_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/sequential_impl.hpp b/src/mlpack/methods/ann/layer/sequential_impl.hpp new file mode 100644 index 00000000000..df20a7a7668 --- /dev/null +++ b/src/mlpack/methods/ann/layer/sequential_impl.hpp @@ -0,0 +1,154 @@ +/** + * @file sequential_impl.hpp + * @author Marcus Edel + * + * Implementation of the Sequential class, which acts as a feed-forward fully + * connected network container. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_IMPL_HPP + +// In case it hasn't yet been included. +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Sequential::Sequential( + const bool model) : model(model), reset(false) +{ + // Nothing to do here. +} + +template +Sequential::~Sequential() +{ + if (!model) + { + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } + } +} + +template +template +void Sequential::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); + + // Set the input height. 
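      // On the first pass through the container (reset == false), each
      // layer's reported output width/height is forwarded as the next
      // layer's input width/height, so modules with spatial semantics can
      // size themselves; once the pass completes, reset is set and the
      // shapes are treated as fixed.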
+ boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. + if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + +if (!reset) +{ + reset = true; +} + + output = boost::apply_visitor(outputParameterVisitor, network.back()); +} + +template +template +void Sequential::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(gy), + std::move(boost::apply_visitor(deltaVisitor, network.back()))), + network.back()); + + for (size_t i = 2; i < network.size() + 1; ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } + + g = boost::apply_visitor(deltaVisitor, network.front()); +} + +template +template +void Sequential::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } +} + +template +template +void Sequential::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp deleted file mode 100644 index c3b723f17d9..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/** - * @file sparse_bias_layer.hpp - * @author Tham Ngap Wei - * - * Definition of the SparseBiasLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a bias layer design for sparse autoencoder. - * The BiasLayer class represents a single layer of a neural network. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). 
- */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SparseBiasLayer -{ - public: - /** - * Create the SparseBiasLayer object using the specified number of units and - * bias parameter. - * - * @param outSize The number of output units. - * @param batchSize The batch size used to train the network. - * @param bias The bias value. - */ - SparseBiasLayer(const size_t outSize, const size_t batchSize) : - outSize(outSize), - batchSize(batchSize) - { - weights.set_size(outSize, 1); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + arma::repmat(weights, 1, input.n_cols); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const ErrorType& gy, - ErrorType& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the bias. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& d, - InputDataType& g) - { - g = arma::sum(d, 1) / static_cast( - batchSize); - } - - //! Get the batch size - size_t BatchSize() const { return batchSize; } - //! Modify the batch size - size_t& BatchSize() { return batchSize; } - - //! Get the weights. - InputDataType const& Weights() const { return weights; } - //! Modify the weights. - InputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - InputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - InputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(batchSize, "batchSize"); - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! The batch size used to train the network. - size_t batchSize; - - //! Locally-stored weight object. - InputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - InputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseBiasLayer - -//! Layer traits for the bias layer. 
-template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = true; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp deleted file mode 100644 index 33a2a72f7f3..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp +++ /dev/null @@ -1,227 +0,0 @@ -/** - * @file sparse_output_layer.hpp - * @author Tham Ngap Wei - * - * This is the fourth layer of sparse autoencoder. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the SparseOutputLayer class. The SparseOutputLayer class - * represents the fourth layer of the sparse autoencoder. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SparseOutputLayer -{ - public: - /** - * Create the SparseLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - SparseOutputLayer(const size_t inSize, - const size_t outSize, - const double lambda = 0.0001, - const double beta = 3, - const double rho = 0.01) : - inSize(inSize), - outSize(outSize), - lambda(lambda), - beta(beta), - rho(rho) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - // Average activations of the hidden layer. - rhoCap = arma::sum(input, 1) / static_cast(input.n_cols); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& input, - const arma::Mat& gy, - arma::Mat& g) - { - const arma::mat klDivGrad = beta * (-(rho / rhoCap) + (1 - rho) / - (1 - rhoCap)); - - // NOTE: if the armadillo version high enough, find_nonfinite can prevents - // overflow value: - // klDivGrad.elem(arma::find_nonfinite(klDivGrad)).zeros(); - g = weights.t() * gy + - arma::repmat(klDivGrad, 1, input.n_cols); - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. 
- * @param g The calculated gradient. - */ - template - void Gradient(const InputType input, const arma::Mat& d, arma::Mat& g) - { - g = d * input.t() / static_cast( - input.n_cols) + lambda * weights; - } - - //! Sets the KL divergence parameter. - void Beta(const double b) - { - beta = b; - } - - //! Gets the KL divergence parameter. - double Beta() const - { - return beta; - } - - //! Sets the sparsity parameter. - void Rho(const double r) - { - rho = r; - } - - //! Gets the sparsity parameter. - double Rho() const - { - return rho; - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the RhoCap. - OutputDataType const& RhoCap() const { return rhoCap; } - //! Modify the RhoCap. - OutputDataType& RhoCap() { return rhoCap; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(lambda, "lambda"); - ar & data::CreateNVP(beta, "beta"); - ar & data::CreateNVP(rho, "rho"); - } - - private: - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! L2-regularization parameter. - double lambda; - - //! KL divergence parameter. - double beta; - - //! Sparsity parameter. - double rho; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Average activations of the hidden layer. - OutputDataType rhoCap; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseOutputLayer - -//! Layer traits for the SparseOutputLayer. 
-template -class LayerTraits > -{ -public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp similarity index 52% rename from src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp rename to src/mlpack/methods/ann/layer/vr_class_reward.hpp index 5b4da8ed0b7..f820e351aa8 100644 --- a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -1,20 +1,18 @@ /** - * @file vr_class_reward_layer.hpp + * @file vr_class_reward.hpp * @author Marcus Edel * - * Definition of the VRClassRewardLayer class, which implements the variance + * Definition of the VRClassReward class, which implements the variance * reduced classification reinforcement layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_HPP +#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_HPP #include +#include "layer_types.hpp" +#include "layer_visitor.hpp" + namespace mlpack { namespace ann /** Artificial Neural Network. */ { @@ -30,24 +28,19 @@ namespace ann /** Artificial Neural Network. */ { * arma::sp_mat or arma::cube). */ template < - typename InputDataType = arma::field, - typename OutputDataType = arma::field + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat > -class VRClassRewardLayer +class VRClassReward { public: /** - * Create the VRClassRewardLayer object. + * Create the VRClassReward object. * * @param scale Parameter used to scale the reward. * @param sizeAverage Take the average over all batches. */ - VRClassRewardLayer(const double scale = 1, const bool sizeAverage = true) : - scale(scale), - sizeAverage(sizeAverage) - { - // Nothing to do here. - } + VRClassReward(const double scale = 1, const bool sizeAverage = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -58,69 +51,23 @@ class VRClassRewardLayer * between 1 and the number of classes. */ template - double Forward(const arma::field >& input, - const arma::Mat& target) - { - return Forward(input(0, 0), target); - } + double Forward(const arma::Mat&& input, const arma::Mat&& target); /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. + * Ordinary feed backward pass of a neural network. The negative log + * likelihood layer expectes that the input contains log-probabilities for + * each class. The layer also expects a class index, in the range between 1 + * and the number of classes, as target when calling the Forward function. * - * @param input Input data that contains the log-probabilities for each class. + * @param input The propagated input activation. * @param target The target vector, that contains the class index in the range * between 1 and the number of classes. + * @param output The calculated error. 
*/ template - double Forward(const arma::Mat& input, const arma::Mat& target) - { - reward = 0; - arma::uword index = 0; - - for (size_t i = 0; i < input.n_cols; i++) - { - input.unsafe_col(i).max(index); - reward = ((index + 1) == target(i)) * scale; - } - - if (sizeAverage) - { - return -reward / input.n_cols; - } - - return -reward; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - double Backward(const arma::field >& input, - const arma::Mat& /* gy */, - arma::field >& g) - { - g = arma::field >(2, 1); - g(0, 0) = arma::zeros(input(0, 0).n_rows, input(0, 0).n_cols); - - double vrReward = reward - arma::as_scalar(input(1, 0)); - if (sizeAverage) - { - vrReward /= input(0, 0).n_cols; - } - - const double norm = sizeAverage ? 2.0 / input.n_cols : 2.0; - - g(1, 0) = norm * (input(1, 0) - reward); - - return vrReward; - } + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -142,6 +89,27 @@ class VRClassRewardLayer //! Modify the value of the deterministic parameter. bool& Deterministic() { return deterministic; } + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored value to scale the reward. const double scale; @@ -163,9 +131,15 @@ class VRClassRewardLayer //! If true dropout and scaling is disabled, see notes above. bool deterministic; -}; // class VRClassRewardLayer -}; // namespace ann -}; // namespace mlpack + //! Locally-stored network modules. + std::vector network; +}; // class VRClassReward + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "vr_class_reward_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp new file mode 100644 index 00000000000..a4a2703545b --- /dev/null +++ b/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp @@ -0,0 +1,101 @@ +/** + * @file vr_class_reward_impl.hpp + * @author Marcus Edel + * + * Implementation of the VRClassReward class, which implements the variance + * reduced classification reinforcement layer. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_IMPL_HPP + +// In case it hasn't yet been included. +#include "vr_class_reward.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +VRClassReward::VRClassReward( + const double scale, + const bool sizeAverage) : + scale(scale), + sizeAverage(sizeAverage) +{ + // Nothing to do here. 
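  // In brief (describing the Forward()/Backward() implementations that
  // follow): Forward() accumulates the negative log-likelihood of the target
  // classes and subtracts the scaled reward earned by the argmax prediction;
  // Backward() writes the usual NLL gradient into the class entries, puts
  // the baseline gradient into the entry reserved for the predicted reward,
  // and hands the variance-reduced reward (reward minus baseline) to the
  // last module of the attached network via RewardSetVisitor.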
+} + +template +template +double VRClassReward::Forward( + const arma::Mat&& input, const arma::Mat&& target) +{ + double output = 0; + + for (size_t i = 0; i < input.n_cols - 1; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + reward = 0; + arma::uword index = 0; + + for (size_t i = 0; i < input.n_cols - 1; i++) + { + input.unsafe_col(i).max(index); + reward = ((index + 1) == target(i)) * scale; + } + + if (sizeAverage) + { + return output - reward / (input.n_cols - 1); + } + + return output - reward; +} + +template +template +void VRClassReward::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < (input.n_cols - 1); ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } + + double vrReward = reward - input(0, 1); + if (sizeAverage) + { + vrReward /= input.n_cols - 1; + } + + const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; + + output(0, 1) = norm * (input(0, 1) - reward); + boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); +} + +template +template +void VRClassReward::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/network_traits.hpp b/src/mlpack/methods/ann/network_traits.hpp deleted file mode 100644 index 5aa91e8ae60..00000000000 --- a/src/mlpack/methods/ann/network_traits.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/** - * @file network_traits.hpp - * @author Marcus Edel - * - * NetworkTraits class, a template class to get information about various - * networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP -#define MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP - -namespace mlpack { -namespace ann { - -/** - * This is a template class that can provide information about various - * networks. By default, this class will provide the weakest possible - * assumptions on networks, and each network should override values as - * necessary. If a network doesn't need to override a value, then there's no - * need to write a NetworkTraits specialization for that class. - */ -template -class NetworkTraits -{ - public: - /** - * This is true if the network is a feed forward neural network. - */ - static const bool IsFNN = false; - - /** - * This is true if the network is a recurrent neural network. - */ - static const bool IsRNN = false; - - /** - * This is true if the network is a convolutional neural network. - */ - static const bool IsCNN = false; - - /** - * This is true if the network is a sparse autoencoder. 
- */ - static const bool IsSAE = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif - diff --git a/src/mlpack/methods/ann/network_util.hpp b/src/mlpack/methods/ann/network_util.hpp deleted file mode 100644 index 93bdf044355..00000000000 --- a/src/mlpack/methods/ann/network_util.hpp +++ /dev/null @@ -1,247 +0,0 @@ -/** - * @file network_util.hpp - * @author Marcus Edel - * - * Neural network utilities. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_HPP -#define MLPACK_METHODS_ANN_NETWORK_UTIL_HPP - -#include - -#include - -/** - * Neural network utility functions. - */ -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Auxiliary function to get the number of weights of the specified network. - * - * @param network The network used for specifying the number of weights. - * @return The number of weights. - */ -template -typename std::enable_if::type -NetworkSize(std::tuple& network); - -template -typename std::enable_if::type -NetworkSize(std::tuple& network); - -/** - * Auxiliary function to get the number of weights of the specified layer. - * - * @param layer The layer used for specifying the number of weights. - * @param output The layer output parameter. - * @return The number of weights. - */ -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& output); - -/** - * Auxiliary function to set the weights of the specified network. - * - * @param weights The weights used to set the weights of the network. - * @param network The network used to set the weights. - * @param offset The memory offset of the weights. - */ -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the weights of the specified layer. - * - * @param layer The layer used to set the weights. - * @param weights The weights used to set the weights of the layer. - * @param offset The memory offset of the weights. - * @param output The output parameter of the layer. - * @return The number of weights. - */ -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::mat& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::cube& output); - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, P& output); - -/** - * Auxiliary function to set the gradients of the specified network. - * - * @param gradients The gradients used to set the gradient of the network. - * @param network The network used to set the gradients. - * @param offset The memory offset of the gradients. - * return The number of gradients. 
- */ -template -typename std::enable_if::type -NetworkGradients(arma::mat& gradients, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkGradients(arma::mat& gradients, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the gradients of the specified layer. - * - * @param layer The layer used to set the gradients. - * @param gradients The gradients used to set the gradient of the layer. - * @param offset The memory offset of the gradients. - * @param output The output parameter of the layer. - * @return The number of gradients. - */ -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::mat& output); - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::cube& output); - -template -typename std::enable_if< - !HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, arma::mat& gradients, size_t offset, P& output); - -/** - * Auxiliary function to get the input size of the specified network. - * - * @param network The network used for specifying the input size. - * @return The input size. - */ -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network); - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network); - -/** - * Auxiliary function to get the input size of the specified layer. - * - * @param layer The layer used for specifying the input size. - * @param output The layer output parameter. - * @return The input size. - */ -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& output); - -/** - * Auxiliary function to set the weights of the specified network using a given - * initialize rule. - * - * @param initializeRule The rule used to initialize the network weights. - * @param weights The weights used to set the weights of the network. - * @param network The network used to set the weights. - * @param offset The memory offset of the weights. - */ -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the weights of the specified layer using the given - * initialize rule. - * - * @param initializeRule The rule used to initialize the layer weights. - * @param layer The layer used to set the weights. - * @param weights The weights used to set the weights of the layer. - * @param offset The memory offset of the weights. - * @param output The output parameter of the layer. - * @return The number of weights. 
- */ -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::mat& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::cube& output); - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - P& output); - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "network_util_impl.hpp" - -#endif diff --git a/src/mlpack/methods/ann/network_util_impl.hpp b/src/mlpack/methods/ann/network_util_impl.hpp deleted file mode 100644 index 32034576290..00000000000 --- a/src/mlpack/methods/ann/network_util_impl.hpp +++ /dev/null @@ -1,286 +0,0 @@ -/** - * @file network_util_impl.hpp - * @author Marcus Edel - * - * Implementation of the network auxiliary functions. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP -#define MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP - -#include "network_util_impl.hpp" - -#include - -namespace mlpack { -namespace ann { - -template -typename std::enable_if::type -NetworkSize(std::tuple& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkSize(std::tuple& network) -{ - return LayerSize(std::get(network), std::get( - network).OutputParameter()) + NetworkSize(network); -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& /* unused */) -{ - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerSize(T& /* unused */, P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset) -{ - NetworkWeights(weights, network, - offset + LayerWeights(std::get(network), weights, - offset, std::get(network).OutputParameter())); - -} - -template -typename std::enable_if::type -NetworkWeights(arma::mat& /* unused */, - std::tuple& /* unused */, - size_t /* unused */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, - arma::mat& weights, - size_t offset, - arma::mat& /* unused */) -{ - layer.Weights() = arma::mat(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, - arma::mat& weights, - size_t offset, - arma::cube& /* unused */) -{ - layer.Weights() = arma::cube(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(T& /* unused */, - arma::mat& /* unused */, - size_t /* unused */, - P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkGradients(arma::mat& 
gradients, - std::tuple& network, - size_t offset) -{ - NetworkGradients(gradients, network, - offset + LayerGradients(std::get(network), gradients, - offset, std::get(network).OutputParameter())); -} - -template -typename std::enable_if::type -NetworkGradients(arma::mat& /* unused */, - std::tuple& /* unused */, - size_t /* unused */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::mat& /* unused */) -{ - layer.Gradient() = arma::mat(gradients.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::cube& /* unused */) -{ - layer.Gradient() = arma::cube(gradients.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasGradientCheck::value, size_t>::type -LayerGradients(T& /* unused */, - arma::mat& /* unused */, - size_t /* unused */, - P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network) -{ - const size_t inputSize = LayerInputSize(std::get(network), std::get( - network).OutputParameter()); - - if (inputSize) - { - return inputSize; - } - - return NetworkInputSize(network); -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& /* unused */) -{ - return layer.Weights().n_cols; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerInputSize(T& /* unused */, P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset) -{ - NetworkWeights(initializeRule, weights, - network, offset + LayerWeights(initializeRule, std::get(network), - weights, offset, std::get(network).OutputParameter())); -} - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& /* initializeRule */, - arma::mat& /* weights */, - std::tuple& /* network */, - size_t /* offset */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::mat& /* output */) -{ - layer.Weights() = arma::mat(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows, - layer.Weights().n_cols); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::cube& /* output */) -{ - layer.Weights() = arma::cube(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows, - layer.Weights().n_cols); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - 
!HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& /* initializeRule */, - T& /* layer */, - arma::mat& /* weights */, - size_t /* offset */, - P& /* output */) -{ - return 0; -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt b/src/mlpack/methods/ann/performance_functions/CMakeLists.txt deleted file mode 100644 index c64f7263cea..00000000000 --- a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - mse_function.hpp - sse_function.hpp - cee_function.hpp - sparse_function.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/ann/performance_functions/cee_function.hpp b/src/mlpack/methods/ann/performance_functions/cee_function.hpp deleted file mode 100644 index 34244521dc8..00000000000 --- a/src/mlpack/methods/ann/performance_functions/cee_function.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/** - * @file cee_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the cross-entropy error performance - * function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The cross-entropy error performance function measures the network's - * performance according to the cross entropy errors. The log in the cross- - * entropy take sinto account the closeness of a prediction and is a more - * granular way to calculate the error. - * - * @tparam Layer The layer that is connected with the output layer. - */ -template< - class Layer = LinearLayer< > -> -class CrossEntropyErrorFunction -{ - public: - /** - * Computes the cross-entropy error function.. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the cross-entropy error function. - * - * @param input Input data. - * @param target Target data. - * @return cross-entropy error. 
- */ - template - static double Error(const DataType& input, const DataType& target, const DataType&) - { - if (LayerTraits::IsBinary) - return -arma::dot(arma::trunc_log(arma::abs(target - input)), target); - - return -arma::dot(arma::trunc_log(input), target); - } - -}; // class CrossEntropyErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/mse_function.hpp b/src/mlpack/methods/ann/performance_functions/mse_function.hpp deleted file mode 100644 index d2f19334d6f..00000000000 --- a/src/mlpack/methods/ann/performance_functions/mse_function.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/** - * @file mse_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the mean squared error performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The mean squared error performance function measures the network's - * performance according to the mean of squared errors. - */ -class MeanSquaredErrorFunction -{ - public: - /** - * Computes the mean squared error function. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the mean squared error function. - * - * @param input Input data. - * @param target Target data. - * @return mean of squared errors. - */ - template - static double Error(const DataType& input, const DataType& target, const DataType&) - { - return arma::mean(arma::mean(arma::square(target - input))); - } - -}; // class MeanSquaredErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp b/src/mlpack/methods/ann/performance_functions/sparse_function.hpp deleted file mode 100644 index 145a0b64aff..00000000000 --- a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp +++ /dev/null @@ -1,141 +0,0 @@ -/** - * @file sparse_function.hpp - * @author Siddharth Agrawal - * @author Tham Ngap Wei - * - * Definition and implementation of the sparse performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ - -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The cost function design for the sparse autoencoder. - */ -template -class SparseErrorFunction -{ - public: - /** - * Computes the cost of sparse autoencoder. - * - * @param lambda L2-regularization parameter. 
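// Illustrative sketch, not part of the patch: stripped of the network plumbing, the
// CrossEntropyErrorFunction and MeanSquaredErrorFunction removed here reduce to the
// two free functions below (assuming Armadillo matrices of predictions and targets).

#include <armadillo>

// Cross-entropy error: -sum_i target_i * log(prediction_i); trunc_log() avoids
// -inf when a prediction is exactly zero, matching the removed implementation.
inline double CrossEntropyError(const arma::mat& prediction, const arma::mat& target)
{
  return -arma::accu(target % arma::trunc_log(prediction));
}

// Mean squared error: mean over all elements of the squared difference.
inline double MeanSquaredError(const arma::mat& prediction, const arma::mat& target)
{
  return arma::accu(arma::square(target - prediction)) / target.n_elem;
}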
- * @param beta KL divergence parameter. - * @param rho Sparsity parameter. - */ - SparseErrorFunction(const double lambda = 0.0001, - const double beta = 3, - const double rho = 0.01) : - lambda(lambda), beta(beta), rho(rho) - { - // Nothing to do here. - } - - SparseErrorFunction(SparseErrorFunction &&layer) noexcept - { - *this = std::move(layer); - } - - SparseErrorFunction& operator=(SparseErrorFunction &&layer) noexcept - { - lambda = layer.lambda; - beta = layer.beta; - rho = layer.rho; - - return *this; - } - - //! Get the KL divergence parameter. - double Beta() const { return beta; } - //! Modify the KL divergence parameter. - void Beta(double value) { beta = value;} - - //! Get the L2-regularization parameter. - double Lambda() const { return lambda; } - //! Modify the L2-regularization parameter. - void Lambda(double value) { lambda = value;} - - //! Get the sparsity parameter. - double Rho() const { return rho; } - //! Modify the sparsity parameter. - void Rho(double value) { rho = value;} - - /** - * Computes the cost of sparse autoencoder. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error different between output and the input - * @return sum of squared errors. - */ - template - double Error(const Tp& network, - const InType& target, const InType &error) - { - return Error(std::get<0>(network).Weights(), std::get<3>(network).Weights(), - std::get<3>(network).RhoCap(), target, error); - } - - /** - * Computes the cost of sparse autoencoder. - * - * @param w1 weights of hidden layer - * @param w2 weights of output layer - * @param rhoCap Average activations of the hidden layer - * @param target Target data. - * @param error different between output and the input - * @return sum of squared errors. - */ - template - double Error(const InType& w1, const InType& w2, - const InType& rhoCap, const InType& target, - const InType& error) - { - // Calculate squared L2-norms of w1 and w2. - const double wL2SquaredNorm = - arma::accu(w1 % w1) + arma::accu(w2 % w2); - - // Calculate the reconstruction error, the regularization cost and the KL - // divergence cost terms. 'sumOfSquaresError' is the average squared l2-norm - // of the reconstructed data difference. 'weightDecay' is the squared l2-norm - // of the weights w1 and w2. 'klDivergence' is the cost of the hidden layer - // activations not being low. It is given by the following formula: - // KL = sum_over_hSize(rho*log(rho/rhoCaq) + (1-rho)*log((1-rho)/(1-rhoCap))) - const double sumOfSquaresError = - 0.5 * arma::accu(error % error) / target.n_cols; - - const double weightDecay = 0.5 * lambda * wL2SquaredNorm; - const double klDivergence = - beta * arma::accu(rho * arma::trunc_log(rho / rhoCap) + (1 - rho) * - arma::trunc_log((1 - rho) / (1 - rhoCap))); - - // The cost is the sum of the terms calculated above. - return sumOfSquaresError + weightDecay + klDivergence; - } - - private: - //! Locally stored L2-regularization parameter. - double lambda; - - //! Locally stored KL divergence parameter. - double beta; - - //! Locally stored sparsity parameter. 
- double rho; - -}; // class SparseErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/sse_function.hpp b/src/mlpack/methods/ann/performance_functions/sse_function.hpp deleted file mode 100644 index 34055fb74c0..00000000000 --- a/src/mlpack/methods/ann/performance_functions/sse_function.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/** - * @file sse_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the sum squared error performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The sum squared error performance function measures the network's performance - * according to the sum of squared errors. - */ -class SumSquaredErrorFunction -{ - public: - /** - * Computes the sum squared error function. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, - const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the sum squared error function. - * - * @param input Input data. - * @param target Target data. - * @return sum of squared errors. - */ - template - static double Error(const DataType& input, - const DataType& target, - const DataType&) - { - return arma::sum(arma::square(target - input)); - } - -}; // class SumSquaredErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt b/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt deleted file mode 100644 index 99b6b803bb1..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - max_pooling.hpp - mean_pooling.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp deleted file mode 100644 index f50b0419fe3..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file max_pooling.hpp - * @author Shangtong Zhang - * - * Definition of the MaxPooling class, which implements max pooling. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
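// Illustrative sketch, not part of the patch: the cost computed by the removed
// SparseErrorFunction is the sum of a reconstruction term, an L2 weight-decay term
// and a KL-divergence sparsity penalty. A standalone version with the same names
// and default hyperparameters:

#include <armadillo>

double SparseAutoencoderCost(const arma::mat& w1,      // hidden-layer weights
                             const arma::mat& w2,      // output-layer weights
                             const arma::mat& rhoCap,  // mean hidden activations
                             const arma::mat& error,   // reconstruction minus input
                             const double lambda = 0.0001,
                             const double beta = 3,
                             const double rho = 0.01)
{
  const double sumOfSquaresError = 0.5 * arma::accu(error % error) / error.n_cols;
  const double weightDecay = 0.5 * lambda *
      (arma::accu(w1 % w1) + arma::accu(w2 % w2));
  const double klDivergence = beta * arma::accu(
      rho * arma::trunc_log(rho / rhoCap) +
      (1 - rho) * arma::trunc_log((1 - rho) / (1 - rhoCap)));

  return sumOfSquaresError + weightDecay + klDivergence;
}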
- */ -#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP -#define MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/* - * The max pooling rule for convolution neural networks. Take the maximum value - * within the receptive block. - */ -class MaxPooling -{ - public: - /* - * Return the maximum value within the receptive block. - * - * @param input Input used to perform the pooling operation. - */ - template - double Pooling(const MatType& input) - { - return input.max(); - } - - /* - * Set the maximum value within the receptive block. - * - * @param input Input used to perform the pooling operation. - * @param value The unpooled value. - * @param output The unpooled output data. - */ - template - void Unpooling(const MatType& input, const double value, MatType& output) - { - output = MatType(input.n_rows, input.n_cols); - output.fill(value / input.n_elem); - } -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp deleted file mode 100644 index 7ab88c329f4..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file mean_pooling.hpp - * @author Shangtong Zhang - * - * Definition of the MeanPooling class, which implements mean pooling. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP -#define MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/* - * The mean pooling rule for convolution neural networks. Average all values - * within the receptive block. - */ -class MeanPooling -{ - public: - /* - * Return the average value within the receptive block. - * - * @param input Input used to perform the pooling operation. - */ - template - double Pooling(const MatType& input) - { - return arma::mean(arma::mean(input)); - } - - /* - * Set the average value within the receptive block. - * - * @param input Input used to perform the pooling operation. - * @param value The unpooled value. - * @param output The unpooled output data. - */ - template - void Unpooling(const MatType& input, const double value, MatType& output) - { - output = MatType(input.n_rows, input.n_cols); - output.fill(value / input.n_elem); - } -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp index 6b9483cd831..c071f052483 100644 --- a/src/mlpack/methods/ann/rnn.hpp +++ b/src/mlpack/methods/ann/rnn.hpp @@ -14,39 +14,29 @@ #include -#include - -#include -#include -#include -#include +#include +#include +#include #include namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of a standard recurrent neural network. + * Implementation of a standard recurrent neural network container. * - * @tparam LayerTypes Contains all layer modules used to construct the network. * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. 
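// Illustrative sketch, not part of the patch: the MaxPooling and MeanPooling rules
// removed above collapse a receptive block to one value on the forward pass and, on
// the backward pass, spread a value uniformly back over the block. Standalone
// versions, assuming an Armadillo matrix as the block:

#include <armadillo>

inline double MaxPool(const arma::mat& block) { return block.max(); }

inline double MeanPool(const arma::mat& block) { return arma::accu(block) / block.n_elem; }

// Unpooling used by both rules: distribute the value evenly over the block shape.
inline void Unpool(const arma::mat& block, const double value, arma::mat& output)
{
  output.set_size(block.n_rows, block.n_cols);
  output.fill(value / block.n_elem);
}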
- * @tparam PerformanceFunction Performance strategy used to calculate the error. */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> +template< + typename OutputLayerType = NegativeLogLikelihood<>, + typename InitializationRuleType = RandomInitialization > class RNN { public: //! Convenience typedef for the internal model construction. - using NetworkType = RNN; + using NetworkType = RNN; /** * Create the RNN object with the given predictors and responses set (this is @@ -54,71 +44,44 @@ class RNN * Optionally, specify which initialize rule and performance function should * be used. * - * @param network Network modules used to construct the network. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + * @param single Predict only the last element of the input sequence. * @param outputLayer Output layer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template class OptimizerType> - RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + RNN(const size_t rho, + const bool single = false, + OutputLayerType outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); /** * Create the RNN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. + * the set that is used to train the network) and the given optimizer. + * Optionally, specify which initialize rule and performance function should + * be used. * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. + * @param responses Outputs results from input training variables. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + * @param single Predict only the last element of the input sequence. + * @param outputLayer Output layer used to evaluate the network. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. 
*/ - template - RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, + RNN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + const size_t rho, + const bool single = false, + OutputLayerType outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); - /** - * Create the RNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. - */ - template - RNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + //! Destructor to release allocated memory. + ~RNN(); /** - * Train the recurrent neural network on the given input data. By default, the - * SGD optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::RMSprop). + * Train the recurrent neural network on the given input data using the given + * optimizer. * * This will use the existing model parameters as a starting point for the * optimization. If this is not what you want, then you should access the @@ -127,31 +90,19 @@ class RNN * @tparam OptimizerType Type of optimizer to use to train the model. * @param predictors Input training variables. * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::SGD - > - void Train(const arma::mat& predictors, const arma::mat& responses); - - /** - * Train the recurrent neural network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * * @param optimizer Instantiated optimizer used to train the model. */ template< template class OptimizerType = mlpack::optimization::SGD > - void Train(OptimizerType& optimizer); + void Train(const arma::mat& predictors, + const arma::mat& responses, + OptimizerType& optimizer); /** - * Train the recurrent neural network on the given input data using the given - * optimizer. + * Train the recurrent neural network on the given input data. By default, the + * SGD optimization algorithm is used, but others can be specified + * (such as mlpack::optimization::RMSprop). * * This will use the existing model parameters as a starting point for the * optimization. If this is not what you want, then you should access the @@ -160,19 +111,16 @@ class RNN * @tparam OptimizerType Type of optimizer to use to train the model. * @param predictors Input training variables. * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. 
*/ template< template class OptimizerType = mlpack::optimization::SGD > - void Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer); + void Train(const arma::mat& predictors, const arma::mat& responses); /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the - * OutputClass() function. + * output layer function. * * @param predictors Input predictors. * @param responses Matrix to put output predictions of responses into. @@ -186,9 +134,9 @@ class RNN * @param parameters Matrix model parameters. * @param i Index of point to use for objective function evaluation. * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. + * layer act differently in training or testing mode. */ - double Evaluate(const arma::mat& parameters, + double Evaluate(const arma::mat& /* parameters */, const size_t i, const bool deterministic = true); @@ -206,6 +154,29 @@ class RNN const size_t i, arma::mat& gradient); + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + template + void Add(const LayerType& layer) { network.push_back(new LayerType(layer)); } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + //! Return the number of separable functions (the number of predictor points). size_t NumFunctions() const { return numFunctions; } @@ -219,575 +190,117 @@ class RNN void Serialize(Archive& ar, const unsigned int /* version */); private: - /* - * Predict the response of the given input matrix. - */ - template - void SinglePredict(const DataType& input, DataType& output) - { - deterministic = true; - seqLen = input.n_rows / inputSize; - ResetParameter(network); - - // Iterate through the input sequence and perform the feed forward pass. - for (seqNum = 0; seqNum < seqLen; seqNum++) - { - // Perform the forward pass and save the activations. - Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1), - network); - SaveActivations(network); - - // Retrieve output of the subsequence. - if (seqOutput) - { - DataType seqOutput; - OutputPrediction(seqOutput, network); - output = arma::join_cols(output, seqOutput); - } - } - - // Retrieve output of the complete sequence. - if (!seqOutput) - OutputPrediction(output, network); - } - + // Helper functions. /** - * Reset the network by clearing the layer activations and by setting the - * layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) - { - activations.clear(); - } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetSeqLen(std::get(network)); - ResetRecurrent(std::get(network), std::get(network).InputParameter()); - std::get(network).Delta().zeros(); - - ResetParameter(network); - } - - /** - * Reset the layer status by setting the current deterministic parameter - * for all layer that implement the Deterministic function. 
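// Usage sketch, not part of the patch: with the container-style interface added here,
// a recurrent network is assembled by pushing layer modules into the RNN and then
// calling Train() and Predict(). The Add<>(), Train() and Predict() signatures are the
// ones introduced in this change; the concrete layer types, their constructor
// arguments and the extra include below are assumptions made only for illustration.

#include <mlpack/methods/ann/rnn.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>  // assumed aggregate layer header

using namespace mlpack::ann;

void TrainExample(arma::mat& predictors, arma::mat& responses, const size_t rho)
{
  // Defaults added in this change: NegativeLogLikelihood<> output layer and
  // RandomInitialization for the weights.
  RNN<> model(rho);

  const size_t inputSize = 10, hiddenSize = 32, outputSize = 4;  // hypothetical sizes
  model.Add<Linear<> >(inputSize, hiddenSize);
  model.Add<LSTM<> >(hiddenSize, hiddenSize, rho);
  model.Add<Linear<> >(hiddenSize, outputSize);
  model.Add<LogSoftMax<> >();

  model.Train(predictors, responses);  // default OptimizerType is SGD

  arma::mat predictions;
  model.Predict(predictors, predictions);
}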
- */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ } - - /** - * Reset the layer sequence length by setting the current seqLen parameter - * for all layer that implement the SeqLen function. - */ - template - typename std::enable_if< - HasSeqLenCheck::value, void>::type - ResetSeqLen(T& layer) - { - layer.SeqLen() = seqLen; - } - - template - typename std::enable_if< - !HasSeqLenCheck::value, void>::type - ResetSeqLen(T& /* unused */) { /* Nothing to do here */ } - - /** - * Distinguish between recurrent layer and non-recurrent layer when resetting - * the recurrent parameter. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - ResetRecurrent(T& layer, P& /* unused */) - { - layer.RecurrentParameter().zeros(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - ResetRecurrent(T& /* unused */, P& /* unused */) - { - /* Nothing to do here */ - } - - /** - * Initialize the network by setting the input size and output size. - */ - template - typename std::enable_if::type - InitLayer(const InputDataType& /* unused */, - const TargetDataType& target, - std::tuple& /* unused */) - { - seqOutput = outputSize < target.n_elem ? true : false; - } - - template - typename std::enable_if::type - InitLayer(const InputDataType& input, - const TargetDataType& target, - std::tuple& network) - { - Init(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - InitLayer(input, target, - network); - } - - /** - * Retrieve the weight matrix for all layer that implement the Weights - * function to extract the input size and output size. - */ - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Init(T& layer, P& /* unused */, D& /* unused */) - { - // Initialize the input size only once. - if (!inputSize) - inputSize = layer.Weights().n_cols; - - outputSize = layer.Weights().n_rows; - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Init(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } - - /** - * Save the network layer activations. + * The Forward algorithm (part of the Forward-Backward algorithm). Computes + * forward probabilities for each module. + * + * @param input Data sequence to compute probabilities for. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - SaveActivations(std::tuple& /* unused */) - { - Save(I, std::get(network), std::get(network).InputParameter()); - LinkRecurrent(network); - } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - SaveActivations(std::tuple& network) - { - Save(I, std::get(network), std::get(network).InputParameter()); - SaveActivations(network); - } + void Forward(arma::mat&& input); /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. + * The Backward algorithm (part of the Forward-Backward algorithm). Computes + * backward pass for module. 
*/ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Save(const size_t layerNumber, T& layer, P& /* unused */) - { - if (activations.size() == layerNumber) - { - activations.push_back(new arma::mat(layer.RecurrentParameter().n_rows, - seqLen)); - } - - activations[layerNumber].unsafe_col(seqNum) = layer.RecurrentParameter(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Save(const size_t layerNumber, T& layer, P& /* unused */) - { - if (activations.size() == layerNumber) - { - activations.push_back(new arma::mat(layer.OutputParameter().n_rows, - seqLen)); - } - - activations[layerNumber].unsafe_col(seqNum) = layer.OutputParameter(); - } + void Backward(); /** - * Load the network layer activations. + * Iterate through all layer modules and update the the gradient using the + * layer defined optimizer. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename DataType, typename... Tp - > - typename std::enable_if::type - LoadActivations(DataType& input, std::tuple& network) - { - Load(I, std::get(network), std::get(network).InputParameter()); - std::get<0>(network).InputParameter() = input; - } + void Gradient(); - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename DataType, typename... Tp - > - typename std::enable_if::type - LoadActivations(DataType& input, std::tuple& network) - { - Load(I, std::get(network), std::get(network).InputParameter()); - LoadActivations(input, network); - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. + /* + * Predict the response of the given input sequence. + * + * @param predictors Input predictors. + * @param responses Vector to put output prediction of a response into. */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, T& layer, P& /* unused */) - { - layer.RecurrentParameter() = activations[layerNumber].unsafe_col(seqNum); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, T& layer, P& /* unused */) - { - layer.OutputParameter() = activations[layerNumber].unsafe_col(seqNum); - } + void SinglePredict(const arma::mat& predictors, arma::mat& responses); /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. + * Reset the module infomration (weights/parameters). */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } + void ResetParameters(); /** - * Link the calculated activation with the correct layer. + * Reset the module status by setting the current deterministic parameter + * for all modules that implement the Deterministic function. */ - template< - size_t I = 1, - size_t Max = std::tuple_size::value - 1, - typename... 
Tp - > - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - } - - template< - size_t I = 1, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } + void ResetDeterministic(); /** - * Link the calculated activation with the correct recurrent layer. + * Reset the gradient for all modules that implement the Gradient function. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkRecurrent(std::tuple& /* unused */) { /* Nothing to do here */ } + void ResetGradients(arma::mat& gradient); - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkRecurrent(std::tuple& network) - { - UpdateRecurrent(std::get(network), std::get(network).InputParameter(), - std::get(network).OutputParameter()); - LinkRecurrent(network); - } + //! Number of steps to backpropagate through time (BPTT). + size_t rho; - /** - * Distinguish between recurrent layer and non-recurrent layer when updating - * the recurrent activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - UpdateRecurrent(T& layer, P& /* unused */, D& output) - { - layer.RecurrentParameter() = output; - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - UpdateRecurrent(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + //! Instantiated outputlayer used to evaluate the network. + OutputLayerType outputLayer; - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Masures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } + //! Instantiated InitializationRule object for initializing the network + //! parameter. + InitializationRuleType initializeRule; - /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. - */ - template - void Backward(DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, std::tuple& /* unused */) - { - /* Nothing to do here */ - } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - BackwardRecurrent(std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } + //! The input size. 
+ size_t inputSize; - /* - * Update the delta of the recurrent layer. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - BackwardRecurrent(T& layer, P& /* unused */, D& delta) - { - if (!layer.Delta().is_empty()) - delta += layer.Delta(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - BackwardRecurrent(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + //! The output size. + size_t outputSize; - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template::value - 2, - typename... Tp> - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta(), std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - } - - template::value - 2, - typename... Tp> - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta(), std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value && - HasRecurrentParameterCheck::value, void>::type - Update(T1& layer, P1& /* unused */, D1& /* unused */, T2& /* unused */, - P2& /* unused */, D2& delta2) - { - layer.Gradient(layer.InputParameter(), delta2, layer.Gradient()); - } - - template - typename std::enable_if< - (!HasGradientCheck::value && - !HasRecurrentParameterCheck::value) || - (!HasGradientCheck::value && - HasRecurrentParameterCheck::value), void>::type - Update(T1& /* unused */, P1& /* unused */, D1& /* unused */, T2& /* unused */, - P2& /* unused */, D2& /* unused */) - { - /* Nothing to do here */ - } - - template - typename std::enable_if< - HasGradientCheck::value && - !HasRecurrentParameterCheck::value, void>::type - Update(T1& layer, P1& /* unused */, D1& delta1, T2& /* unused */, - P2& /* unused */, D2& /* unused */) - { - layer.Gradient(layer.InputParameter(), delta1, layer.Gradient()); - } + //! The target size. + size_t targetSize; - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } + //! Indicator if we already trained the model. + bool reset; - //! Instantiated recurrent neural network. - LayerTypes network; + //! Only predict the last element of the input sequence. + bool single; - //! The outputlayer used to evaluate the network - OutputLayerType& outputLayer; + //! Locally-stored model modules. + std::vector network; - //! Performance strategy used to claculate the error. - PerformanceFunction performanceFunc; + //! The matrix of data points (predictors). + arma::mat predictors; - //! The current evaluation mode (training or testing). - bool deterministic; + //! The matrix of responses to the input data points. + arma::mat responses; //! Matrix of (trained) parameters. arma::mat parameter; - //! The matrix of data points (predictors). - arma::mat predictors; + //! The number of separable functions (the number of predictor points). + size_t numFunctions; - //! The matrix of responses to the input data points. - arma::mat responses; + //! The current error for the backward pass. 
+ arma::mat error; - //! Locally stored network input size. - size_t inputSize; + //! THe current input of the forward/backward pass. + arma::mat currentInput; - //! Locally stored network output size. - size_t outputSize; + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; - //! The index of the current sequence number. - size_t seqNum; + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; - //! Locally stored number of samples in one input sequence. - size_t seqLen; + //! List of all module parameters for the backward pass (BBTT). + std::vector moduleOutputParameter; - //! Locally stored parameter that indicates if the input is a sequence. - bool seqOutput; + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; - //! The activation storage we are using to perform the feed backward pass. - boost::ptr_vector activations; + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; - //! The number of separable functions (the number of predictor points). - size_t numFunctions; + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; - //! Locally stored backward error. - arma::mat error; + //! The current evaluation mode (training or testing). + bool deterministic; }; // class RNN } // namespace ann diff --git a/src/mlpack/methods/ann/rnn_impl.hpp b/src/mlpack/methods/ann/rnn_impl.hpp index d8d2f07f0ac..cdac030a2ec 100644 --- a/src/mlpack/methods/ann/rnn_impl.hpp +++ b/src/mlpack/methods/ann/rnn_impl.hpp @@ -19,129 +19,85 @@ namespace mlpack { namespace ann /** Artificial Neural Network. */ { -template -template class OptimizerType -> -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols), +template +RNN::RNN( + const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : + rho(rho), + outputLayer(outputLayer), + initializeRule(initializeRule), inputSize(0), - outputSize(0) + outputSize(0), + targetSize(0), + reset(false), + single(single) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); - - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." 
<< std::endl; + /* Nothing to do here */ } -template -template -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), +template +RNN::RNN( + const arma::mat& predictors, + const arma::mat& responses, + const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : + rho(rho), + outputLayer(outputLayer), + initializeRule(initializeRule), inputSize(0), - outputSize(0) + outputSize(0), + targetSize(0), + reset(false), + single(single) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); + numFunctions = responses.n_cols; - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + this->deterministic = true; + ResetDeterministic(); - Train(predictors, responses); + if (!reset) + { + ResetParameters(); + reset = true; + } } -template -template -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - inputSize(0), - outputSize(0) +template +RNN::~RNN() { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } } -template +template template class OptimizerType> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, const arma::mat& responses) +void RNN::Train( + const arma::mat& predictors, + const arma::mat& responses, + OptimizerType& optimizer) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - OptimizerType optimizer(*this); + this->predictors = std::move(predictors); + this->responses = std::move(responses); + + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + reset = true; + } // Train the model. Timer::Start("rnn_optimization"); @@ -152,43 +108,27 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template +template template class OptimizerType> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) +void RNN::Train( + const arma::mat& predictors, const arma::mat& responses) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - // Train the model. 
- Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." << std::endl; -} + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + reset = true; + } + + OptimizerType optimizer(*this); -template -template< - template class OptimizerType -> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ // Train the model. Timer::Start("rnn_optimization"); const double out = optimizer.Optimize(parameter); @@ -198,96 +138,120 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Predict(arma::mat& predictors, arma::mat& responses) +template +void RNN::Predict( + arma::mat& predictors, arma::mat& responses) { - arma::mat responsesTemp; - SinglePredict(arma::mat(predictors.colptr(0), predictors.n_rows, - 1, false, true), responsesTemp); + if (parameter.is_empty()) + { + ResetParameters(); + } - responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); - responses.col(0) = responsesTemp.col(0); + if (!deterministic) + { + deterministic = true; + ResetDeterministic(); + } - for (size_t i = 1; i < predictors.n_cols; i++) + responses = arma::zeros(outputSize * rho, predictors.n_cols); + arma::mat responsesTemp = responses.col(0); + + for (size_t i = 0; i < predictors.n_cols; i++) { - SinglePredict(arma::mat(predictors.colptr(i), predictors.n_rows, - 1, false, true), responsesTemp); - responses.col(i) = responsesTemp.col(0); + SinglePredict( + arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), + responsesTemp); + + responses.col(i) = responsesTemp; } } -template -double RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) +template +void RNN::SinglePredict( + const arma::mat& predictors, arma::mat& responses) { - this->deterministic = deterministic; + for (size_t seqNum = 0; seqNum < rho; ++seqNum) + { + currentInput = predictors.rows(seqNum * inputSize, + (seqNum + 1) * inputSize - 1); + Forward(std::move(currentInput)); + + responses.rows(seqNum * outputSize, (seqNum + 1) * outputSize - 1) = + boost::apply_visitor(outputParameterVisitor, network.back()); + } +} + +template +double RNN::Evaluate( + const arma::mat& /* parameters */, const size_t i, const bool deterministic) +{ + if (parameter.is_empty()) + { + ResetParameters(); + reset = true; + } + + if (deterministic != this->deterministic) + { + this->deterministic = deterministic; + ResetDeterministic(); + } arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true); arma::mat target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - // Initialize the activation storage only once. - if (activations.empty()) - InitLayer(input, target, network); - - double networkError = 0; - seqLen = input.n_rows / inputSize; - ResetParameter(network); + if (!inputSize) + { + inputSize = input.n_elem / rho; + targetSize = target.n_elem / rho; + } - error = arma::mat(outputSize, outputSize < target.n_elem ? 
seqLen : 1); + double performance = 0; - // Iterate through the input sequence and perform the feed forward pass. - for (seqNum = 0; seqNum < seqLen; seqNum++) + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - // Perform the forward pass and save the activations. - Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1), - network); - SaveActivations(network); + currentInput = input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1); + arma::mat currentTarget = target.rows(seqNum * targetSize, + (seqNum + 1) * targetSize - 1); - // Retrieve output error of the subsequence. - if (seqOutput) + Forward(std::move(currentInput)); + + if (!deterministic) { - arma::mat seqError = error.unsafe_col(seqNum); - arma::mat seqTarget = target.submat(seqNum * outputSize, 0, - (seqNum + 1) * outputSize - 1, 0); - networkError += OutputError(seqTarget, seqError, network); + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } } + + performance += outputLayer.Forward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget)); } - // Retrieve output error of the complete sequence. - if (!seqOutput) - return OutputError(target, error, network); + if (!outputSize) + { + outputSize = boost::apply_visitor(outputParameterVisitor, + network.back()).n_elem; + } - return networkError; + return performance; } -template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) +template +void RNN::Gradient( + const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { + if (parameter.is_empty()) + { + ResetParameters(); + reset = true; + } + gradient = arma::zeros(parameter.n_rows, parameter.n_cols); } else @@ -295,59 +259,167 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction gradient.zeros(); } - Evaluate(parameter, i, false); + Evaluate(parameters, i, false); - arma::mat currentGradient = arma::mat(gradient.n_rows, gradient.n_cols); - NetworkGradients(currentGradient, network); + arma::mat currentGradient = arma::zeros(parameter.n_rows, + parameter.n_cols); + ResetGradients(currentGradient); - const arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, + arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, + 1, false, true); + arma::mat target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - // Iterate through the input sequence and perform the feed backward pass. - for (seqNum = seqLen - 1; seqNum >= 0; seqNum--) + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - // Load the network activation for the upcoming backward pass. - LoadActivations(input.rows(seqNum * inputSize, (seqNum + 1) * - inputSize - 1), network); + currentGradient.zeros(); + + arma::mat currentTarget = target.rows((rho - seqNum - 1) * targetSize, + (rho - seqNum) * targetSize - 1); + currentInput = input.rows((rho - seqNum - 1) * inputSize, + (rho - seqNum) * inputSize - 1); + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } - // Perform the backward pass. 
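// Toy sketch, not part of the patch and not mlpack code: the Evaluate()/Gradient()
// pair here follows the usual backpropagation-through-time pattern -- unroll the
// forward pass for rho steps while saving the intermediate outputs, then walk the
// sequence backwards and accumulate one gradient contribution per step. A scalar
// example of that pattern, with the recurrence h_t = w * h_{t-1} + x_t and the loss
// L = 0.5 * (h_T - target)^2:

#include <cstdio>
#include <vector>

int main()
{
  const double w = 0.9, target = 1.0;
  const std::vector<double> x = {0.5, -0.2, 0.8};  // input sequence, T = 3 steps

  // Forward pass: save every hidden state, as the RNN saves each module's output.
  std::vector<double> h(x.size() + 1, 0.0);
  for (size_t t = 0; t < x.size(); ++t)
    h[t + 1] = w * h[t] + x[t];

  // Backward pass: reverse order, one gradient contribution per unrolled step
  // (the analogue of 'gradient += currentGradient' in the code here).
  double delta = h.back() - target;  // dL/dh_T
  double gradient = 0.0;
  for (size_t t = x.size(); t-- > 0; )
  {
    gradient += delta * h[t];  // dh_{t+1}/dw contributes h_t at this step
    delta *= w;                // push the error one step further back in time
  }

  std::printf("dL/dw = %f\n", gradient);
  return 0;
}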
- if (seqOutput) + if (single && seqNum > 0) { - arma::mat seqError = error.unsafe_col(seqNum); - Backward(seqError, network); + error.zeros(); } else { - Backward(error, network); + outputLayer.Backward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget), + std::move(error)); } - // Link the parameters and update the gradients. - LinkParameter(network); - UpdateGradients<>(network); - - // Update the overall gradient. + Backward(); + Gradient(); gradient += currentGradient; + } +} + +template +void RNN::ResetParameters() +{ + size_t weights = 0; + for (LayerTypes& layer : network) + { + weights += boost::apply_visitor(weightSizeVisitor, layer); + } + + parameter.set_size(weights, 1); + initializeRule.Initialize(parameter, parameter.n_elem, 1); + + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), layer); + + boost::apply_visitor(resetVisitor, layer); + } +} - if (seqNum == 0) break; +template +void RNN::ResetDeterministic() +{ + DeterministicSetVisitor deterministicSetVisitor(deterministic); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deterministicSetVisitor)); +} + +template +void RNN::ResetGradients( + arma::mat& gradient) +{ + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(GradientSetVisitor(std::move(gradient), + offset), layer); + } +} + +template +void RNN::Forward(arma::mat&& input) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network[i - 1])), + std::move(boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); } } -template +template +void RNN::Backward() +{ + boost::apply_visitor(BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network.back())), + std::move(error), std::move(boost::apply_visitor(deltaVisitor, + network.back()))), network.back()); + + for (size_t i = 2; i < network.size(); ++i) + { + boost::apply_visitor(BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + network[network.size() - i])), std::move(boost::apply_visitor( + deltaVisitor, network[network.size() - i + 1])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i]))), + network[network.size() - i]); + } +} + +template +void RNN::Gradient() +{ + boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( + boost::apply_visitor(deltaVisitor, network[1]))), network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network[i - 1])), + std::move(boost::apply_visitor(deltaVisitor, network[i + 1]))), + network[i]); + } +} + +template template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Serialize(Archive& ar, const unsigned int /* version */) +void RNN::Serialize( + Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(single, "single"); + ar & data::CreateNVP(inputSize, "inputSize"); + ar & data::CreateNVP(outputSize, "outputSize"); + ar & data::CreateNVP(targetSize, "targetSize"); + ar & 
data::CreateNVP(currentInput, "currentInput"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) { - NetworkWeights(parameter, network); + reset = false; + + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), layer); + + boost::apply_visitor(resetVisitor, layer); + } } } diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt deleted file mode 100644 index ced53a30ea7..00000000000 --- a/src/mlpack/methods/rmva/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - rmva.hpp - rmva_impl.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) - -add_cli_executable(rmva) diff --git a/src/mlpack/methods/rmva/rmva.hpp b/src/mlpack/methods/rmva/rmva.hpp deleted file mode 100644 index 5f4f03112c8..00000000000 --- a/src/mlpack/methods/rmva/rmva.hpp +++ /dev/null @@ -1,963 +0,0 @@ -/** - * @file rmva.hpp - * @author Marcus Edel - * - * Definition of the RecurrentNeuralAttention class, which implements the - * Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef __MLPACK_METHODS_RMVA_RMVA_HPP -#define __MLPACK_METHODS_RMVA_RMVA_HPP - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * This class implements the Recurrent Model for Visual Attention, using a - * variety of possible layer implementations. - * - * For more information, see the following paper. - * - * @code - * @article{MnihHGK14, - * title={Recurrent Models of Visual Attention}, - * author={Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, - * journal={CoRR}, - * volume={abs/1406.6247}, - * year={2014} - * } - * @endcode - * - * @tparam LocatorType Type of locator network. - * @tparam LocationSensorType Type of location sensor network. - * @tparam GlimpseSensorType Type of glimpse sensor network. - * @tparam GlimpseType Type of glimpse network. - * @tparam StartType Type of start network. - * @tparam FeedbackType Type of feedback network. - * @tparam TransferType Type of transfer network. - * @tparam ClassifierType Type of classifier network. - * @tparam RewardPredictorType Type of reward predictor network. - * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam MatType Matrix type (arma::mat or arma::sp_mat). - */ -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType = RandomInitialization, - typename MatType = arma::mat -> -class RecurrentNeuralAttention -{ - public: - //! 
Convenience typedef for the internal model construction. - using NetworkType = RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType>; - - /** - * Construct the RecurrentNeuralAttention object, which will construct the - * recurrent model for visual attentionh using the specified networks. - * - * @param locator The locator network. - * @param locationSensor The location sensor network. - * @param glimpseSensor The glimpse sensor network. - * @param glimpse The glimpse network. - * @param start The start network. - * @param feedback The feedback network. - * @param transfer The transfer network. - * @param classifier The classifier network. - * @param rewardPredictor The reward predictor network. - * @param nStep Number of steps for the back-propagate through time. - * @param initializeRule Rule used to initialize the weight matrix. - */ - template - RecurrentNeuralAttention(TypeLocator&& locator, - TypeLocationSensor&& locationSensor, - TypeGlimpseSensor&& glimpseSensor, - TypeGlimpse&& glimpse, - TypeStart&& start, - TypeFeedback&& feedback, - TypeTransfer&& transfer, - TypeClassifier&& classifier, - TypeRewardPredictor&& rewardPredictor, - const size_t nStep, - InitializationRuleType initializeRule = - InitializationRuleType()); - /** - * Train the network on the given input data using the given optimizer. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer); - - /** - * Predict the responses to a given set of predictors. The responses will - * reflect the output of the given output layer as returned by the - * OutputClass() function. - * - * @param predictors Input predictors. - * @param responses Matrix to put output predictions of responses into. - */ - void Predict(arma::mat& predictors, arma::mat& responses); - - /** - * Evaluate the network with the given parameters. This function is usually - * called by the optimizer to train the model. - * - * @param parameters Matrix model parameters. - * @param i Index of point to use for objective function evaluation. - * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. - */ - double Evaluate(const arma::mat& parameters, - const size_t i, - const bool deterministic = true); - - /** - * Evaluate the gradient of the network with the given parameters, and with - * respect to only one point in the dataset. This is useful for - * optimizers such as SGD, which require a separable objective function. - * - * @param parameters Matrix of the model parameters to be optimized. - * @param i Index of points to use for objective function gradient evaluation. - * @param gradient Matrix to output gradient into. 
- */ - void Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient); - - //! Return the number of separable functions (the number of predictor points). - size_t NumFunctions() const { return numFunctions; } - - //! Return the initial point for the optimization. - const arma::mat& Parameters() const { return parameter; } - //! Modify the initial point for the optimization. - arma::mat& Parameters() { return parameter; } - - //! Return the number of steps to back-propagate through time. - const size_t& Rho() const { return nStep; } - //! Modify the number of steps to back-propagate through time. - size_t& Rho() { return nStep; } - - //! Return the current location. - const arma::mat& Location(); - - //! Serialize the model. - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /* - * Predict the response of the given input matrix. - */ - template - void SinglePredict(const InputType& input, OutputType& output) - { - // Get the locator input size. - if (!inputSize) - { - inputSize = NetworkInputSize(locator); - } - - // Reset networks. - ResetParameter(locator); - ResetParameter(locationSensor); - ResetParameter(glimpseSensor); - ResetParameter(glimpse); - ResetParameter(feedback); - ResetParameter(transfer); - ResetParameter(classifier); - ResetParameter(rewardPredictor); - ResetParameter(start); - - // Sample an initial starting actions by forwarding zeros through the - // locator. - locatorInput.push_back(new arma::cube(arma::zeros(inputSize, 1, - input.n_slices))); - - // Forward pass throught the recurrent network. - for (step = 0; step < nStep; step++) - { - // Locator forward pass. - Forward(locatorInput.back(), locator); - - // Location sensor forward pass. - Forward(std::get::value - 1>( - locator).OutputParameter(), locationSensor); - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(std::get::value - 1>( - locator).OutputParameter(), glimpseSensor); - - // Glimpse sensor forward pass. - Forward(input, glimpseSensor); - - // Concat the parameter activation from the location sensor and - // glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Glimpse forward pass. - Forward(concatLayerOutput, glimpse); - - if (step == 0) - { - // Start forward pass. - Forward(std::get::value - 1>( - glimpse).OutputParameter(), start); - - // Transfer forward pass. - Forward(std::get::value - 1>( - start).OutputParameter(), transfer); - } - else - { - // Feedback forward pass. - Forward(std::get::value - 1>( - transfer).OutputParameter(), feedback); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Transfer forward pass. - Forward(feedbackLayerOutput, transfer); - } - - // Update the input for the next run - locatorInput.push_back(new arma::cube( - std::get::value - 1>( - transfer).OutputParameter().memptr(), locatorInput.back().n_rows, - locatorInput.back().n_cols, locatorInput.back().n_slices)); - } - - // Classifier forward pass. - Forward(locatorInput.back().slice(0), classifier); - - output = std::get::value - 1>( - classifier).OutputParameter(); - } - - /** - * Update the layer reward for all layer that implement the Rewards function. 
- */ - template - typename std::enable_if::type - ResetReward(const double reward, std::tuple& network) - { - SetReward(reward, std::get(network)); - ResetReward(reward, network); - } - - template - typename std::enable_if::type - ResetReward(const double /* reward */, std::tuple& /* network */) - { - } - - template - typename std::enable_if< - HasRewardCheck::value, void>::type - SetReward(const double reward, T& layer) - { - layer.Reward() = reward; - } - - template - typename std::enable_if< - !HasRewardCheck::value, void>::type - SetReward(const double /* reward */, T& /* layer */) - { - /* Nothing to do here */ - } - - /** - * Reset the network by clearing the delta and by setting the layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* network */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - std::get(network).Delta().zeros(); - - ResetParameter(network); - } - - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* layer */) { /* Nothing to do here */ } - - /** - * Reset the location by updating the location for all layer that implement - * the Location function. - */ - template - typename std::enable_if::type - ResetLocation(const arma::mat& /* location */, - std::tuple& /* network */) - { - // Nothing to do here. - } - - template - typename std::enable_if::type - ResetLocation(const arma::mat& location, std::tuple& network) - { - SetLocation(std::get(network), location); - ResetLocation(location, network); - } - - template - typename std::enable_if< - HasLocationCheck::value, void>::type - SetLocation(T& layer, const arma::mat& location) - { - layer.Location(location); - } - - template - typename std::enable_if< - !HasLocationCheck::value, void>::type - SetLocation(T& /* layer */, const arma::mat& /* location */) - { - // Nothing to do here. - } - - /** - * Save the network layer activations. - */ - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& activations, - std::tuple& network, - size_t& activationCounter) - { - Save(I, activations, std::get(network), - std::get(network).InputParameter()); - - activationCounter++; - SaveActivations(activations, network, activationCounter); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& /* activations */, - std::tuple& /* network */, - size_t& /* activationCounter */) - { - // Nothing to do here. - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. 
- */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Save(const size_t /* layerNumber */, - boost::ptr_vector& activations, - T& layer, - P& /* unused */) - { - activations.push_back(new MatType(layer.RecurrentParameter())); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Save(const size_t /* layerNumber */, - boost::ptr_vector& activations, - T& layer, - P& /* unused */) - { - activations.push_back(new MatType(layer.OutputParameter())); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& activationsA, - boost::ptr_vector& activationsB, - size_t& dataTypeACounter, - size_t& dataTypeBCounter, - std::tuple& network) - { - Save(activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - std::get(network), std::get(network).OutputParameter()); - - SaveActivations( - activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - network); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& /* activationsB */, - size_t& /* dataTypeACounter */, - size_t& /* dataTypeBCounter */, - std::tuple& /* network */) - { - // Nothing to do here. - } - - template - void Save(boost::ptr_vector& activationsA, - boost::ptr_vector& /* activationsB */, - size_t& dataTypeACounter, - size_t& /* dataTypeBCounter */, - T& layer, - DataTypeA& /* unused */) - { - activationsA.push_back(new DataTypeA(layer.OutputParameter())); - dataTypeACounter++; - } - - template - void Save(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& activationsB, - size_t& /* dataTypeACounter */, - size_t& dataTypeBCounter, - T& layer, - DataTypeB& /* unused */) - { - activationsB.push_back(new DataTypeB(layer.OutputParameter())); - dataTypeBCounter++; - } - - /** - * Load the network layer activations. - */ - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& /* activations */, - size_t& /* activationCounter */, - std::tuple& network) - { - std::get<0>(network).InputParameter() = input; - LinkParameter(network); - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& activations, - size_t& activationCounter, - std::tuple& network) - { - Load(--activationCounter, activations, - std::get(network), - std::get(network).InputParameter()); - - LoadActivations(input, activations, - activationCounter, network); - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. 
- */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, - boost::ptr_vector& activations, - T& layer, - P& /* output */) - { - layer.RecurrentParameter() = activations[layerNumber]; - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, - boost::ptr_vector& activations, - T& layer, - P& /* output */) - { - layer.OutputParameter() = activations[layerNumber]; - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& activationsA, - boost::ptr_vector& activationsB, - size_t& dataTypeACounter, - size_t& dataTypeBCounter, - std::tuple& network) - { - Load(activationsA, - activationsB, - dataTypeACounter, - dataTypeBCounter, - std::get(network), - std::get(network).OutputParameter()); - - LoadActivations( - input, activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - network); - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& /* activationsA */, - boost::ptr_vector& /* activationsB */, - size_t& /* dataTypeACounter */, - size_t& /* dataTypeBCounter */, - std::tuple& network) - { - std::get<0>(network).InputParameter() = input; - LinkParameter(network); - } - - template - void Load(boost::ptr_vector& activationsA, - boost::ptr_vector& /* activationsB */, - size_t& dataTypeACounter, - size_t& /* dataTypeBCounter */, - T& layer, - DataTypeA& /* output */) - { - layer.OutputParameter() = activationsA[--dataTypeACounter]; - } - - template - void Load(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& activationsB, - size_t& /* dataTypeACounter */, - size_t& dataTypeBCounter, - T& layer, - DataTypeB& /* output */) - { - layer.OutputParameter() = activationsB[--dataTypeBCounter]; - } - - /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. - */ - template - void Forward(const DataType& input, std::tuple& t) - { - std::get(t).InputParameter() = input; - std::get(t).Forward(std::get(t).InputParameter(), - std::get(t).OutputParameter()); - - ForwardTail(t); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& t) - { - std::get(t).Forward(std::get(t).OutputParameter(), - std::get(t).OutputParameter()); - - ForwardTail(t); - } - - /** - * Run a single iteration of the backward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. 
- */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), error, - std::get(t).Delta()); - } - - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), error, - std::get(t).Delta()); - - BackwardTail(error, t); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* error */, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), - std::get(t).Delta(), - std::get(t).Delta()); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), - std::get(t).Delta(), - std::get(t).Delta()); - - BackwardTail(error, t); - } - - /** - * Link the calculated activation with the correct layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* network */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template - void UpdateGradients(const InputType& input, - const ErrorType& error, - std::tuple& network) - { - Update(std::get<0>(network), - input, - std::get<1>(network).Delta(), - std::get<1>(network).OutputParameter()); - - UpdateGradients<1, ErrorType, Tp...>(error, network); - } - - template - typename std::enable_if::type - UpdateGradients(const ErrorType& error, std::tuple& network) - { - Update(std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta(), - std::get(network).OutputParameter()); - - UpdateGradients(error, network); - } - - template - typename std::enable_if::type - UpdateGradients(const ErrorType& error, std::tuple& network) - { - Update(std::get(network), - std::get(network).InputParameter(), - error, - std::get(network).OutputParameter()); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(LayerType& layer, - const InputType& input, - const ErrorType& error, - GradientType& /* gradient */) - { - layer.Gradient(input, error, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(LayerType& /* layer */, - const InputType& /* input */, - const ErrorType& /* error */, - GradientType& /* gradient */) - { - // Nothing to do here - } - - //! The locator network. - LocatorType locator; - - //! The location sensor network. - LocationSensorType locationSensor; - - //! The glimpse sensor network. - GlimpseSensorType glimpseSensor; - - //! The glimpse network. - GlimpseType glimpse; - - //! The start network. - StartType start; - - //! The feedback network. - FeedbackType feedback; - - //! The transfer network. - TransferType transfer; - - //! The classifier network. - ClassifierType classifier; - - //! The reward predictor network. - RewardPredictorType rewardPredictor; - - //! The number of steps for the back-propagate through time. - size_t nStep; - - //! Locally stored network input size. - size_t inputSize; - - //! The current evaluation mode (training or testing). - bool deterministic; - - //! The index of the current step. - size_t step; - - //! 
The activation storage we are using to perform the feed backward pass for - //! the glimpse network. - boost::ptr_vector glimpseActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the locator network. - boost::ptr_vector locatorActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the feedback network. - boost::ptr_vector feedbackActivations; - - //! The activation storage we are using to save the feedback network input. - boost::ptr_vector feedbackActivationsInput; - - //! The activation storage we are using to perform the feed backward pass for - //! the transfer network. - boost::ptr_vector transferActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the location sensor network. - boost::ptr_vector locationSensorActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the glimpse sensor network. - boost::ptr_vector glimpseSensorMatActivations; - boost::ptr_vector glimpseSensorCubeActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the locator input. - boost::ptr_vector locatorInput; - - //! The storage we are using to save the location. - boost::ptr_vector location; - - //! The current number of activations in the glimpse sensor network. - size_t glimpseSensorMatCounter; - size_t glimpseSensorCubeCounter; - - //! The current number of activations in the glimpse network. - size_t glimpseActivationsCounter; - - //! The current number of activations in the glimpse start network. - size_t startActivationsCounter; - - //! The current number of activations in the feedback network. - size_t feedbackActivationsCounter; - - //! The current number of activations in the transfer network. - size_t transferActivationsCounter; - - //! The current number of activations in the locator network. - size_t locatorActivationsCounter; - - //! The current number of activations in the location sensor network. - size_t locationSensorActivationsCounter; - - //! The current number of activations in the glimpse sensor network. - size_t glimpseSensorMatActivationsCounter; - size_t glimpseSensorCubeActivationsCounter; - - //! The current number of location for the location storage. - size_t locationCounter; - - //! Matrix of (trained) parameters. - arma::mat parameter; - - //! The matrix of data points (predictors). - arma::mat predictors; - - //! The matrix of responses to the input data points. - arma::mat responses; - - //! The number of separable functions (the number of predictor points). - size_t numFunctions; - - //! Storage the merge the reward input. - arma::field rewardInput; - - //! The current input. - arma::cube input; - - //! The current target. - arma::mat target; - - //! Locally stored performance functions. - NegativeLogLikelihoodLayer<> negativeLogLikelihoodFunction; - VRClassRewardLayer<> vRClassRewardFunction; - - //! Locally stored size of the locator network. - size_t locatorSize; - - //! Locally stored size of the location sensor network. - size_t locationSensorSize; - - //! Locally stored size of the glimpse sensor network. - size_t glimpseSensorSize; - - //! Locally stored size of the glimpse network. - size_t glimpseSize; - - //! Locally stored size of the start network. - size_t startSize; - - //! Locally stored size of the feedback network. - size_t feedbackSize; - - //! Locally stored size of the transfer network. - size_t transferSize; - - //! 
Locally stored size of the classifier network. - size_t classifierSize; - - //! Locally stored size of the reward predictor network. - size_t rewardPredictorSize; - - //! Locally stored recurrent gradient. - arma::mat recurrentGradient; - - //! Locally stored action error. - arma::mat actionError; - - //! Locally stored current location. - arma::mat evaluationLocation; -}; // class RecurrentNeuralAttention - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "rmva_impl.hpp" - -#endif diff --git a/src/mlpack/methods/rmva/rmva_impl.hpp b/src/mlpack/methods/rmva/rmva_impl.hpp deleted file mode 100644 index cfb310b3993..00000000000 --- a/src/mlpack/methods/rmva/rmva_impl.hpp +++ /dev/null @@ -1,740 +0,0 @@ -/** - * @file rmva_impl.hpp - * @author Marcus Edel - * - * Implementation of the Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP -#define __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP - -// In case it hasn't been included yet. -#include "rmva.hpp" - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template< - typename TypeLocator, - typename TypeLocationSensor, - typename TypeGlimpseSensor, - typename TypeGlimpse, - typename TypeStart, - typename TypeFeedback, - typename TypeTransfer, - typename TypeClassifier, - typename TypeRewardPredictor -> -RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::RecurrentNeuralAttention(TypeLocator&& locator, - TypeLocationSensor&& locationSensor, - TypeGlimpseSensor&& glimpseSensor, - TypeGlimpse&& glimpse, - TypeStart&& start, - TypeFeedback&& feedback, - TypeTransfer&& transfer, - TypeClassifier&& classifier, - TypeRewardPredictor&& rewardPredictor, - const size_t nStep, - InitializationRuleType initializeRule) : - locator(std::forward(locator)), - locationSensor(std::forward(locationSensor)), - glimpseSensor(std::forward(glimpseSensor)), - glimpse(std::forward(glimpse)), - start(std::forward(start)), - feedback(std::forward(feedback)), - transfer(std::forward(transfer)), - classifier(std::forward(classifier)), - rewardPredictor(std::forward(rewardPredictor)), - nStep(nStep), - inputSize(0) -{ - // Set the network size. 
- locatorSize = NetworkSize(this->locator); - locationSensorSize = NetworkSize(this->locationSensor); - glimpseSensorSize = NetworkSize(this->glimpseSensor); - glimpseSize = NetworkSize(this->glimpse); - feedbackSize = NetworkSize(this->feedback); - transferSize = NetworkSize(this->transfer); - classifierSize = NetworkSize(this->classifier); - rewardPredictorSize = NetworkSize(this->rewardPredictor); - startSize = NetworkSize(this->start); - - initializeRule.Initialize(parameter, locatorSize + locationSensorSize + glimpseSensorSize + - glimpseSize + feedbackSize + transferSize + classifierSize + rewardPredictorSize + startSize, 1); - - // Set the network weights. - NetworkWeights(initializeRule, parameter, this->locator); - NetworkWeights(initializeRule, parameter, this->locationSensor, locatorSize); - NetworkWeights(initializeRule, parameter, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkWeights(initializeRule, parameter, this->glimpse, locatorSize + - locationSensorSize + glimpseSensorSize); - NetworkWeights(initializeRule, parameter, this->feedback, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize); - NetworkWeights(initializeRule, parameter, this->transfer, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize); - NetworkWeights(initializeRule, parameter, this->classifier, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize); - NetworkWeights(initializeRule, parameter, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkWeights(initializeRule, parameter, this->start, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize + rewardPredictorSize); - - rewardInput = arma::field(2, 1); -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template class OptimizerType> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) -{ - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Predict(arma::mat& predictors, arma::mat& responses) -{ - deterministic = true; - - arma::mat responsesTemp; - SinglePredict(arma::cube(predictors.colptr(0), 28, 28, 1), responsesTemp); - - responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); - responses.col(0) = responsesTemp.col(0); - - for (size_t i = 1; i < predictors.n_cols; i++) - { - SinglePredict(arma::cube(predictors.colptr(i), 28, 28, 1), responsesTemp); - responses.col(i) = responsesTemp.col(0); - } -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -double RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) -{ - this->deterministic = deterministic; - - input = arma::cube(predictors.colptr(i), 28, 28, 1); - target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - - // Get the locator input size. - if (!inputSize) - { - inputSize = NetworkInputSize(locator); - } - - glimpseSensorMatCounter = 0; - glimpseSensorCubeCounter = 0; - glimpseActivationsCounter = 0; - locatorActivationsCounter = 0; - locationSensorActivationsCounter = 0; - glimpseSensorMatActivationsCounter = 0; - glimpseSensorCubeActivationsCounter = 0; - locationCounter = 0; - feedbackActivationsCounter = 0; - transferActivationsCounter = 0; - - // Reset networks. - ResetParameter(locator); - ResetParameter(locationSensor); - ResetParameter(glimpseSensor); - ResetParameter(glimpse); - ResetParameter(feedback); - ResetParameter(transfer); - ResetParameter(classifier); - ResetParameter(rewardPredictor); - ResetParameter(start); - - // Reset activation storage. - glimpseActivations.clear(); - locatorActivations.clear(); - locationSensorActivations.clear(); - glimpseSensorMatActivations.clear(); - glimpseSensorCubeActivations.clear(); - feedbackActivations.clear(); - transferActivations.clear(); - locatorInput.clear(); - location.clear(); - feedbackActivationsInput.clear(); - - // Sample an initial starting actions by forwarding zeros through the locator. - locatorInput.push_back(new arma::cube(arma::zeros(inputSize, 1, - input.n_slices))); - - // Forward pass throught the recurrent network. - for (step = 0; step < nStep; step++) - { - // Locator forward pass. - Forward(locatorInput.back(), locator); - SaveActivations(locatorActivations, locator, locatorActivationsCounter); - - // Location sensor forward pass. 
- Forward(std::get::value - 1>( - locator).OutputParameter(), locationSensor); - SaveActivations(locationSensorActivations, locationSensor, - locationSensorActivationsCounter); - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(std::get::value - 1>( - locator).OutputParameter(), glimpseSensor); - - // Save the location for the backward path. - location.push_back(new arma::mat(std::get::value - 1>(locator).OutputParameter())); - - // Glimpse sensor forward pass. - Forward(input, glimpseSensor); - SaveActivations(glimpseSensorMatActivations, glimpseSensorCubeActivations, - glimpseSensorMatCounter, glimpseSensorCubeCounter, glimpseSensor); - - // Concat the parameter activation from the location sensor and - // glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Glimpse forward pass. - Forward(concatLayerOutput, glimpse); - SaveActivations(glimpseActivations, glimpse, glimpseActivationsCounter); - - if (step == 0) - { - // Start forward pass. - Forward(std::get::value - 1>( - glimpse).OutputParameter(), start); - - // Transfer forward pass. - Forward(std::get::value - 1>( - start).OutputParameter(), transfer); - SaveActivations(transferActivations, transfer, - transferActivationsCounter); - } - else - { - // Feedback forward pass. - Forward(std::get::value - 1>( - transfer).OutputParameter(), feedback); - SaveActivations(feedbackActivations, feedback, - feedbackActivationsCounter); - - feedbackActivationsInput.push_back(new arma::mat( - std::get::value - 1>( - transfer).OutputParameter().memptr(), - std::get::value - 1>( - transfer).OutputParameter().n_rows, - std::get::value - 1>( - transfer).OutputParameter().n_cols)); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Transfer forward pass. - Forward(feedbackLayerOutput, transfer); - SaveActivations(transferActivations, transfer, - transferActivationsCounter); - } - - // Update the input for the next run - locatorInput.push_back(new arma::cube( - std::get::value - 1>( - transfer).OutputParameter().memptr(), locatorInput.back().n_rows, - locatorInput.back().n_cols, locatorInput.back().n_slices)); - } - - // Classifier forward pass. - Forward(locatorInput.back().slice(0), classifier); - - // Reward predictor forward pass. - Forward(std::get::value - 1>( - classifier).OutputParameter(), rewardPredictor); - - double performanceError = negativeLogLikelihoodFunction.Forward( - std::get::value - 1>( - classifier).OutputParameter(), target); - - // Create the input for the vRClassRewardFunction function. - // For which we use the output from the classifier and the rewardPredictor. 
- rewardInput(0, 0) = std::get::value - 1>( - classifier).OutputParameter(); - rewardInput(1, 0) = std::get::value - 1>( - rewardPredictor).OutputParameter(); - - performanceError += vRClassRewardFunction.Forward(rewardInput, target); - - return performanceError; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) -{ - Evaluate(parameter, i, false); - - // Reset the gradient. - if (gradient.is_empty()) - { - gradient = arma::zeros(parameter.n_rows, parameter.n_cols); - } - else - { - gradient.zeros(); - } - - // Reset the recurrent gradient. - if (recurrentGradient.is_empty()) - { - recurrentGradient = arma::zeros(parameter.n_rows, - parameter.n_cols); - - actionError = arma::zeros( - std::get::value - 1>( - locator).OutputParameter().n_rows, - std::get::value - 1>( - locator).OutputParameter().n_cols); - } - else - { - recurrentGradient.zeros(); - } - - // Set the recurrent gradient. - NetworkGradients(recurrentGradient, this->locator); - NetworkGradients(recurrentGradient, this->locationSensor, locatorSize); - NetworkGradients(recurrentGradient, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkGradients(recurrentGradient, this->glimpse, locatorSize + - locationSensorSize + glimpseSensorSize); - NetworkGradients(recurrentGradient, this->feedback, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize); - NetworkGradients(recurrentGradient, this->transfer, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize); - - // Set the gradient. - NetworkGradients(gradient, this->classifier, locatorSize + locationSensorSize - + glimpseSensorSize + glimpseSize + feedbackSize + transferSize); - NetworkGradients(gradient, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkGradients(gradient, this->start, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize + transferSize + - classifierSize + rewardPredictorSize); - - // Negative log likelihood backward pass. - negativeLogLikelihoodFunction.Backward(std::get::value - 1>(classifier).OutputParameter(), target, - negativeLogLikelihoodFunction.OutputParameter()); - - const double reward = vRClassRewardFunction.Backward(rewardInput, target, - vRClassRewardFunction.OutputParameter()); - - // Propogate reward through all modules. - ResetReward(reward, locator); - ResetReward(reward, locationSensor); - ResetReward(reward, glimpseSensor); - ResetReward(reward, glimpse); - ResetReward(reward, classifier); - - // RewardPredictor backward pass. - Backward(vRClassRewardFunction.OutputParameter()(1, 0), rewardPredictor); - - arma::mat classifierError = - negativeLogLikelihoodFunction.OutputParameter() + - vRClassRewardFunction.OutputParameter()(0, 0) + - std::get<0>(rewardPredictor).Delta(); - - // Classifier backward pass. 
- Backward(classifierError, classifier); - - // Set the initial recurrent error for the first backward step. - arma::mat recurrentError = std::get<0>(classifier).Delta(); - - for (step = nStep - 1; nStep >= 0; step--) - { - // Load the locator activations. - LoadActivations(locatorInput[step], locatorActivations, - locatorActivationsCounter, locator); - - // Load the location sensor activations. - LoadActivations(std::get::value - 1>( - locator).OutputParameter(), locationSensorActivations, - locationSensorActivationsCounter, locationSensor); - - // Load the glimpse sensor activations. - LoadActivations(input, glimpseSensorMatActivations, - glimpseSensorCubeActivations, glimpseSensorMatCounter, - glimpseSensorCubeCounter, glimpseSensor); - - // Concat the parameter activation from the location and glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Load the glimpse activations. - LoadActivations(concatLayerOutput, glimpseActivations, - glimpseActivationsCounter, glimpse); - - - if (step == 0) - { - // Load the transfer activations. - LoadActivations(std::get::value - 1>( - start).OutputParameter(), transferActivations, - transferActivationsCounter, transfer); - } - else - { - // Load the feedback activations. - LoadActivations(std::get::value - 1>( - transfer).OutputParameter(), feedbackActivations, - feedbackActivationsCounter, feedback); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Load the transfer activations. - LoadActivations(feedbackLayerOutput, transferActivations, - transferActivationsCounter, transfer); - } - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(location[step], glimpseSensor); - - // Locator backward pass. - Backward(actionError, locator); - - // Transfer backward pass. - Backward(recurrentError, transfer); - - // glimpse network - Backward(std::get<0>(transfer).Delta(), glimpse); - - // Split up the error of the concat layer. - arma::mat locationSensorError = std::get<0>(glimpse).Delta().submat( - 0, 0, std::get<0>(glimpse).Delta().n_elem / 2 - 1, 0); - arma::mat glimpseSensorError = std::get<0>(glimpse).Delta().submat( - std::get<0>(glimpse).Delta().n_elem / 2, 0, - std::get<0>(glimpse).Delta().n_elem - 1, 0); - - // Location sensor backward pass. - Backward(locationSensorError, locationSensor); - - // Glimpse sensor backward pass. - Backward(glimpseSensorError, glimpseSensor); - - if (step != 0) - { - // Feedback backward pass. - Backward(std::get<0>(transfer).Delta(), feedback); - } - - // Update the recurrent network gradients. - UpdateGradients(std::get<0>(locationSensor).Delta(), locator); - UpdateGradients(std::get<0>(transfer).Delta(), glimpse); - UpdateGradients(std::get<0>(transfer).Delta(), locationSensor); - UpdateGradients(std::get<0>(transfer).Delta(), glimpseSensor); - - // Feedback module. - if (step != 0) - { - UpdateGradients(feedbackActivationsInput[step - 1], - std::get<0>(transfer).Delta(), feedback); - } - else - { - // Set the feedback gradient to zero. 
- recurrentGradient.submat(locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize, 0, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize - 1, 0).zeros(); - - UpdateGradients(std::get<0>(transfer).Delta(), start); - } - - // Update the overall recurrent gradient. - gradient += recurrentGradient; - - if (step != 0) - { - // Update the recurrent error for the next backward step. - recurrentError = std::get<0>(locator).Delta() + - std::get<0>(feedback).Delta(); - } - else - { - break; - } - } - - // Reward predictor gradient update. - UpdateGradients(vRClassRewardFunction.OutputParameter()(1, 0), - rewardPredictor); - - // Classifier gradient update. - UpdateGradients(std::get<1>(classifier).Delta(), classifier); -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -const arma::mat& RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Location() -{ - if (!location.empty()) - { - evaluationLocation = arma::mat(location[0].n_elem, location.size()); - - for (size_t i = 0; i < location.size(); i++) - { - evaluationLocation.col(i) = arma::vectorise(location[i]); - } - } - - return evaluationLocation; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Serialize(Archive& ar, const unsigned int /* version */) -{ - ar & data::CreateNVP(parameter, "parameter"); - ar & data::CreateNVP(inputSize, "inputSize"); - ar & data::CreateNVP(nStep, "nStep"); - - // If we are loading, we need to initialize the weights. - if (Archive::is_loading::value) - { - // Set the netork size. - locatorSize = NetworkSize(this->locator); - locationSensorSize = NetworkSize(this->locationSensor); - glimpseSensorSize = NetworkSize(this->glimpseSensor); - glimpseSize = NetworkSize(this->glimpse); - feedbackSize = NetworkSize(this->feedback); - transferSize = NetworkSize(this->transfer); - classifierSize = NetworkSize(this->classifier); - rewardPredictorSize = NetworkSize(this->rewardPredictor); - startSize = NetworkSize(this->start); - - // Set the network weights. 
- NetworkWeights(parameter, this->locator); - NetworkWeights(parameter, this->locationSensor, locatorSize); - NetworkWeights(parameter, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkWeights(parameter, this->glimpse, locatorSize + locationSensorSize + - glimpseSensorSize); - NetworkWeights(parameter, this->feedback, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize); - NetworkWeights(parameter, this->transfer, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize); - NetworkWeights(parameter, this->classifier, locatorSize + locationSensorSize - + glimpseSensorSize + glimpseSize + feedbackSize + transferSize); - NetworkWeights(parameter, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkWeights(parameter, this->start, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize + transferSize + - classifierSize + rewardPredictorSize); - } -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/rmva/rmva_main.cpp b/src/mlpack/methods/rmva/rmva_main.cpp deleted file mode 100644 index a3483d63de6..00000000000 --- a/src/mlpack/methods/rmva/rmva_main.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/** - * @file rmva_main.cpp - * @author Marcus Edel - * - * Main executable for the Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include "rmva.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -using namespace mlpack; -using namespace mlpack::ann; -using namespace mlpack::optimization; -using namespace std; - -PROGRAM_INFO("Recurrent Model for Visual Attention", - "This program trains the Recurrent Model for Visual Attention on the given " - "labeled training set, or loads a model from the given model file, and then" - " may use that trained model to classify the points in a given test set." - "\n\n" - "Labels are expected to be passed in separately as their own file " - "(--labels_file). If training is not desired, a pre-existing model can be " - "loaded with the --input_model_file (-m) option." - "\n\n" - "If classifying a test set is desired, the test set should be in the file " - "specified with the --test_file (-T) option, and the classifications will " - "be saved to the file specified with the --output_file (-o) option. If " - "saving a trained model is desired, the --output_model_file (-M) option " - "should be given."); - -// Model loading/saving. -PARAM_STRING_IN("input_model_file", "File containing the Recurrent Model for " - "Visual Attention.", "m", ""); -PARAM_STRING_OUT("output_model_file", "File to save trained Recurrent Model for" - " Visual Attention to.", "M"); - -// Training parameters. 
-PARAM_MATRIX_IN("training", "Matrix containing the training set.", "t"); -PARAM_MATRIX_IN("labels", "Matrix containing labels for the training set.", - "l"); - -PARAM_STRING_IN("optimizer", "Optimizer to use; 'sgd', 'minibatch-sgd', or " - "'lbfgs'.", "O", "minibatch-sgd"); - -PARAM_INT_IN("max_iterations", "Maximum number of iterations for SGD or RMSProp" - " (0 indicates no limit).", "n", 500000); -PARAM_DOUBLE_IN("tolerance", "Maximum tolerance for termination of SGD or " - "RMSProp.", "e", 1e-7); - -PARAM_DOUBLE_IN("step_size", "Step size for stochastic gradient descent " - "(alpha),", "a", 0.01); -PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are " - "visited for SGD or mini-batch SGD.", "L"); -PARAM_INT_IN("batch_size", "Batch size for mini-batch SGD.", "b", 20); - -PARAM_INT_IN("rho", "Number of steps for the back-propagate through time.", "r", - 7); - -PARAM_INT_IN("classes", "The number of classes.", "c", 10); - -PARAM_INT_IN("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0); - -// Test parameters. -PARAM_MATRIX_IN("test", "Matrix containing the test set.", "T"); -PARAM_MATRIX_OUT("output", "The matrix in which the predicted labels for the " - "test set will be written.", "o"); - -int main(int argc, char** argv) -{ - CLI::ParseCommandLine(argc, argv); - - // Check input parameters. - if (CLI::HasParam("training") && CLI::HasParam("input_model_file")) - Log::Fatal << "Cannot specify both --training_file (-t) and " - << "--input_model_file (-m)!" << endl; - - if (!CLI::HasParam("training") && !CLI::HasParam("input_model_file")) - Log::Fatal << "Neither --training_file (-t) nor --input_model_file (-m) are" - << " specified!" << endl; - - if (!CLI::HasParam("training") && CLI::HasParam("labels")) - Log::Warn << "--labels_file (-l) ignored because --training_file (-t) is " - << "not specified." << endl; - - if (!CLI::HasParam("output") && !CLI::HasParam("output_model_file")) - Log::Warn << "Neither --output_file (-o) nor --output_model_file (-M) " - << "specified; no output will be saved!" << endl; - - if (CLI::HasParam("output") && !CLI::HasParam("test")) - Log::Warn << "--output_file (-o) ignored because no test file specified " - << "with --test_file (-T)." << endl; - - if (!CLI::HasParam("output") && CLI::HasParam("test")) - Log::Warn << "--test_file (-T) specified, but classification results will " - << "not be saved because --output_file (-o) is not specified." << endl; - - const string optimizerType = CLI::GetParam("optimizer"); - - if ((optimizerType != "sgd") && (optimizerType != "lbfgs") && - (optimizerType != "minibatch-sgd")) - { - Log::Fatal << "Optimizer type '" << optimizerType << "' unknown; must be " - << "'sgd', 'minibatch-sgd', or 'lbfgs'!" 
<< endl; - } - - const double stepSize = CLI::GetParam("step_size"); - const size_t maxIterations = (size_t) CLI::GetParam("max_iterations"); - const double tolerance = CLI::GetParam("tolerance"); - const bool shuffle = !CLI::HasParam("linear_scan"); - const size_t batchSize = (size_t) CLI::GetParam("batch_size"); - const size_t rho = (size_t) CLI::GetParam("rho"); - const size_t numClasses = (size_t) CLI::GetParam("classes"); - - const size_t hiddenSize = 256; - const double unitPixels = 13; - const double locatorStd = 0.11; - const size_t imageSize = 28; - const size_t locatorHiddenSize = 128; - const size_t glimpsePatchSize = 8; - const size_t glimpseDepth = 1; - const size_t glimpseScale = 2; - const size_t glimpseHiddenSize = 128; - const size_t imageHiddenSize = 256; - - - // Locator network. - LinearMappingLayer<> linearLayer0(hiddenSize, 2); - BiasLayer<> biasLayer0(2, 1); - HardTanHLayer<> hardTanhLayer0; - ReinforceNormalLayer<> reinforceNormalLayer0(2 * locatorStd); - HardTanHLayer<> hardTanhLayer1; - MultiplyConstantLayer<> multiplyConstantLayer0(2 * unitPixels / imageSize); - auto locator = std::tie(linearLayer0, biasLayer0, hardTanhLayer0, - reinforceNormalLayer0, hardTanhLayer1, multiplyConstantLayer0); - - // Location sensor network. - LinearLayer<> linearLayer1(2, locatorHiddenSize); - BiasLayer<> biasLayer1(locatorHiddenSize, 1); - ReLULayer<> rectifierLayer0; - auto locationSensor = std::tie(linearLayer1, biasLayer1, rectifierLayer0); - - // Glimpse sensor network. - GlimpseLayer<> glimpseLayer0(1, glimpsePatchSize, glimpseDepth, glimpseScale); - LinearMappingLayer<> linearLayer2(64, glimpseHiddenSize); - BiasLayer<> biasLayer2(glimpseHiddenSize, 1); - ReLULayer<> rectifierLayer1; - auto glimpseSensor = std::tie(glimpseLayer0, linearLayer2, biasLayer2, - rectifierLayer1); - - // Glimpse network. - LinearLayer<> linearLayer3(glimpseHiddenSize + locatorHiddenSize, - imageHiddenSize); - BiasLayer<> biasLayer3(imageHiddenSize, 1); - ReLULayer<> rectifierLayer2; - LinearLayer<> linearLayer4(imageHiddenSize, hiddenSize); - BiasLayer<> biasLayer4(hiddenSize, 1); - auto glimpse = std::tie(linearLayer3, biasLayer3, rectifierLayer2, - linearLayer4, biasLayer4); - - // Feedback network. - LinearLayer<> recurrentLayer0(imageHiddenSize, hiddenSize); - BiasLayer<> recurrentLayerBias0(hiddenSize, 1); - auto feedback = std::tie(recurrentLayer0, recurrentLayerBias0); - - // Start network. - AdditionLayer<> startLayer0(hiddenSize, 1); - auto start = std::tie(startLayer0); - - // Transfer network. - ReLULayer<> rectifierLayer3; - auto transfer = std::tie(rectifierLayer3); - - // Classifier network. - LinearLayer<> linearLayer5(hiddenSize, numClasses); - BiasLayer<> biasLayer6(numClasses, 1); - LogSoftmaxLayer<> logSoftmaxLayer0; - auto classifier = std::tie(linearLayer5, biasLayer6, logSoftmaxLayer0); - - // Reward predictor network. - ConstantLayer<> constantLayer0(1, 1); - AdditionLayer<> additionLayer0(1, 1); - auto rewardPredictor = std::tie(constantLayer0, additionLayer0); - - // Recurrent Model for Visual Attention. - RecurrentNeuralAttention - net(locator, locationSensor, glimpseSensor, glimpse, start, feedback, - transfer, classifier, rewardPredictor, rho); - - // Either we have to train a model, or load a model. - if (CLI::HasParam("training")) - { - arma::mat trainingData = std::move(CLI::GetParam("training")); - - arma::mat labels; - - // Did the user pass in labels? - if (CLI::HasParam("labels")) - { - // Load labels. 
- labels = std::move(CLI::GetParam("labels")); - - // Do the labels need to be transposed? - if (labels.n_cols == 1) - labels = labels.t(); - } - - // Now run the optimization. - if (optimizerType == "sgd") - { - SGD opt(net); - opt.StepSize() = stepSize; - opt.MaxIterations() = maxIterations; - opt.Tolerance() = tolerance; - opt.Shuffle() = shuffle; - - Timer::Start("rmva_training"); - net.Train(trainingData, labels, opt); - Timer::Stop("rmva_training"); - } - else if (optimizerType == "minibatch-sgd") - { - MiniBatchSGD opt(net); - opt.StepSize() = stepSize; - opt.MaxIterations() = maxIterations; - opt.Tolerance() = tolerance; - opt.Shuffle() = shuffle; - opt.BatchSize() = batchSize; - - Timer::Start("rmva_training"); - net.Train(trainingData, labels, opt); - Timer::Stop("rmva_training"); - } - } - else - { - // Load the model from file. - data::Load(CLI::GetParam("input_model_file"), "rmva_model", net); - } - - // Do we need to do testing? - if (CLI::HasParam("test")) - { - arma::mat testingData = std::move(CLI::GetParam("test")); - - // Time the running of the Naive Bayes Classifier. - arma::mat results; - Timer::Start("rmva_testing"); - net.Predict(testingData, results); - Timer::Stop("rmva_testing"); - - if (CLI::HasParam("output")) - CLI::GetParam("output") = std::move(results); - } - - // Save the model, if requested. - if (CLI::HasParam("output_model_file")) - data::Save(CLI::GetParam("output_model_file"), "rmva_model", net); -} diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp index eb172ab15b5..178341ead5a 100644 --- a/src/mlpack/prereqs.hpp +++ b/src/mlpack/prereqs.hpp @@ -62,6 +62,12 @@ using enable_if_t = typename enable_if::type; #endif #endif +// Increase the number of template arguments for the boost list class. +#undef BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#undef BOOST_MPL_LIMIT_LIST_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#define BOOST_MPL_LIMIT_LIST_SIZE 40 + // We'll need the necessary boost::serialization features, as well as what we // use with mlpack. In Boost 1.59 and newer, the BOOST_PFTO code is no longer // defined, but we still need to define it (as nothing) so that the mlpack @@ -95,4 +101,4 @@ using enable_if_t = typename enable_if::type; #define ARMA_USE_CXX11 #endif -#endif +#endif \ No newline at end of file diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index 3b3ab0d30b1..b1dc438eaa2 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -6,6 +6,7 @@ add_executable(mlpack_test ada_delta_test.cpp akfn_test.cpp aknn_test.cpp + ann_layer_test.cpp arma_extend_test.cpp armadillo_svd_test.cpp aug_lagrangian_test.cpp @@ -56,7 +57,6 @@ add_executable(mlpack_test mlpack_test.cpp nbc_test.cpp nca_test.cpp - network_util_test.cpp nmf_test.cpp nystroem_method_test.cpp octree_test.cpp diff --git a/src/mlpack/tests/activation_functions_test.cpp b/src/mlpack/tests/activation_functions_test.cpp index bebca0de09b..94df3b59380 100644 --- a/src/mlpack/tests/activation_functions_test.cpp +++ b/src/mlpack/tests/activation_functions_test.cpp @@ -12,23 +12,13 @@ */ #include +#include #include #include #include #include #include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - #include #include "test_tools.hpp" @@ -125,7 +115,7 @@ void CheckInverseCorrect(const arma::colvec input) /* * Implementation of the HardTanH activation function test. 
The function is - * implemented as a HardTanH Layer in hard_tanh_layer.hpp + * implemented as a HardTanH Layer in hard_tanh.hpp * * @param input Input data used for evaluating the HardTanH activation function. * @param target Target data used to evaluate the HardTanH activation. @@ -133,11 +123,11 @@ void CheckInverseCorrect(const arma::colvec input) void CheckHardTanHActivationCorrect(const arma::colvec input, const arma::colvec target) { - HardTanHLayer<> htf; + HardTanH<> htf; // Test the activation function using the entire vector as input. arma::colvec activations; - htf.Forward(input, activations); + htf.Forward(std::move(input), std::move(activations)); for (size_t i = 0; i < activations.n_elem; i++) { BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3); @@ -146,7 +136,7 @@ void CheckHardTanHActivationCorrect(const arma::colvec input, /* * Implementation of the HardTanH activation function derivative test. The - * derivative is implemented as HardTanH Layer in hard_tanh_layer.hpp + * derivative is implemented as HardTanH Layer in hard_tanh.hpp * * @param input Input data used for evaluating the HardTanH activation function. * @param target Target data used to evaluate the HardTanH activation. @@ -154,14 +144,15 @@ void CheckHardTanHActivationCorrect(const arma::colvec input, void CheckHardTanHDerivativeCorrect(const arma::colvec input, const arma::colvec target) { - HardTanHLayer<> htf; + HardTanH<> htf; // Test the calculation of the derivatives using the entire vector as input. arma::colvec derivatives; // This error vector will be set to 1 to get the derivatives. - arma::colvec error(input.n_elem); - htf.Backward(input, (arma::colvec)error.ones(), derivatives); + arma::colvec error = arma::ones(input.n_elem); + htf.Backward(std::move(input), std::move(error), std::move(derivatives)); + for (size_t i = 0; i < derivatives.n_elem; i++) { BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3); @@ -170,7 +161,7 @@ void CheckHardTanHDerivativeCorrect(const arma::colvec input, /* * Implementation of the LeakyReLU activation function test. The function is - * implemented as LeakyReLU layer in the file leaky_relu_layer.hpp + * implemented as LeakyReLU layer in the file leaky_relu.hpp * * @param input Input data used for evaluating the LeakyReLU activation function. * @param target Target data used to evaluate the LeakyReLU activation. @@ -178,11 +169,11 @@ void CheckHardTanHDerivativeCorrect(const arma::colvec input, void CheckLeakyReLUActivationCorrect(const arma::colvec input, const arma::colvec target) { - LeakyReLULayer<> lrf; + LeakyReLU<> lrf; // Test the activation function using the entire vector as input. arma::colvec activations; - lrf.Forward(input, activations); + lrf.Forward(std::move(input), std::move(activations)); for (size_t i = 0; i < activations.n_elem; i++) { BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3); @@ -197,18 +188,17 @@ void CheckLeakyReLUActivationCorrect(const arma::colvec input, * @param input Input data used for evaluating the LeakyReLU activation function. * @param target Target data used to evaluate the LeakyReLU activation. */ - void CheckLeakyReLUDerivativeCorrect(const arma::colvec input, const arma::colvec target) { - LeakyReLULayer<> lrf; + LeakyReLU<> lrf; // Test the calculation of the derivatives using the entire vector as input. arma::colvec derivatives; // This error vector will be set to 1 to get the derivatives. 
- arma::colvec error(input.n_elem); - lrf.Backward(input, (arma::colvec)error.ones(), derivatives); + arma::colvec error = arma::ones(input.n_elem); + lrf.Backward(std::move(input), std::move(error), std::move(derivatives)); for (size_t i = 0; i < derivatives.n_elem; i++) { BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3); } diff --git a/src/mlpack/tests/ann_layer_test.cpp b/src/mlpack/tests/ann_layer_test.cpp new file mode 100644 index 00000000000..101a0170751 --- /dev/null +++ b/src/mlpack/tests/ann_layer_test.cpp @@ -0,0 +1,524 @@ +/** + * @file ann_layer_test.cpp + * @author Marcus Edel + * + * Tests the ann layer modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#include + +#include +#include +#include +#include + +#include +#include "test_tools.hpp" + +using namespace mlpack; +using namespace mlpack::ann; + +BOOST_AUTO_TEST_SUITE(ANNLayerTest); + +// Helper function which calls the Reset function of the given module. +template +void ResetFunction( + T& layer, + typename std::enable_if::value>::type* = 0) +{ + layer.Reset(); +} + +template +void ResetFunction( + T& /* layer */, + typename std::enable_if::value>::type* = 0) +{ + /* Nothing to do here */ +} + +// Approximate Jacobian and supposedly-true Jacobian, then compare them +// similarly to before. +template +double JacobianTest(ModuleType& module, + arma::mat& input, + const double minValue = -2, + const double maxValue = -1, + const double perturbation = 1e-6) +{ + arma::mat output, outputA, outputB, jacobianA, jacobianB; + + // Initialize the input matrix. + RandomInitialization init(minValue, maxValue); + init.Initialize(input, input.n_rows, input.n_cols); + + // Initialize the module parameters. + ResetFunction(module); + + // Initialize the jacobian matrix. + module.Forward(std::move(input), std::move(output)); + jacobianA = arma::zeros(input.n_elem, output.n_elem); + + // Share the input parameter matrix. + arma::mat sin = arma::mat(input.memptr(), input.n_rows, input.n_cols, + false, false); + + for (size_t i = 0; i < input.n_elem; ++i) + { + double original = sin(i); + sin(i) = original - perturbation; + module.Forward(std::move(input), std::move(outputA)); + sin(i) = original + perturbation; + module.Forward(std::move(input), std::move(outputB)); + sin(i) = original; + + outputB -= outputA; + outputB /= 2 * perturbation; + jacobianA.row(i) = outputB.t(); + } + + // Initialize the derivative parameter. + arma::mat deriv = arma::zeros(output.n_rows, output.n_cols); + + // Share the derivative parameter. + arma::mat derivTemp = arma::mat(deriv.memptr(), deriv.n_rows, deriv.n_cols, + false, false); + + // Initialize the jacobian matrix. + jacobianB = arma::zeros(input.n_elem, output.n_elem); + + for (size_t i = 0; i < derivTemp.n_elem; ++i) + { + deriv.zeros(); + derivTemp(i) = 1; + + arma::mat delta; + module.Backward(std::move(input), std::move(deriv), std::move(delta)); + + jacobianB.col(i) = delta; + } + + return arma::max(arma::max(arma::abs(jacobianA - jacobianB))); + } + +// Approximate Jacobian and supposedly-true Jacobian, then compare them +// similarly to before.
+template +double JacobianPerformanceTest(ModuleType& module, + arma::mat& input, + arma::mat& target, + const double eps = 1e-6) +{ + module.Forward(std::move(input), std::move(target)); + + arma::mat delta; + module.Backward(std::move(input), std::move(target), std::move(delta)); + + arma::mat centralDifference = arma::zeros(delta.n_rows, delta.n_cols); + arma::mat inputTemp = arma::mat(input.memptr(), input.n_rows, input.n_cols, + false, false); + + arma::mat centralDifferenceTemp = arma::mat(centralDifference.memptr(), + centralDifference.n_rows, centralDifference.n_cols, false, false); + + for (size_t i = 0; i < input.n_elem; ++i) + { + inputTemp(i) = inputTemp(i) + eps; + double outputA = module.Forward(std::move(input), std::move(target)); + inputTemp(i) = inputTemp(i) - (2 * eps); + double outputB = module.Forward(std::move(input), std::move(target)); + + centralDifferenceTemp(i) = (outputA - outputB) / ( 2 * eps); + inputTemp(i) = inputTemp(i) + eps; + } + + return arma::max(arma::max(arma::abs(centralDifference - delta))); +} + +/** + * Simple add module test. + */ +BOOST_AUTO_TEST_CASE(SimpleAddLayerTest) +{ + arma::mat output, input, delta; + Add<> module(10); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(module.Parameters()), arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(delta)); + + // Test the forward function. + input = arma::ones(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(10 + arma::accu(module.Parameters()), + arma::accu(output), 1e-3); + + // Test the backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_CLOSE(arma::accu(output), arma::accu(delta), 1e-3); +} + +/** + * Jacobian add module test. + */ +BOOST_AUTO_TEST_CASE(JacobianAddLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t elements = math::RandInt(2, 1000); + arma::mat input; + input.set_size(elements, 1); + + Add<> module(elements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple constant module test. + */ +BOOST_AUTO_TEST_CASE(SimpleConstantLayerTest) +{ + arma::mat output, input, delta; + Constant<> module(10, 3.0); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(output), 30.0); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); + + // Test the forward function. + input = arma::ones(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(output), 30.0); + + // Test the backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian constant module test. + */ +BOOST_AUTO_TEST_CASE(JacobianConstantLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t elements = math::RandInt(2, 1000); + arma::mat input; + input.set_size(elements, 1); + + Constant<> module(elements, 1.0); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple dropout module test. 
+ */ +BOOST_AUTO_TEST_CASE(SimpleDropoutLayerTest) +{ + // Initialize the probability of setting a value to zero and the scale + // parameter. + const double p = 0.2; + const double scale = 1.0 / (1.0 - p); + + // Initialize the input parameter. + arma::mat input(1000, 1); + input.fill(1 - p); + + Dropout<> module(p); + module.Deterministic() = false; + + // Test the Forward function. + arma::mat output; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_LE( + arma::as_scalar(arma::abs(arma::mean(output) - (1 - p))), 0.05); + + // Test the Backward function. + arma::mat delta; + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_LE( + arma::as_scalar(arma::abs(arma::mean(delta) - (1 - p))), 0.05); + + // Test the Forward function. + module.Deterministic() = true; + module.Rescale() = false; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(input), arma::accu(output)); + + // Test the Forward function. + module.Rescale() = true; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(arma::accu(input) * scale, arma::accu(output), 1e-3); +} + +/** + * Simple linear module test. + */ +BOOST_AUTO_TEST_CASE(SimpleLinearLayerTest) +{ + arma::mat output, input, delta; + Linear<> module(10, 10); + module.Parameters().randu(); + module.Reset(); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(arma::accu( + module.Parameters().submat(100, 0, module.Parameters().n_elem - 1, 0)), + arma::accu(output), 1e-3); + + // Test the Backward function. + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian linear module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLinearLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + const size_t outputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + Linear<> module(inputElements, outputElements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple linear no bias module test. + */ +BOOST_AUTO_TEST_CASE(SimpleLinearNoBiasLayerTest) +{ + arma::mat output, input, delta; + LinearNoBias<> module(10, 10); + module.Parameters().randu(); + module.Reset(); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(0, arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian linear no bias module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLinearNoBiasLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + const size_t outputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + LinearNoBias<> module(inputElements, outputElements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian negative log likelihood module test. 
+ */ +BOOST_AUTO_TEST_CASE(JacobianNegativeLogLikelihoodLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + NegativeLogLikelihood<> module; + const size_t inputElements = math::RandInt(5, 100); + arma::mat input; + RandomInitialization init(0, 1); + init.Initialize(input, inputElements, 1); + + arma::mat target(1, 1); + target(0) = math::RandInt(1, inputElements - 1); + + double error = JacobianPerformanceTest(module, input, target); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian LeakyReLU module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLeakyReLULayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + LeakyReLU<> module; + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian MultiplyConstant module test. + */ +BOOST_AUTO_TEST_CASE(JacobianMultiplyConstantLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + MultiplyConstant<> module(3.0); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian HardTanH module test. + */ +BOOST_AUTO_TEST_CASE(JacobianHardTanHLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + HardTanH<> module; + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple select module test. + */ +BOOST_AUTO_TEST_CASE(SimpleSelectLayerTest) +{ + arma::mat outputA, outputB, input, delta; + + input = arma::ones(10, 5); + for (size_t i = 0; i < input.n_cols; ++i) + { + input.col(i) *= i; + } + + // Test the Forward function. + Select<> moduleA(3); + moduleA.Forward(std::move(input), std::move(outputA)); + BOOST_REQUIRE_EQUAL(30, arma::accu(outputA)); + + // Test the Forward function. + Select<> moduleB(3, 5); + moduleB.Forward(std::move(input), std::move(outputB)); + BOOST_REQUIRE_EQUAL(15, arma::accu(outputB)); + + // Test the Backward function. + moduleA.Backward(std::move(input), std::move(outputA), std::move(delta)); + BOOST_REQUIRE_EQUAL(30, arma::accu(delta)); + + // Test the Backward function. + moduleB.Backward(std::move(input), std::move(outputA), std::move(delta)); + BOOST_REQUIRE_EQUAL(15, arma::accu(delta)); +} + +/** + * Simple join module test. + */ +BOOST_AUTO_TEST_CASE(SimpleJoinLayerTest) +{ + arma::mat output, input, delta; + input = arma::ones(10, 5); + + // Test the Forward function. + Join<> module; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(50, arma::accu(output)); + + bool b = output.n_rows == 1 || output.n_cols == 1; + BOOST_REQUIRE_EQUAL(b, true); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(50, arma::accu(delta)); + + b = delta.n_rows == input.n_rows && input.n_cols; + BOOST_REQUIRE_EQUAL(b, true); +} + +/** + * Simple add merge module test. 
+ */ +BOOST_AUTO_TEST_CASE(SimpleAddMergeLayerTest) +{ + arma::mat output, input, delta; + input = arma::ones(10, 1); + + for (size_t i = 0; i < 5; ++i) + { + AddMerge<> module; + const size_t numMergeModules = math::RandInt(2, 10); + for (size_t m = 0; m < numMergeModules; ++m) + { + IdentityLayer<> identityLayer; + identityLayer.Forward(std::move(input), + std::move(identityLayer.OutputParameter())); + + module.Add(identityLayer); + } + + // Test the Forward function. + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(10 * numMergeModules, arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(delta)); + } +} + +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index 52e1a6c6394..0c99722ca1c 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -11,21 +11,9 @@ */ #include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include #include - -#include -#include +#include +#include #include #include "test_tools.hpp" @@ -38,12 +26,9 @@ using namespace mlpack::optimization; BOOST_AUTO_TEST_SUITE(ConvolutionalNetworkTest); /** - * Train and evaluate a vanilla network with the specified structure. + * Train the vanilla network on a larger dataset. */ -template< - typename PerformanceFunction -> -void BuildVanillaNetwork() +BOOST_AUTO_TEST_CASE(VanillaNetworkTest) { arma::mat X; X.load("mnist_first250_training_4s_and_9s.arm"); @@ -56,23 +41,19 @@ void BuildVanillaNetwork() } // Build the target matrix. 
- arma::mat Y = arma::zeros(10, nPoints); + arma::mat Y = arma::zeros(1, nPoints); for (size_t i = 0; i < nPoints; i++) { if (i < nPoints / 2) { - Y.col(i)(5) = 1; + Y(i) = 4; } else { - Y.col(i)(8) = 1; + Y(i) = 9; } } - arma::cube input = arma::cube(28, 28, nPoints); - for (size_t i = 0; i < nPoints; i++) - input.slice(i) = arma::mat(X.colptr(i), 28, 28); - /* * Construct a convolutional neural network with a 28x28x1 input layer, * 24x24x8 convolution layer, 12x12x8 pooling layer, 8x8x12 convolution layer @@ -90,57 +71,46 @@ void BuildVanillaNetwork() * | | +-+ | +-+ | +-+ | +-+ | | | * +---+ +---+ +---+ +---+ +---+ +---+ */ - - ConvLayer<> convLayer0(1, 8, 5, 5); - BiasLayer2D<> biasLayer0(8); - BaseLayer2D<> baseLayer0; - PoolingLayer<> poolingLayer0(2); - - ConvLayer<> convLayer1(8, 12, 5, 5); - BiasLayer2D<> biasLayer1(12); - BaseLayer2D<> baseLayer1; - PoolingLayer<> poolingLayer1(2); - - LinearMappingLayer<> linearLayer0(4608, 10); - BiasLayer<> biasLayer2(10); - SoftmaxLayer<> softmaxLayer0; - - OneHotLayer outputLayer; - - auto modules = std::tie(convLayer0, baseLayer0, linearLayer0, softmaxLayer0); - - CNN net(modules, outputLayer); - biasLayer0.Weights().zeros(); - biasLayer1.Weights().zeros(); - - RMSprop opt(net, 0.01, 0.88, 1e-8, 10 * input.n_slices, 0); - - net.Train(input, Y, opt); - - arma::mat prediction; - net.Predict(input, prediction); + FFN > model; + + model.Add >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28); + model.Add >(); + model.Add >(8, 8, 2, 2); + model.Add >(8, 12, 2, 2); + model.Add >(); + model.Add >(2, 2, 2, 2); + model.Add >(192, 20); + model.Add >(); + model.Add >(20, 30); + model.Add >(); + model.Add >(30, 10); + model.Add >(); + + RMSprop opt(model, 0.01, 0.88, 1e-8, 5000, -1); + + model.Train(std::move(X), std::move(Y), opt); + + arma::mat predictionTemp; + model.Predict(X, predictionTemp); + arma::mat prediction = arma::zeros(1, predictionTemp.n_cols); + + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; - for (size_t i = 0; i < nPoints; i++) + for (size_t i = 0; i < X.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - Y.col(i)))) == 0) + if (prediction(i) == Y(i)) { error++; } } - double classificationError = 1 - double(error) / nPoints; - BOOST_REQUIRE_LE(classificationError, 0.6); -} - -/** - * Train the vanilla network on a larger dataset. - */ -BOOST_AUTO_TEST_CASE(VanillaNetworkTest) -{ - BuildVanillaNetwork(); + double classificationError = 1 - double(error) / X.n_cols; + BOOST_REQUIRE_LE(classificationError, 0.2); } BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp index 4477bf22568..883fe9e6b00 100644 --- a/src/mlpack/tests/feedforward_network_test.cpp +++ b/src/mlpack/tests/feedforward_network_test.cpp @@ -12,21 +12,9 @@ */ #include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include #include +#include +#include #include #include "test_tools.hpp" @@ -40,16 +28,12 @@ BOOST_AUTO_TEST_SUITE(FeedForwardNetworkTest); /** * Train and evaluate a vanilla network with the specified structure. 
*/ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildVanillaNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -76,35 +60,32 @@ void BuildVanillaNetwork(MatType& trainData, * +-----+ +-----+ */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> inputBiasLayer(hiddenLayerSize); - BaseLayer inputBaseLayer; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - BiasLayer<> hiddenBiasLayer1(trainLabels.n_rows); - BaseLayer outputLayer; + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - OutputLayerType classOutputLayer; + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - auto modules = std::tie(inputLayer, inputBiasLayer, inputBaseLayer, - hiddenLayer1, hiddenBiasLayer1, outputLayer); + model.Train(std::move(trainData), std::move(trainLabels), opt); - FFN net(modules, classOutputLayer); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); - - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) { error++; } @@ -125,23 +106,36 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. 
- BuildVanillaNetwork - (trainData, trainLabels, testData, testLabels, 8, 200, 0.1); + BuildVanillaNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -151,33 +145,22 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildVanillaNetwork - (dataset, labels, dataset, labels, 30, 30, 0.4); - - // Vanilla neural net with tanh activation function. - BuildVanillaNetwork - (dataset, labels, dataset, labels, 10, 30, 0.4); + BuildVanillaNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } /** * Train and evaluate a Dropout network with the specified structure. */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildDropoutNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -204,35 +187,33 @@ void BuildDropoutNetwork(MatType& trainData, * +-----+ */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - DropoutLayer<> dropoutLayer0; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, dropoutLayer0, - hiddenLayer1, outputLayer); + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - FFN net(modules, classOutputLayer); + model.Train(std::move(trainData), std::move(trainLabels), opt); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) { error++; } @@ -253,23 +234,36 @@ BOOST_AUTO_TEST_CASE(DropoutNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + 
+ arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. - BuildDropoutNetwork - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); + BuildDropoutNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -279,34 +273,23 @@ BOOST_AUTO_TEST_CASE(DropoutNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildDropoutNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); - - // Vanilla neural net with tanh activation function. - BuildDropoutNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); + BuildDropoutNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } /** * Train and evaluate a DropConnect network(with a baselayer) with the * specified structure. */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildDropConnectNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -334,122 +317,42 @@ void BuildDropConnectNetwork(MatType& trainData, * * */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - DropConnectLayer dropConnectLayer0(hiddenLayer1); - - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, - dropConnectLayer0, outputLayer); + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - FFN net(modules, classOutputLayer); + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); + model.Train(std::move(trainData), std::move(trainLabels), opt); - net.Train(trainData, trainLabels, opt); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - MatType prediction; - net.Predict(testData, prediction); - - size_t error = 0; - for (size_t i = 0; i < testData.n_cols; i++) + for (size_t i = 0; i < predictionTemp.n_cols; ++i) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) - { - error++; - } + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; } - double classificationError = 1 - double(error) / testData.n_cols; - BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold); -} - -/** - * Train and evaluate a DropConnect network(with a linearlayer) with the - * specified structure. 
- */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> -void BuildDropConnectNetworkLinear(MatType& trainData, - MatType& trainLabels, - MatType& testData, - MatType& testLabels, - const size_t hiddenLayerSize, - const size_t maxEpochs, - const double classificationErrorThreshold) -{ - /* - * Construct a feed forward network with trainData.n_rows input nodes, - * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The - * network struct that looks like: - * - * Input Hidden DropConnect Output - * Layer Layer Layer Layer - * +-----+ +-----+ +-----+ +-----+ - * | | | | | | | | - * | +------>| +------>| +------>| | - * | | +>| | | | | | - * +-----+ | +--+--+ +-----+ +-----+ - * | - * Bias | - * Layer | - * +-----+ | - * | | | - * | +-----+ - * | | - * +-----+ - * - * - */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - - DropConnectLayer<> dropConnectLayer0(hiddenLayerSize, trainLabels.n_rows); - - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, - dropConnectLayer0, outputLayer); - - FFN net(modules, classOutputLayer); - - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); - - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); - size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) - { - error++; - } + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) + { + error++; + } } double classificationError = 1 - double(error) / testData.n_cols; BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold); } + /** * Train the dropconnect network on a larger dataset. */ @@ -461,28 +364,36 @@ BOOST_AUTO_TEST_CASE(DropConnectNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. 
- BuildDropConnectNetwork - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); - - BuildDropConnectNetworkLinear - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); + BuildDropConnectNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -492,18 +403,11 @@ BOOST_AUTO_TEST_CASE(DropConnectNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildDropConnectNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); - - - BuildDropConnectNetworkLinear - (dataset, labels, dataset, labels, 8, 30, 0.4); + BuildDropConnectNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } -BOOST_AUTO_TEST_SUITE_END(); +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file diff --git a/src/mlpack/tests/network_util_test.cpp b/src/mlpack/tests/network_util_test.cpp deleted file mode 100644 index 4f0fcf105e0..00000000000 --- a/src/mlpack/tests/network_util_test.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/** - * @file network_util_test.cpp - * @author Marcus Edel - * - * Simple tests for things in the network_util file. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(NetworkUtilTest); - -/** - * Test the network size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(NetworkSizeTest) -{ - // Create a two layer network without weights. - BaseLayer<> baseLayer1; - BaseLayer<> baseLayer2; - auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkSize(noneWeightNetwork), 0); - - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - LinearLayer<> linearLayer2(10, 100); - - // Reuse the layer form the first network. - auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2, - baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkSize(weightNetwork), 1100); -} - -/** - * Test the layer size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(LayerSizeTest) -{ - // Create layer without weights. - BaseLayer<> baseLayer; - BOOST_REQUIRE_EQUAL(LayerSize(baseLayer, baseLayer.OutputParameter()), 0); - - // Create layer with weights. - LinearLayer<> linearLayer(10, 10); - BOOST_REQUIRE_EQUAL(LayerSize(linearLayer, - linearLayer.OutputParameter()), 100); -} - -/** - * Test the network input size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(NetworkInputSizeTest) -{ - // Create a two layer network without weights. - BaseLayer<> baseLayer1; - BaseLayer<> baseLayer2; - auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkInputSize(noneWeightNetwork), 0); - - // Create a two layer network. - LinearLayer<> linearLayer1(5, 10); - LinearLayer<> linearLayer2(10, 100); - - // Reuse the layer form the first network. - auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2, - baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkInputSize(weightNetwork), 5); -} - -/** - * Test the layer input size auxiliary function. 
- */ -BOOST_AUTO_TEST_CASE(LayerInputSizeTest) -{ - // Create layer without weights. - BaseLayer<> baseLayer; - BOOST_REQUIRE_EQUAL(LayerInputSize(baseLayer, - baseLayer.OutputParameter()), 0); - - // Create layer with weights. - LinearLayer<> linearLayer(5, 10); - BOOST_REQUIRE_EQUAL(LayerInputSize(linearLayer, - linearLayer.OutputParameter()), 5); -} - -/** - * Test the network weight auxiliary function using the given initialization - * rule. - */ -BOOST_AUTO_TEST_CASE(NetworkWeightsInitTest) -{ - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - LinearLayer<> linearLayer2(10, 100); - - arma::mat parameter = arma::zeros(1100, 1); - - // Create the network. - auto network = std::tie(linearLayer1, linearLayer2); - - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0); - - RandomInitialization constantInit(1, 1); - NetworkWeights(constantInit, parameter, network); - - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100); - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer2.Weights()), 1000); - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 1100); -} - -/** - * Test the layer weight auxiliary function using the given initialization rule. - */ -BOOST_AUTO_TEST_CASE(LayerWeightsInitTest) -{ - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - - arma::mat parameter = arma::zeros(100, 1); - - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0); - - RandomInitialization constantInit(1, 1); - arma::mat output; - LayerWeights(constantInit, linearLayer1, parameter, 0, output); - - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100); - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 100); -} - -BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/performance_functions_test.cpp b/src/mlpack/tests/performance_functions_test.cpp deleted file mode 100644 index 35902911422..00000000000 --- a/src/mlpack/tests/performance_functions_test.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @file performance_functions_test.cpp - * @author Marcus Edel - * - * Tests for the various performance functions. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(PerformanceFunctionsTest); - -// Test the mean squared error performance function. -BOOST_AUTO_TEST_CASE(MeanSquaredErrorTest) -{ - arma::colvec input("1.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0"); - arma::colvec target = arma::zeros(8); - - BOOST_REQUIRE_EQUAL(MeanSquaredErrorFunction::Error(input, target), 0.5); -} - -// Test the cross entropy performance function. -BOOST_AUTO_TEST_CASE(CrossEntropyErrorTest) -{ - arma::colvec input; - input << std::exp(-2.0) << std::exp(-1.0); - arma::colvec target = arma::ones(2); - - BOOST_REQUIRE_EQUAL(CrossEntropyErrorFunction<>::Error(input, target), 3); -} - -// Test the sum squared error performance function. 
-BOOST_AUTO_TEST_CASE(SumSquaredErrorTest) -{ - arma::colvec input("1.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0"); - arma::colvec target = arma::zeros(8); - - BOOST_REQUIRE_EQUAL(SumSquaredErrorFunction::Error(input, target), 4); -} - -BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/pooling_rules_test.cpp b/src/mlpack/tests/pooling_rules_test.cpp deleted file mode 100644 index 0dd2c9dcb24..00000000000 --- a/src/mlpack/tests/pooling_rules_test.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/** - * @file convolution_test.cpp - * @author Marcus Edel - * - * Tests for various convolution strategies. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(PoolingTest); - -/** - * Test the max pooling rule. - */ -BOOST_AUTO_TEST_CASE(MaxPoolingTest) -{ - // The data was generated by magic(6) in MATLAB. - arma::mat input, output; - input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr - << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr - << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr - << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr - << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr - << 4 << 36 << 29 << 13 << 18 << 11; - - // Expected output of the generated 6 x 6 matrix. - const double poolingOutput = 36; - - MaxPooling poolingRule; - - // Test the pooling function. - BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput); - - // Test the unpooling function. - poolingRule.Unpooling(input, input.max(), output); - BOOST_REQUIRE_EQUAL(arma::accu(output), input.max()); -} - -/** - * Test the mean pooling rule. - */ -BOOST_AUTO_TEST_CASE(MeanPoolingTest) -{ - // The data was generated by magic(6) in MATLAB. - arma::mat input, output; - input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr - << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr - << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr - << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr - << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr - << 4 << 36 << 29 << 13 << 18 << 11; - - // Expected output of the generated 6 x 6 matrix. - const double poolingOutput = 18.5; - - MeanPooling poolingRule; - - // Test the pooling function. - BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput); - - // Test the unpooling function. - poolingRule.Unpooling(input, input.max(), output); - bool b = arma::all(arma::vectorise(output) == (input.max() / input.n_elem)); - BOOST_REQUIRE_EQUAL(b, true); -} - -BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index c49ae423ae2..f7546e83b50 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -11,18 +11,10 @@ */ #include -#include -#include -#include -#include -#include - -#include -#include #include -#include -#include - #include +#include +#include +#include #include #include "test_tools.hpp" @@ -78,19 +70,29 @@ BOOST_AUTO_TEST_CASE(SequenceClassificationTest) // times, I'm fine with that. All I want to know is that the network is able // to escape from local minima and to solve the task. 
size_t successes = 0; + const size_t rho = 10; for (size_t trial = 0; trial < 5; ++trial) { - // Generate 12 (2 * 6) noisy sines. A single sine contains 10 points/features. - arma::mat input, labels; - GenerateNoisySines(input, labels, 10, 6); + // Generate 12 (2 * 6) noisy sines. A single sine contains rho points/features. + arma::mat input, labelsTemp; + GenerateNoisySines(input, labelsTemp, rho, 6); + + arma::mat labels = arma::zeros(rho, labelsTemp.n_cols); + for (size_t i = 0; i < labelsTemp.n_cols; ++i) + { + const int value = arma::as_scalar(arma::find( + arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1; + labels.col(i).fill(value); + } /* - * Construct a network with 1 input unit, 4 hidden units and 2 output units. - * The hidden layer is connected to itself. The network structure looks like: + * Construct a network with 1 input unit, 4 hidden units and 10 output + * units. The hidden layer is connected to itself. The network structure + * looks like: * * Input Hidden Output - * Layer(1) Layer(4) Layer(2) + * Layer(1) Layer(4) Layer(10) * +-----+ +-----+ +-----+ * | | | | | | * | +------>| +------>| | @@ -100,38 +102,45 @@ BOOST_AUTO_TEST_CASE(SequenceClassificationTest) * . . * ....... */ - LinearLayer<> linearLayer0(1, 4); - RecurrentLayer<> recurrentLayer0(4); - BaseLayer inputBaseLayer; + Add<> add(4); + Linear<> lookup(1, 4); + SigmoidLayer<> sigmoidLayer; + Linear<> linear(4, 4); + Recurrent<> recurrent(add, lookup, linear, sigmoidLayer, rho); - LinearLayer<> hiddenLayer(4, 2); - BaseLayer hiddenBaseLayer; + RNN<> model(rho); + model.Add >(); + model.Add(recurrent); + model.Add >(4, 10); + model.Add >(); - BinaryClassificationLayer classOutputLayer; - - auto modules = std::tie(linearLayer0, recurrentLayer0, inputBaseLayer, - hiddenLayer, hiddenBaseLayer); - - RNN net(modules, classOutputLayer); - - SGD opt(net, 0.5, 500 * input.n_cols, -100); - - net.Train(input, labels, opt); + SGD opt(model, 0.1, 500 * input.n_cols, -100); + model.Train(input, labels, opt); arma::mat prediction; - net.Predict(input, prediction); + model.Predict(input, prediction); size_t error = 0; - for (size_t i = 0; i < labels.n_cols; i++) + for (size_t i = 0; i < prediction.n_cols; ++i) { - if (arma::sum(arma::sum(arma::abs(prediction.col(i) - labels.col(i)))) == 0) + arma::mat singlePrediction = prediction.submat((rho - 1) * rho, i, + rho * rho - 1, i); + + const int predictionValue = arma::as_scalar(arma::find( + arma::max(singlePrediction.col(0)) == + singlePrediction.col(0), 1) + 1); + + const int targetValue = arma::as_scalar(arma::find( + arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1; + + if (predictionValue == targetValue) { error++; } } - double classificationError = 1 - double(error) / labels.n_cols; + double classificationError = 1 - double(error) / prediction.n_cols; + if (classificationError <= 0.2) { ++successes; @@ -279,9 +288,7 @@ void GenerateNextEmbeddedReber(const arma::Mat& transitions, /** * Train the specified network and the construct a Reber grammar dataset. */ -template -void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, - bool embedded = false) +void ReberGrammarTestNetwork(bool embedded = false) { // Reber state transition matrix. (The last two columns are the indices to the // next path). @@ -346,36 +353,34 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, * | | | | | | * | +------>| +------>| | * | | ..>| | | | - * +-----+ . +--+--+ +-----+ + * +-----+ . +--+--+ +-- ---+ * . . * . . * ....... 
*/ - const size_t lstmSize = 4 * 10; - LinearLayer<> linearLayer0(7, lstmSize); - RecurrentLayer<> recurrentLayer0(10, lstmSize); + const size_t outputSize = 7; + const size_t inputSize = 7; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - LinearLayer<>hiddenLayer(10, 7); - BaseLayer hiddenBaseLayer; + RNN > model(rho); - BinaryClassificationLayer classOutputLayer; + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0, - hiddenLayer, hiddenBaseLayer); - - RNN net(modules, classOutputLayer); - - SGD opt(net, 0.5, 2, -200); + SGD opt(model, 0.1, 2, -50000); arma::mat inputTemp, labelsTemp; - for (size_t i = 0; i < 15; i++) + for (size_t i = 0; i < 40; i++) { for (size_t j = 0; j < trainReberGrammarCount; j++) { inputTemp = trainInput.at(0, j); labelsTemp = trainLabels.at(0, j); - net.Train(inputTemp, labelsTemp, opt); + + model.Train(inputTemp, labelsTemp, opt); } } @@ -384,10 +389,11 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, // Ask the network to predict the next Reber grammar in the given sequence. for (size_t i = 0; i < testReberGrammarCount; i++) { - arma::mat output; + arma::mat output, prediction; arma::mat input = testInput.at(0, i); - net.Predict(input, output); + model.Predict(input, prediction); + data::Binarize(prediction, output, 0.5); const size_t reberGrammerSize = 7; std::string inputReber = ""; @@ -429,8 +435,7 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, */ BOOST_AUTO_TEST_CASE(ReberGrammarTest) { - LSTMLayer<> hiddenLayerLSTM(10); - ReberGrammarTestNetwork(hiddenLayerLSTM); + ReberGrammarTestNetwork(false); } /** @@ -438,8 +443,7 @@ BOOST_AUTO_TEST_CASE(ReberGrammarTest) */ BOOST_AUTO_TEST_CASE(EmbeddedReberGrammarTest) { - LSTMLayer<> hiddenLayerLSTM(10); - ReberGrammarTestNetwork(hiddenLayerLSTM, true); + ReberGrammarTestNetwork(true); } /* @@ -490,7 +494,6 @@ void GenerateDistractedSequence(arma::mat& input, arma::mat& output) for (size_t i = 2; i < 8; i++) input(2 + rand() % 6, index(i)) = 1; - // Set the prompts which direct the network to give an answer. input(8, 8) = 1; input(9, 9) = 1; @@ -503,8 +506,7 @@ void GenerateDistractedSequence(arma::mat& input, arma::mat& output) * Train the specified network and the construct distracted sequence recall * dataset. */ -template -void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) +void DistractedSequenceRecallTestNetwork() { const size_t trainDistractedSequenceCount = 1000; const size_t testDistractedSequenceCount = 1000; @@ -538,22 +540,18 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) * . . * ....... 
*/ - const size_t lstmSize = 4 * 10; - LinearLayer<> linearLayer0(10, lstmSize); - RecurrentLayer<> recurrentLayer0(10, lstmSize); - - LinearLayer<> hiddenLayer(10, 3); - TanHLayer<> hiddenBaseLayer; - - BinaryClassificationLayer classOutputLayer; - - auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0, - hiddenLayer, hiddenBaseLayer); + const size_t outputSize = 3; + const size_t inputSize = 10; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - RNN net(modules, classOutputLayer); + RNN > model(rho); + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - SGD opt(net, 0.04, 2, -200); + SGD opt(model, 0.1, 2, -50000); arma::mat inputTemp, labelsTemp; for (size_t i = 0; i < 40; i++) @@ -563,7 +561,7 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) inputTemp = trainInput.at(0, j); labelsTemp = trainLabels.at(0, j); - net.Train(inputTemp, labelsTemp, opt); + model.Train(inputTemp, labelsTemp, opt); } } @@ -576,7 +574,8 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) arma::mat output; arma::mat input = testInput.at(0, i); - net.Predict(input, output); + model.Predict(input, output); + data::Binarize(output, output, 0.5); if (arma::accu(arma::abs(testLabels.at(0, i) - output)) != 0) error += 1; @@ -597,8 +596,7 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) */ BOOST_AUTO_TEST_CASE(DistractedSequenceRecallTest) { - LSTMLayer<> hiddenLayerLSTMPeephole(10, true); - DistractedSequenceRecallTestNetwork(hiddenLayerLSTMPeephole); + DistractedSequenceRecallTestNetwork(); } BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/rmsprop_test.cpp b/src/mlpack/tests/rmsprop_test.cpp index 481741a4cc1..831df74302d 100644 --- a/src/mlpack/tests/rmsprop_test.cpp +++ b/src/mlpack/tests/rmsprop_test.cpp @@ -16,14 +16,6 @@ #include -#include -#include -#include -#include -#include -#include -#include - #include #include "test_tools.hpp" @@ -35,8 +27,6 @@ using namespace mlpack::optimization::test; using namespace mlpack::distribution; using namespace mlpack::regression; -using namespace mlpack::ann; - BOOST_AUTO_TEST_SUITE(RMSpropTest); /** @@ -116,47 +106,4 @@ BOOST_AUTO_TEST_CASE(LogisticRegressionTest) BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance. } -/** - * Run RMSprop on a feedforward neural network and make sure the results are - * acceptable. - */ -BOOST_AUTO_TEST_CASE(FeedforwardTest) -{ - // Test on a non-linearly separable dataset (XOR). - arma::mat input, labels; - input << 0 << 1 << 1 << 0 << arma::endr - << 1 << 0 << 1 << 0 << arma::endr; - labels << 1 << 1 << 0 << 0; - - // Instantiate the first layer. - LinearLayer<> inputLayer(input.n_rows, 8); - BiasLayer<> biasLayer(8); - TanHLayer<> hiddenLayer0; - - // Instantiate the second layer. - LinearLayer<> hiddenLayer1(8, labels.n_rows); - TanHLayer<> outputLayer; - - // Instantiate the output layer. - BinaryClassificationLayer classOutputLayer; - - // Instantiate the feedforward network. 
- auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, hiddenLayer1, - outputLayer); - FFN net(modules, classOutputLayer); - - RMSprop opt(net, 0.03, 0.99, 1e-8, 300 * input.n_cols, -10); - - net.Train(input, labels, opt); - - arma::mat prediction; - net.Predict(input, prediction); - - BOOST_REQUIRE_EQUAL(prediction(0), 1); - BOOST_REQUIRE_EQUAL(prediction(1), 1); - BOOST_REQUIRE_EQUAL(prediction(2), 0); - BOOST_REQUIRE_EQUAL(prediction(3), 0); -} - BOOST_AUTO_TEST_SUITE_END();
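For orientation, every rewritten test above builds its network through the new layer-module front end (model.Add, model.Train, model.Predict) instead of the removed std::tie module lists. Below is a minimal stand-alone sketch of that pattern, not part of the patch itself: it assumes the mlpack 2.x-era headers and fills in template arguments (FFN<NegativeLogLikelihood<> >, Linear<>, SigmoidLayer<>, LogSoftMax<>, RMSprop<decltype(model)>) consistent with the identifiers visible in the hunks above, so exact names and signatures may differ from the actual tree.

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>

using namespace mlpack;
using namespace mlpack::ann;
using namespace mlpack::optimization;

int main()
{
  // Tiny two-class toy problem; labels are 1-based class indices, the same
  // convention the rewritten tests enforce with "labels += 1".
  arma::mat trainData = arma::randu<arma::mat>(4, 200);
  arma::mat trainLabels(1, 200);
  for (size_t i = 0; i < trainData.n_cols; ++i)
    trainLabels(i) = (arma::accu(trainData.col(i)) > 2.0) ? 2 : 1;

  // Keep a copy for prediction, since Train() takes the data by move.
  arma::mat testData = trainData;

  // Assemble the network by stacking layer modules (assumed layer names).
  FFN<NegativeLogLikelihood<> > model;
  model.Add<Linear<> >(trainData.n_rows, 8);
  model.Add<SigmoidLayer<> >();
  model.Add<Linear<> >(8, 2);
  model.Add<LogSoftMax<> >();

  // Same RMSprop settings used throughout the updated tests.
  RMSprop<decltype(model)> opt(model, 0.01, 0.88, 1e-8,
      50 * trainData.n_cols, -1);
  model.Train(std::move(trainData), std::move(trainLabels), opt);

  // Predict() yields one column of class scores per test point; the
  // predicted label is the row of the maximum, shifted to 1-based indexing,
  // mirroring the find(max(...) == ...) idiom used in the tests above.
  arma::mat scores;
  model.Predict(testData, scores);
  arma::urowvec predictions(scores.n_cols);
  for (size_t i = 0; i < scores.n_cols; ++i)
  {
    predictions(i) = arma::as_scalar(arma::find(
        arma::max(scores.col(i)) == scores.col(i), 1)) + 1;
  }

  return 0;
}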