diff --git a/.travis.yml b/.travis.yml index 838397c895d..b2cd2e96fed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ before_install: - printenv - sudo cp .travis/config.hpp /usr/include/armadillo_bits/config.hpp install: - - mkdir build && cd build && cmake -DDEBUG=OFF -DPROFILE=OFF .. && make -j4 + - mkdir build && cd build && cmake -DDEBUG=OFF -DPROFILE=OFF .. && make -j2 script: - travis_wait 30 ./bin/mlpack_test -p notifications: diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index f292e9756c9..dde69de0b1b 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -51,7 +51,6 @@ set(DIRS randomized_svd range_search rann - rmva regularized_svd softmax_regression sparse_autoencoder diff --git a/src/mlpack/methods/ann/CMakeLists.txt b/src/mlpack/methods/ann/CMakeLists.txt index 6ff7011404b..46174b88578 100644 --- a/src/mlpack/methods/ann/CMakeLists.txt +++ b/src/mlpack/methods/ann/CMakeLists.txt @@ -1,12 +1,8 @@ # Define the files we need to compile # Anything not in this list will not be compiled into mlpack. set(SOURCES - cnn.hpp - cnn_impl.hpp ffn.hpp ffn_impl.hpp - network_util.hpp - network_util_impl.hpp rnn.hpp rnn_impl.hpp ) @@ -23,6 +19,4 @@ set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) add_subdirectory(activation_functions) add_subdirectory(init_rules) add_subdirectory(layer) -add_subdirectory(performance_functions) -add_subdirectory(pooling_rules) add_subdirectory(convolution_rules) diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp deleted file mode 100644 index 72e0803f179..00000000000 --- a/src/mlpack/methods/ann/cnn.hpp +++ /dev/null @@ -1,448 +0,0 @@ -/** - * @file cnn.hpp - * @author Shangtong Zhang - * @author Marcus Edel - * - * Definition of the CNN class, which implements convolutional neural networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_CNN_HPP -#define MLPACK_METHODS_ANN_CNN_HPP - -#include - -#include -#include -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a standard convolutional network. - * - * @tparam LayerTypes Contains all layer modules used to construct the network. - * @tparam OutputLayerType The outputlayer type used to evaluate the network. - * @tparam PerformanceFunction Performance strategy used to calculate the error. - */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> -> -class CNN -{ - public: - //! Convenience typedef for the internal model construction. - using NetworkType = CNN; - - /** - * Create the CNN object with the given predictors and responses set (this is - * the set that is used to train the network) and the given optimizer. - * Optionally, specify which initialize rule and performance function should - * be used. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. 
- * @param optimizer Instantiated optimizer used to train the model. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template class OptimizerType> - CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the CNN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template - CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the CNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template - CNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - /** - * Train the convolutional neural network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::SGD). - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::cube& predictors, const arma::mat& responses); - - /** - * Train the convolutional neural network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. 
- * - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(OptimizerType& optimizer); - - /** - * Train the convolutional neural network on the given input data using the - * given optimizer. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer); - - /** - * Predict the responses to a given set of predictors. The responses will - * reflect the output of the given output layer as returned by the - * OutputClass() function. - * - * @param predictors Input predictors. - * @param responses Matrix to put output predictions of responses into. - */ - void Predict(arma::cube& predictors, arma::mat& responses); - - /** - * Evaluate the convolutional neural network with the given parameters. This - * function is usually called by the optimizer to train the model. - * - * @param parameters Matrix model parameters. - * @param i Index of point to use for objective function evaluation. - * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. - */ - double Evaluate(const arma::mat& parameters, - const size_t i, - const bool deterministic = true); - - /** - * Evaluate the gradient of the convolutional neural network with the given - * parameters, and with respect to only one point in the dataset. This is - * useful for optimizers such as SGD, which require a separable objective - * function. - * - * @param parameters Matrix of the model parameters to be optimized. - * @param i Index of points to use for objective function gradient evaluation. - * @param gradient Matrix to output gradient into. - */ - void Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient); - - //! Return the number of separable functions (the number of predictor points). - size_t NumFunctions() const { return numFunctions; } - - //! Return the initial point for the optimization. - const arma::mat& Parameters() const { return parameter; } - //! Modify the initial point for the optimization. - arma::mat& Parameters() { return parameter; } - - /** - * Serialize the convolutional neural network. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /** - * Reset the network by setting the layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetParameter(network); - } - - /** - * Reset the layer status by setting the current deterministic parameter - * through all layer that implement the Deterministic function. 
- */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ - } - - /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. - */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - /** - * Link the calculated activation with the connection layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Masures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } - - /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. - */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, - std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } - - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& /* unused */) { /* Nothing to do here */ } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... 
Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(T& layer, P& /* unused */, D& delta) - { - layer.Gradient(layer.InputParameter(), delta, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } - - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } - - //! Instantiated convolutional neural network. - LayerTypes network; - - //! The outputlayer used to evaluate the network - OutputLayerType& outputLayer; - - //! Performance strategy used to claculate the error. - PerformanceFunction performanceFunc; - - //! The current evaluation mode (training or testing). - bool deterministic; - - //! Matrix of (trained) parameters. - arma::mat parameter; - - //! The matrix of data points (predictors). - arma::cube predictors; - - //! The matrix of responses to the input data points. - arma::mat responses; - - //! The number of separable functions (the number of predictor points). - size_t numFunctions; - - //! Locally stored backward error. - arma::mat error; - - //! Locally stored sample size. - size_t sampleSize; -}; // class CNN - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "cnn_impl.hpp" - -#endif diff --git a/src/mlpack/methods/ann/cnn_impl.hpp b/src/mlpack/methods/ann/cnn_impl.hpp deleted file mode 100644 index ba774ba3097..00000000000 --- a/src/mlpack/methods/ann/cnn_impl.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * @file cnn_impl.hpp - * @author Marcus Edel - * - * Definition of the CNN class, which implements convolutional neural networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_CNN_IMPL_HPP -#define MLPACK_METHODS_ANN_CNN_IMPL_HPP - -// In case it hasn't been included yet. -#include "cnn.hpp" - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - - -template -template class OptimizerType -> -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. 
- Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - Train(predictors, responses); -} - -template -template -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); -} - -template -template class OptimizerType> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::cube& predictors, const arma::mat& responses) -{ - numFunctions = predictors.n_cols; - sampleSize = predictors.n_slices / responses.n_cols; - this->predictors = predictors; - this->responses = responses; - - OptimizerType optimizer(*this); - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template class OptimizerType> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer) -{ - numFunctions = responses.n_cols; - sampleSize = predictors.n_slices / responses.n_cols; - this->predictors = predictors; - this->responses = responses; - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template< - template class OptimizerType -> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Predict(arma::cube& predictors, arma::mat& responses) -{ - deterministic = true; - - arma::mat responsesTemp; - ResetParameter(network); - Forward(predictors.slices(0, sampleSize - 1), network); - OutputPrediction(responsesTemp, network); - - responses = arma::mat(responsesTemp.n_elem, predictors.n_slices); - responses.col(0) = responsesTemp.col(0); - - for (size_t i = 1; i < (predictors.n_slices / sampleSize); i++) - { - Forward(predictors.slices(i, (i + 1) * sampleSize - 1), network); - - responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true); - OutputPrediction(responsesTemp, network); - responses.col(i) = responsesTemp.col(0); - } -} - -template -double CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) -{ - this->deterministic = deterministic; - - ResetParameter(network); - Forward(predictors.slices(i, (i + 1) * sampleSize - 1), network); - - return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true), error, network); -} - -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) -{ - Evaluate(parameter, i, false); - - NetworkGradients(gradient, network); - - Backward<>(error, network); - UpdateGradients<>(network); -} - -template -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Serialize(Archive& ar, const unsigned int /* version */) -{ - ar & data::CreateNVP(parameter, "parameter"); - ar & data::CreateNVP(sampleSize, "sampleSize"); - - // If we are loading, we need to initialize the weights. - if (Archive::is_loading::value) - { - NetworkWeights(parameter, network); - } -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index fc7fc6926fb..c90574293ba 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -41,16 +41,20 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { - output = arma::zeros >(input.n_rows - filter.n_rows + 1, - input.n_cols - filter.n_cols + 1); + output = arma::zeros >((input.n_rows - filter.n_rows + 1) / + dW, (input.n_cols - filter.n_cols + 1) / dH); // It seems to be about 3.5 times faster to use pointers instead of // filter(ki, kj) * input(leftInput + ki, topInput + kj) and output(i, j). 
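For clarity, here is a minimal standalone sketch of the strided valid-mode convolution that this file now implements, written with plain element indexing instead of the pointer-optimized inner loop. It mirrors the output-size rule and indexing introduced by the patch, but the snippet itself is illustrative only (assuming Armadillo) and is not part of the change:

#include <armadillo>

// Illustrative sketch (not part of the patch): strided "valid" convolution
// using the same output-size rule and receptive-field indexing as the
// NaiveConvolution change above.
template<typename eT>
void StridedValidConvolution(const arma::Mat<eT>& input,
                             const arma::Mat<eT>& filter,
                             arma::Mat<eT>& output,
                             const size_t dW = 1,
                             const size_t dH = 1)
{
  // Output extent: the full valid-convolution size divided by the stride,
  // matching the expression introduced in the patch.
  output = arma::zeros<arma::Mat<eT> >((input.n_rows - filter.n_rows + 1) / dW,
                                       (input.n_cols - filter.n_cols + 1) / dH);

  for (size_t j = 0; j < output.n_cols; ++j)
  {
    for (size_t i = 0; i < output.n_rows; ++i)
    {
      eT sum = eT(0);

      // Accumulate filter * input over the receptive field anchored at
      // (i * dH, j * dW), as in the pointer-based loop of the patch.
      for (size_t kj = 0; kj < filter.n_cols; ++kj)
        for (size_t ki = 0; ki < filter.n_rows; ++ki)
          sum += filter(ki, kj) * input(i * dH + ki, j * dW + kj);

      output(i, j) = sum;
    }
  }
}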
@@ -63,7 +67,7 @@ class NaiveConvolution const eT* kernelPtr = filter.memptr(); for (size_t kj = 0; kj < filter.n_cols; ++kj) { - const eT* inputPtr = input.colptr(kj + j) + i; + const eT* inputPtr = input.colptr(kj + j * dW) + i * dH; for (size_t ki = 0; ki < filter.n_rows; ++ki, ++kernelPtr, ++inputPtr) *outputPtr += *kernelPtr * (*inputPtr); } @@ -77,26 +81,30 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { - const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1); - const size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1); + const size_t outputRows = (input.n_rows + 2 * (filter.n_rows - 1)) * dW; + const size_t outputCols = (input.n_cols + 2 * (filter.n_cols - 1)) * dH; // Pad filter and input to the working output shape. arma::Mat inputPadded = arma::zeros >(outputRows, outputCols); inputPadded.submat(filter.n_rows - 1, filter.n_cols - 1, - filter.n_rows - 1 + input.n_rows - 1, - filter.n_cols - 1 + input.n_cols - 1) = input; + filter.n_rows - 1 + input.n_rows - 1, + filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, - output); + output, 1, 1); } /* @@ -105,15 +113,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input.slice(0), filter.slice(0), - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, input.n_slices); @@ -122,7 +134,7 @@ class NaiveConvolution for (size_t i = 1; i < input.n_slices; i++) { NaiveConvolution::Convolution(input.slice(i), filter.slice(i), - output.slice(i)); + output.slice(i), dW, dH); } } @@ -133,15 +145,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input, filter.slice(0), - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, filter.n_slices); @@ -150,7 +166,7 @@ class NaiveConvolution for (size_t i = 1; i < filter.n_slices; i++) { NaiveConvolution::Convolution(input, filter.slice(i), - output.slice(i)); + output.slice(i), dW, dH); } } @@ -161,15 +177,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input.slice(0), filter, - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, input.n_slices); @@ -178,7 +198,7 @@ class NaiveConvolution for (size_t i = 1; i < input.n_slices; i++) { NaiveConvolution::Convolution(input.slice(i), filter, - output.slice(i)); + output.slice(i), dW, dH); } } diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp index a0b317ebb0c..5206ec1996f 100644 --- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp @@ -49,6 +49,8 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, @@ -113,6 +115,8 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, @@ -142,6 +146,8 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, @@ -170,6 +176,8 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Cube& input, diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index f9bc4d53a8b..5e4b30fa4dc 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -14,10 +14,10 @@ #include -#include -#include +#include +#include +#include #include -#include #include namespace mlpack { @@ -26,25 +26,18 @@ namespace ann /** Artificial Neural Network. */ { /** * Implementation of a standard feed forward network. * - * @tparam LayerTypes Contains all layer modules used to construct the network. * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam PerformanceFunction Performance strategy used to calculate the error. */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> +template< + typename OutputLayerType = NegativeLogLikelihood<>, + typename InitializationRuleType = RandomInitialization > class FFN { public: //! Convenience typedef for the internal model construction. - using NetworkType = FFN; + using NetworkType = FFN; /** * Create the FFN object with the given predictors and responses set (this is @@ -52,71 +45,36 @@ class FFN * Optionally, specify which initialize rule and performance function should * be used. * - * @param network Network modules used to construct the network. * @param outputLayer Output layer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template class OptimizerType> - FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + FFN(OutputLayerType&& outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); /** * Create the FFN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. + * the set that is used to train the network) and the given optimizer. + * Optionally, specify which initialize rule and performance function should + * be used. * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. + * @param responses Outputs results from input training variables. + * @param outputLayer Output layer used to evaluate the network. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. 
*/ - template - FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, + FFN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + OutputLayerType&& outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); - /** - * Create the FNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. - */ - template - FFN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + //! Destructor to release allocated memory. + ~FFN(); /** - * Train the feedforward network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::SGD). + * Train the feedforward network on the given input data using the given + * optimizer. * * This will use the existing model parameters as a starting point for the * optimization. If this is not what you want, then you should access the @@ -125,31 +83,19 @@ class FFN * @tparam OptimizerType Type of optimizer to use to train the model. * @param predictors Input training variables. * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::mat& predictors, const arma::mat& responses); - - /** - * Train the feedforward network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * * @param optimizer Instantiated optimizer used to train the model. */ template< template class OptimizerType = mlpack::optimization::RMSprop > - void Train(OptimizerType& optimizer); + void Train(const arma::mat& predictors, + const arma::mat& responses, + OptimizerType& optimizer); /** - * Train the feedforward network on the given input data using the given - * optimizer. + * Train the feedforward network on the given input data. By default, the + * RMSprop optimization algorithm is used, but others can be specified + * (such as mlpack::optimization::SGD). * * This will use the existing model parameters as a starting point for the * optimization. If this is not what you want, then you should access the @@ -158,19 +104,16 @@ class FFN * @tparam OptimizerType Type of optimizer to use to train the model. * @param predictors Input training variables. * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. 
*/ template< template class OptimizerType = mlpack::optimization::RMSprop > - void Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer); + void Train(const arma::mat& predictors, const arma::mat& responses); /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the - * OutputClass() function. + * output layer function. * * @param predictors Input predictors. * @param responses Matrix to put output predictions of responses into. @@ -184,7 +127,7 @@ class FFN * @param parameters Matrix model parameters. * @param i Index of point to use for objective function evaluation. * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. + * layer act differently in training or testing mode. */ double Evaluate(const arma::mat& parameters, const size_t i, @@ -203,6 +146,21 @@ class FFN const size_t i, arma::mat& gradient); + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + //! Return the number of separable functions (the number of predictor points). size_t NumFunctions() const { return numFunctions; } @@ -216,214 +174,61 @@ class FFN void Serialize(Archive& ar, const unsigned int /* version */); private: + // Helper functions. /** - * Reset the network by zeroing the layer activations and by setting the - * layer status. + * The Forward algorithm (part of the Forward-Backward algorithm). Computes + * forward probabilities for each module. * - * enable_if (SFINAE) is used to iterate through the network. The general - * case peels off the first type and recurses, as usual with - * variadic function templates. + * @param input Data sequence to compute probabilities for. */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetParameter(network); - } + void Forward(arma::mat&& input); /** - * Reset the layer status by setting the current deterministic parameter - * through all layer that implement the Deterministic function. + * The Backward algorithm (part of the Forward-Backward algorithm). Computes + * backward pass for module. */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ } + void Backward(); /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. + * Iterate through all layer modules and update the the gradient using the + * layer defined optimizer. 
*/ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } + void Gradient(); /** - * Link the calculated activation with the connection layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /* - * Calculate the output error and update the overall error. + * Reset the module infomration (weights/parameters). */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Measures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } + void ResetParameters(); /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. + * Reset the module status by setting the current deterministic parameter + * for all modules that implement the Deterministic function. */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, - std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } + void ResetDeterministic(); /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. + * Reset the gradient for all modules that implement the Gradient function. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& /* unused */) { /* Nothing to do here */ } + void ResetGradients(arma::mat& gradient); - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... 
Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(T& layer, P& /* unused */, D& delta) - { - layer.Gradient(layer.InputParameter(), delta, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } - - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } - - //! Instantiated feedforward network. - LayerTypes network; - - //! The output layer used to evaluate the network + //! Instantiated outputlayer used to evaluate the network. OutputLayerType outputLayer; - //! Performance strategy used to calculate the error. - PerformanceFunction performanceFunc; + //! Instantiated InitializationRule object for initializing the network + //! parameter. + InitializationRuleType initializeRule; - //! The current evaluation mode (training or testing). - bool deterministic; + //! The input width. + size_t width; - //! Matrix of (trained) parameters. - arma::mat parameter; + //! The input height. + size_t height; + + //! Indicator if we already trained the model. + bool reset; + + //! Locally-stored model modules. + std::vector network; //! The matrix of data points (predictors). arma::mat predictors; @@ -431,11 +236,56 @@ class FFN //! The matrix of responses to the input data points. arma::mat responses; + //! Matrix of (trained) parameters. + arma::mat parameter; + //! The number of separable functions (the number of predictor points). size_t numFunctions; - //! Locally stored backward error. + //! The current error for the backward pass. arma::mat error; + + //! THe current input of the forward/backward pass. + arma::mat currentInput; + + //! THe current target of the forward/backward pass. + arma::mat currentTarget; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored output width visitor. + OutputWidthVisitor outputWidthVisitor; + + //! Locally-stored output height visitor. + OutputHeightVisitor outputHeightVisitor; + + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! The current evaluation mode (training or testing). + bool deterministic; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient parameter. + arma::mat gradient; }; // class FFN } // namespace ann diff --git a/src/mlpack/methods/ann/ffn_impl.hpp b/src/mlpack/methods/ann/ffn_impl.hpp index 5b1cc611871..6d8ee25e1a3 100644 --- a/src/mlpack/methods/ann/ffn_impl.hpp +++ b/src/mlpack/methods/ann/ffn_impl.hpp @@ -19,123 +19,70 @@ namespace mlpack { namespace ann /** Artificial Neural Network. 
*/ { -template -template class OptimizerType -> -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols) +template +FFN::FFN( + OutputLayerType&& outputLayer, InitializationRuleType initializeRule) : + outputLayer(std::move(outputLayer)), + initializeRule(initializeRule), + width(0), + height(0), + reset(false) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." << std::endl; + /* Nothing to do here */ } -template -template -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) +template +FFN::FFN( + const arma::mat& predictors, + const arma::mat& responses, + OutputLayerType&& outputLayer, + InitializationRuleType initializeRule) : + outputLayer(std::move(outputLayer)), + initializeRule(initializeRule), + width(0), + height(0), + reset(false) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); + numFunctions = responses.n_cols; - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + this->deterministic = true; + ResetDeterministic(); - Train(predictors, responses); + if (!reset) + { + ResetParameters(); + } } -template -template -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) +template +FFN::~FFN() { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deleteVisitor)); } -template +template template class OptimizerType> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, const arma::mat& responses) +void FFN::Train( + const arma::mat& predictors, + const arma::mat& 
responses, + OptimizerType& optimizer) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - OptimizerType optimizer(*this); + this->predictors = std::move(predictors); + this->responses = std::move(responses); + + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + } // Train the model. Timer::Start("ffn_optimization"); @@ -146,43 +93,26 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template +template template class OptimizerType> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) +void FFN::Train( + const arma::mat& predictors, const arma::mat& responses) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." << std::endl; -} + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + } + + OptimizerType optimizer(*this); -template -template< - template class OptimizerType -> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ // Train the model. Timer::Start("ffn_optimization"); const double out = optimizer.Optimize(parameter); @@ -192,101 +122,252 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." 
<< std::endl; } -template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Predict(arma::mat& predictors, arma::mat& responses) +template +void FFN::Predict( + arma::mat& predictors, arma::mat& responses) { - deterministic = true; + if (parameter.is_empty()) + { + ResetParameters(); + } + + if (!deterministic) + { + deterministic = true; + ResetDeterministic(); + } arma::mat responsesTemp; - ResetParameter(network); - Forward(arma::mat(predictors.colptr(0), predictors.n_rows, 1, false, true), - network); - OutputPrediction(responsesTemp, network); + Forward(std::move(arma::mat(predictors.colptr(0), + predictors.n_rows, 1, false, true))); + responsesTemp = boost::apply_visitor(outputParameterVisitor, + network.back()).col(0); responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); responses.col(0) = responsesTemp.col(0); for (size_t i = 1; i < predictors.n_cols; i++) { - Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), - network); + Forward(std::move(arma::mat(predictors.colptr(i), + predictors.n_rows, 1, false, true))); - responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true); - OutputPrediction(responsesTemp, network); + responsesTemp = boost::apply_visitor(outputParameterVisitor, + network.back()); responses.col(i) = responsesTemp.col(0); } } -template -double FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) +template +double FFN::Evaluate( + const arma::mat& /* parameters */, const size_t i, const bool deterministic) { - this->deterministic = deterministic; + if (parameter.is_empty()) + { + ResetParameters(); + } - ResetParameter(network); + if (deterministic != this->deterministic) + { + this->deterministic = deterministic; + ResetDeterministic(); + } + + currentInput = predictors.unsafe_col(i); + currentTarget = responses.unsafe_col(i); - Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), - network); + Forward(std::move(currentInput)); + double res = outputLayer.Forward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget)); - return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true), error, network); + return res; } -template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) +template +void FFN::Gradient( + const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { + if (parameter.is_empty()) + { + ResetParameters(); + } + gradient = arma::zeros(parameter.n_rows, parameter.n_cols); } + else + { + gradient.zeros(); + } + Evaluate(parameters, i, false); - Evaluate(parameter, i, false); + outputLayer.Backward(std::move(boost::apply_visitor(outputParameterVisitor, + network.back())), std::move(currentTarget), std::move(error)); - NetworkGradients(gradient, network); + Backward(); + ResetGradients(gradient); + Gradient(); +} + +template +void FFN::ResetParameters() +{ + size_t weights = 0; + for (size_t i = 0; i < network.size(); ++i) + { + weights += boost::apply_visitor(weightSizeVisitor, network[i]); + } + + parameter.set_size(weights, 1); + initializeRule.Initialize(parameter, parameter.n_elem, 1); + + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += 
boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), network[i]); + + boost::apply_visitor(resetVisitor, network[i]); + } +} + +template +void FFN::ResetDeterministic() +{ + DeterministicSetVisitor deterministicSetVisitor(deterministic); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deterministicSetVisitor)); +} + +template +void FFN::ResetGradients( + arma::mat& gradient) +{ + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(GradientSetVisitor(std::move(gradient), + offset), network[i]); + } +} + +template +void FFN::Forward(arma::mat&& input) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. 
+ if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + + if (!reset) + { + reset = true; + } +} + +template +void FFN::Backward() +{ + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(error), std::move( + boost::apply_visitor(deltaVisitor, network.back()))), network.back()); - Backward<>(error, network); - UpdateGradients<>(network); + for (size_t i = 2; i < network.size(); ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } } -template +template +void FFN::Gradient() +{ + boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( + boost::apply_visitor(deltaVisitor, network[1]))), network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } + + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - 2])), std::move(error)), + network[network.size() - 1]); +} + +template template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Serialize(Archive& ar, const unsigned int /* version */) +void FFN::Serialize( + Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); + ar & data::CreateNVP(width, "width"); + ar & data::CreateNVP(height, "height"); + ar & data::CreateNVP(currentInput, "currentInput"); + ar & data::CreateNVP(currentTarget, "currentTarget"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) { - NetworkWeights(parameter, network); + reset = false; + + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), network[i]); + + boost::apply_visitor(resetVisitor, network[i]); + } } } diff --git a/src/mlpack/methods/ann/layer/CMakeLists.txt b/src/mlpack/methods/ann/layer/CMakeLists.txt index b639cdad785..4211aeaaaa7 100644 --- a/src/mlpack/methods/ann/layer/CMakeLists.txt +++ b/src/mlpack/methods/ann/layer/CMakeLists.txt @@ -1,23 +1,68 @@ # Define the files we need to compile # Anything not in this list will not be compiled into mlpack. 
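The rewritten FFN above no longer walks a compile-time layer tuple; it keeps its modules in a std::vector of a boost::variant type and dispatches Forward, Backward and Gradient through visitors such as ForwardVisitor and outputParameterVisitor. Below is a minimal, self-contained sketch of that dispatch pattern; Scale, Shift, ToyLayerTypes and ToyForwardVisitor are illustrative stand-ins, not mlpack types.

#include <boost/variant.hpp>
#include <vector>
#include <armadillo>

// Two toy layers with no common base class, mirroring how the new FFN holds
// heterogeneous layer types in one container.
struct Scale
{
  double factor;
  void Forward(const arma::mat& input, arma::mat& output) { output = factor * input; }
};

struct Shift
{
  double offset;
  void Forward(const arma::mat& input, arma::mat& output) { output = input + offset; }
};

// Hypothetical stand-in for mlpack's LayerTypes variant.
using ToyLayerTypes = boost::variant<Scale*, Shift*>;

// Visitor that calls Forward() on whichever concrete layer the variant holds.
class ToyForwardVisitor : public boost::static_visitor<void>
{
 public:
  ToyForwardVisitor(const arma::mat& input, arma::mat& output) :
      input(input), output(output) { }

  template<typename LayerType>
  void operator()(LayerType* layer) const { layer->Forward(input, output); }

 private:
  const arma::mat& input;
  arma::mat& output;
};

int main()
{
  Scale scale{2.0};
  Shift shift{1.0};
  std::vector<ToyLayerTypes> network{&scale, &shift};

  arma::mat x = arma::ones<arma::mat>(3, 1), y;
  for (ToyLayerTypes& layer : network)
  {
    boost::apply_visitor(ToyForwardVisitor(x, y), layer);
    x = y;  // The output of one module becomes the input of the next.
  }

  x.print("chained output");  // Every element is 1 * 2 + 1 = 3.
  return 0;
}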
set(SOURCES - layer_traits.hpp - binary_classification_layer.hpp + add.hpp + add_impl.hpp + add_merge.hpp + add_merge_impl.hpp base_layer.hpp - empty_layer.hpp - bias_layer.hpp - dropout_layer.hpp - dropconnect_layer.hpp - hard_tanh_layer.hpp - leaky_relu_layer.hpp - linear_layer.hpp - conv_layer.hpp - pooling_layer.hpp - recurrent_layer.hpp - lstm_layer.hpp - sparse_bias_layer.hpp - sparse_input_layer.hpp - sparse_output_layer.hpp + concat.hpp + concat_impl.hpp + concat_performance.hpp + concat_performance_impl.hpp + constant.hpp + constant_impl.hpp + convolution.hpp + convolution_impl.hpp + dropconnect.hpp + dropconnect_impl.hpp + dropout.hpp + dropout_impl.hpp + glimpse.hpp + glimpse_impl.hpp + hard_tanh.hpp + hard_tanh_impl.hpp + join.hpp + join_impl.hpp + layer.hpp + layer_traits.hpp + layer_visitor.hpp + layer_visitor_impl.hpp + layer_types.hpp + leaky_relu.hpp + leaky_relu_impl.hpp + linear.hpp + linear_impl.hpp + linear_no_bias.hpp + linear_no_bias_impl.hpp + log_softmax.hpp + log_softmax_impl.hpp + lookup.hpp + lookup_impl.hpp + lstm.hpp + lstm_impl.hpp + max_pooling.hpp + max_pooling_impl.hpp + mean_pooling.hpp + mean_pooling_impl.hpp + mean_squared_error.hpp + mean_squared_error_impl.hpp + multiply_constant.hpp + multiply_constant_impl.hpp + negative_log_likelihood.hpp + negative_log_likelihood_impl.hpp + recurrent.hpp + recurrent_impl.hpp + recurrent_attention.hpp + recurrent_attention_impl.hpp + reinforce_normal.hpp + reinforce_normal_impl.hpp + select.hpp + select_impl.hpp + sequential.hpp + sequential_impl.hpp + vr_class_reward_impl.hpp + vr_class_reward_impl.hpp ) # Add directory name to sources. diff --git a/src/mlpack/methods/ann/layer/empty_layer.hpp b/src/mlpack/methods/ann/layer/add.hpp similarity index 60% rename from src/mlpack/methods/ann/layer/empty_layer.hpp rename to src/mlpack/methods/ann/layer/add.hpp index cf5a70e43ae..1afb1121300 100644 --- a/src/mlpack/methods/ann/layer/empty_layer.hpp +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -1,23 +1,26 @@ /** - * @file empty_layer.hpp - * @author Palash Ahuja + * @file add.hpp + * @author Marcus Edel * - * Definition of the EmptyLayer class, which is basically empty. + * Definition of the Add class that applies a bias term to the incoming data. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_HPP -namespace mlpack{ +#include +#include + +namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of the EmptyLayer class. The EmptyLayer class represents a - * single layer which is mainly used as placeholder. + * Implementation of the Add module class. The Add module applies a bias term + * to the incoming data. * * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). @@ -28,14 +31,15 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class EmptyLayer +class Add { - public: + public: /** - * Creates the empty layer object. All the methods are - * empty as well. + * Create the Add object using the specified number of output units. + * + * @param outSize The number of output units. 
*/ - EmptyLayer() { /* Nothing to do here. */ } + Add(const size_t outSize); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -44,11 +48,8 @@ class EmptyLayer * @param input Input data used for evaluating the specified function. * @param output Resulting output activation. */ - template - void Forward(const InputType& /* input */, OutputType& /* output */) - { - /* Nothing to do here. */ - } + template + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -59,58 +60,58 @@ class EmptyLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const InputType& /* input */, - const ErrorType& /* gy */, - GradientType& /* g */) - { - /* Nothing to do here. */ - } + template + void Backward(const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. * - * @param d The calculated error. - * @param g The calculated gradient. + * @param input The propagated input. + * @param error The calculated error. + * @param gradient The calculated gradient. */ - template - void Gradient(const InputType& /* input */, - const ErrorType& /* error */, - GradientType& /* gradient */) - { - /* Nothing to do here. */ - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } + template + void Gradient(const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient); - //! Modify the weights. - OutputDataType& Weights() { return weights; } + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. InputDataType& InputParameter() { return inputParameter; } //! Get the output parameter. OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. OutputDataType& OutputParameter() { return outputParameter; } //! Get the delta. OutputDataType const& Delta() const { return delta; } - //! Modify the delta. OutputDataType& Delta() { return delta; } //! Get the gradient. OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. OutputDataType& Gradient() { return gradient; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored number of output units. + size_t outSize; + //! Locally-stored weight object. OutputDataType weights; @@ -125,9 +126,12 @@ class EmptyLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class EmptyLayer +}; // class Add + +} // namespace ann +} // namespace mlpack -} //namespace ann -} //namespace mlpack +// Include implementation. +#include "add_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/add_impl.hpp b/src/mlpack/methods/ann/layer/add_impl.hpp new file mode 100644 index 00000000000..3ce562007a4 --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_impl.hpp @@ -0,0 +1,68 @@ +/** + * @file add_impl.hpp + * @author Marcus Edel + * + * Implementation of the Add class that applies a bias term to the incoming + * data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. 
You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_IMPL_HPP + +// In case it hasn't yet been included. +#include "add.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Add::Add(const size_t outSize) : + outSize(outSize) +{ + weights.set_size(outSize, 1); +} + +template +template +void Add::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = input + weights; +} + +template +template +void Add::Backward( + const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy; +} + +template +template +void Add::Gradient( + const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient = error; +} + +template +template +void Add::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp new file mode 100644 index 00000000000..222c3ef1a2f --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -0,0 +1,139 @@ +/** + * @file add_merge.hpp + * @author Marcus Edel + * + * Definition of the AddMerge module which accumulates the output of the given + * modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_MERGE_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_HPP + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the AddMerge module class. The AddMerge class accumulates + * the output of various modules. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class AddMerge +{ + public: + //! Create the AddMerge object. + AddMerge(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& /* input */, OutputType&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + /* + * Add a new module to the model. 
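As the add_impl.hpp hunk above shows, the new Add module adds a learned bias vector to its input, passes the backpropagated error through unchanged, and uses that error directly as the bias gradient. A small Armadillo-only sketch of those three steps; the 3-unit weights vector is made up for illustration.

#include <armadillo>

int main()
{
  // Hypothetical state of a 3-unit Add module: a learned bias column vector.
  arma::mat weights = {{0.5}, {-1.0}, {2.0}};

  // Forward: output = input + weights, as in Add::Forward above.
  arma::mat input = arma::ones<arma::mat>(3, 1);
  arma::mat output = input + weights;

  // Backward: a bias neither reshapes nor scales the signal, so the
  // backpropagated error gy passes through unchanged (g = gy).
  arma::mat gy = {{0.1}, {0.2}, {0.3}};
  arma::mat g = gy;

  // Gradient w.r.t. the bias: d(output)/d(weights) is the identity, so the
  // gradient is the error itself (gradient = error).
  arma::mat gradient = gy;

  output.print("output");      // 1.5, 0.0, 3.0
  gradient.print("gradient");  // 0.1, 0.2, 0.3
  return 0;
}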
+ * + * @param layer The Layer to be added to the model. + */ + template + void Add(const LayerType& layer) { network.push_back(new LayerType(layer)); } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + std::vector network; + + //! Locally-stored delete visitor module object. + DeleteVisitor deleteVisitor; + + //! Locally-stored output parameter visitor module object. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delta visitor module object. + DeltaVisitor deltaVisitor; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class AddMerge + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "add_merge_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/add_merge_impl.hpp b/src/mlpack/methods/ann/layer/add_merge_impl.hpp new file mode 100644 index 00000000000..0ef62c2d0ac --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_merge_impl.hpp @@ -0,0 +1,61 @@ +/** + * @file add_merge_impl.hpp + * @author Marcus Edel + * + * Definition of the AddMerge module which accumulates the output of the given + * modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP + +// In case it hasn't yet been included. +#include "add_merge_impl.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +AddMerge::AddMerge() +{ + // Nothing to do here. 
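AddMerge, declared above, simply accumulates the outputs of the modules it holds. A sketch of that accumulation, with plain Armadillo matrices standing in for the child modules' output parameters:

#include <armadillo>
#include <vector>

int main()
{
  // Plain matrices standing in for the output parameters of three child
  // modules held by an AddMerge container.
  std::vector<arma::mat> childOutputs;
  childOutputs.push_back(arma::ones<arma::mat>(2, 2));
  childOutputs.push_back(2 * arma::ones<arma::mat>(2, 2));
  childOutputs.push_back(3 * arma::ones<arma::mat>(2, 2));

  // AddMerge::Forward accumulates the child outputs elementwise.
  arma::mat output = childOutputs.front();
  for (size_t i = 1; i < childOutputs.size(); ++i)
    output += childOutputs[i];

  output.print("merged output");  // Every element is 1 + 2 + 3 = 6.
  return 0;
}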
+} + +template +template +void AddMerge::Forward( + const InputType&& /* input */, OutputType&& output) +{ + output = boost::apply_visitor(outputParameterVisitor, network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + output += boost::apply_visitor(outputParameterVisitor, network[i]); + } +} + +template +template +void AddMerge::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = gy; +} + + +template +template +void AddMerge::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(network, "network"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/base_layer.hpp b/src/mlpack/methods/ann/layer/base_layer.hpp index 2b915a116d5..68afe1cefeb 100644 --- a/src/mlpack/methods/ann/layer/base_layer.hpp +++ b/src/mlpack/methods/ann/layer/base_layer.hpp @@ -32,7 +32,6 @@ namespace ann /** Artificial Neural Network. */ { * - IdentityLayer * - ReLULayer * - TanHLayer - * - BaseLayer2D * * @tparam ActivationFunction Activation function used for the embedding layer. * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, @@ -64,33 +63,14 @@ class BaseLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { ActivationFunction::fn(input, output); } /** * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - ActivationFunction::deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed + * f(x) by propagating x backwards trough f. Using the results from the feed * forward pass. * * @param input The propagated input activation. @@ -98,30 +78,13 @@ class BaseLayer * @param g The calculated gradient. */ template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) + void Backward(const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; + arma::Mat derivative; ActivationFunction::deriv(input, derivative); - g = mappedError % derivative; + g = gy % derivative; } //! Get the input parameter. @@ -205,18 +168,6 @@ template < using TanHLayer = BaseLayer< ActivationFunction, InputDataType, OutputDataType>; -/** - * Standard Base-Layer2D using the logistic activation function. 
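The trimmed BaseLayer::Backward above reduces to the elementwise chain rule g = gy % f'(.), with the derivative computed from the value the activation function produced in the forward pass. A generic sketch using tanh, whose derivative can be written in terms of the activation y as 1 - y^2; the numbers are arbitrary.

#include <armadillo>

int main()
{
  // Activation y = tanh(x) that the layer produced during the forward pass.
  arma::mat x = {{-1.0, 0.0, 1.0}};
  arma::mat y = arma::tanh(x);

  // Elementwise chain rule from BaseLayer::Backward: g = gy % f'(.),
  // with tanh'(x) written in terms of the activation as 1 - y^2.
  arma::mat gy = {{0.5, 0.5, 0.5}};
  arma::mat derivative = 1.0 - arma::square(y);
  arma::mat g = gy % derivative;

  g.print("backpropagated error");
  return 0;
}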
- */ -template < - class ActivationFunction = LogisticFunction, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -using BaseLayer2D = BaseLayer< - ActivationFunction, InputDataType, OutputDataType>; - - } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp deleted file mode 100644 index 0be535dec7b..00000000000 --- a/src/mlpack/methods/ann/layer/bias_layer.hpp +++ /dev/null @@ -1,208 +0,0 @@ -/** - * @file bias_layer.hpp - * @author Marcus Edel - * - * Definition of the BiasLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a standard bias layer. The BiasLayer class represents a - * single layer of a neural network. - * - * A convenient typedef is given: - * - * - 2DBiasLayer - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class BiasLayer -{ - public: - /** - * Create the BiasLayer object using the specified number of units and bias - * parameter. - * - * @param outSize The number of output units. - * @param bias The bias value. - */ - BiasLayer(const size_t outSize, const double bias = 1) : - outSize(outSize), - bias(bias) - { - weights.set_size(outSize, 1); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + (weights * bias); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = input; - for (size_t s = 0; s < input.n_slices; s++) - { - output.slice(s) += weights(s) * bias; - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const ErrorType& gy, - ErrorType& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the bias. - * - * @param input The propagated input. - * @param error The calculated error. - * @param gradient The calculated gradient. 
- */ - template - void Gradient(const arma::Mat& /* input */, - const ErrorType& error, - GradientType& gradient) - { - gradient = error * bias; - } - - //! Get the weights. - InputDataType const& Weights() const { return weights; } - //! Modify the weights. - InputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - InputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - InputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(bias, "bias"); - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored bias value. - double bias; - - //! Locally-stored weight object. - InputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - InputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class BiasLayer - -//! Layer traits for the bias layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = true; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -/** - * Standard 2D-Bias-Layer. - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::cube -> -using BiasLayer2D = BiasLayer; - -/** - * Standard 2D-Bias-Layer. - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -using AdditionLayer = BiasLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp b/src/mlpack/methods/ann/layer/binary_classification_layer.hpp deleted file mode 100644 index 1b3d6172a74..00000000000 --- a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp +++ /dev/null @@ -1,106 +0,0 @@ -/** - * @file binary_classification_layer.hpp - * @author Marcus Edel - * - * Definition of the BinaryClassificationLayer class, which implements a - * binary class classification layer that can be used as output layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a binary classification layer that can be used as - * output layer. 
- */ -class BinaryClassificationLayer -{ - public: - /** - * Create the BinaryClassificationLayer object. - * - * @param confidence The confidence used for the output class transformation. - */ - BinaryClassificationLayer(const double confidence = 0.5) : - confidence(confidence) - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - - for (size_t i = 0; i < output.n_elem; i++) - output(i) = output(i) > confidence ? 1 : 0; - } - - //! Get the confidence parameter. - double const& Confidence() const { return confidence; } - //! Modify the confidence parameter. - double& Confidence() { return confidence; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(confidence, "confidence"); - } - - private: - double confidence; - -}; // class BinaryClassificationLayer - -//! Layer traits for the binary class classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = true; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp new file mode 100644 index 00000000000..808309f7122 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -0,0 +1,186 @@ +/** + * @file concat.hpp + * @author Marcus Edel + * + * Definition of the Concat class, which acts as a concatenation contain. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Concat class. The Concat class works as a + * feed-forward fully connected network container which plugs various layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Concat +{ + public: + /** + * Create the Concat object using the specified parameters. + * + * @param model Expose all network modules. + * @param same Merge the error in the backward pass. + */ + Concat(const bool model = true, const bool same = true); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& /* gradient */); + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + //! Return the model modules. + std::vector& Model() + { + if (model) + { + return network; + } + + return empty; + } + + //! Return the initial point for the optimization. + const arma::mat& Parameters() const { return parameters; } + //! Modify the initial point for the optimization. + arma::mat& Parameters() { return parameters; } + + arma::mat const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + arma::mat& InputParameter() { return inputParameter; } + + //! Get the output parameter. + arma::mat const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + arma::mat& OutputParameter() { return outputParameter; } + + //! Get the delta.e + arma::mat const& Delta() const { return delta; } + //! Modify the delta. + arma::mat& Delta() { return delta; } + + //! Get the gradient. + arma::mat const& Gradient() const { return gradient; } + //! Modify the gradient. + arma::mat& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + + private: + //! Parameter which indicates if the modules should be exposed. + bool model; + + //! If true merge the error in the backward pass. + bool same; + + //! Locally-stored network modules. + std::vector network; + + //! Locally-stored model parameters. + arma::mat parameters; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! Locally-stored empty list of modules. + std::vector empty; + + //! 
Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient object. + arma::mat gradient; +}; // class Concat + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_impl.hpp b/src/mlpack/methods/ann/layer/concat_impl.hpp new file mode 100644 index 00000000000..44efe499231 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_impl.hpp @@ -0,0 +1,159 @@ +/** + * @file concat_impl.hpp + * @author Marcus Edel + * + * Implementation of the Concat class, which acts as a concatenation contain. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_IMPL_HPP + +// In case it hasn't yet been included. +#include "concat.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Concat::Concat( + const bool model, const bool same) : model(model), same(same) +{ + parameters.set_size(0, 0); +} + +template +template +void Concat::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + size_t outSize = 0; + + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (boost::apply_visitor( + outputParameterVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + } + } + + output = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + if (elements < outSize) + { + output.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(outputParameterVisitor, network[i])); + } + else + { + output.col(i) = arma::vectorise(boost::apply_visitor( + outputParameterVisitor, network[i])); + } + } +} + +template +template +void Concat::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + size_t outSize = 0; + size_t elements = 0; + + for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) + { + elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + arma::mat delta; + if (gy.n_cols == 1) + { + delta = gy.submat(j, 0, j + elements - 1, 0); + } + else + { + delta = gy.submat(0, i, elements - 1, i); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(delta), std::move( + boost::apply_visitor(deltaVisitor, network[i]))), network[i]); + + if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + } + + if (same) + { + if (i == 0) + { + g = std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g += std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + + if (!same) + { + g = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = 
boost::apply_visitor(deltaVisitor, network[i]).n_elem; + if (elements < outSize) + { + g.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g.col(i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + } + } +} + +template +template +void Concat::Gradient( + arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(error)), network[i]); + } +} + +template +template +void Concat::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp new file mode 100644 index 00000000000..39b7e10f126 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -0,0 +1,120 @@ +/** + * @file concat_performance.hpp + * @author Marcus Edel + * + * Definition of the ConcatPerformance class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the concat performance class. The class works as a + * feed-forward fully connected network container which plugs performance layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename OutputLayerType = NegativeLogLikelihood<>, + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class ConcatPerformance +{ + public: + /** + * Create the ConcatPerformance object. + * + * @param inSize The number of inputs. + * @param outputLayer Output layer used to evaluate the network. + */ + ConcatPerformance(const size_t inSize, + OutputLayerType&& outputLayer = OutputLayerType()); + + /* + * Computes the Negative log likelihood. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + double Forward(const arma::Mat&& input, arma::Mat&& target); + /** + * Ordinary feed backward pass of a neural network. The negative log + * likelihood layer expectes that the input contains log-probabilities for + * each class. The layer also expects a class index, in the range between 1 + * and the number of classes, as target when calling the Forward function. + * + * @param input The propagated input activation. + * @param target The target vector, that contains the class index in the range + * between 1 and the number of classes. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output); + + //! Get the input parameter. 
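Concat::Forward, shown in the concat_impl.hpp hunk above, runs every child module on the same input and writes each vectorised output into its own column of a zero-padded matrix whose height is the largest child output. A sketch of that layout step, with fixed matrices standing in for the child outputs:

#include <algorithm>
#include <armadillo>
#include <vector>

int main()
{
  // Fixed matrices standing in for the outputs of two child modules that
  // were run on the same input.
  std::vector<arma::mat> childOutputs;
  childOutputs.push_back(arma::mat("1.0; 2.0"));            // 2 elements.
  childOutputs.push_back(arma::mat("3.0; 4.0; 5.0; 6.0"));  // 4 elements.

  // The concatenated result is as tall as the largest child output.
  size_t outSize = 0;
  for (const arma::mat& out : childOutputs)
    outSize = std::max(outSize, (size_t) out.n_elem);

  // One column per child module, zero-padded to outSize rows.
  arma::mat output = arma::zeros<arma::mat>(outSize, childOutputs.size());
  for (size_t i = 0; i < childOutputs.size(); ++i)
    output.submat(0, i, childOutputs[i].n_elem - 1, i) =
        arma::vectorise(childOutputs[i]);

  output.print("concatenated");  // Column 0 is zero in rows 2 and 3.
  return 0;
}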
+ InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + + private: + //! Locally-stored number of inputs. + size_t inSize; + + //! Instantiated outputlayer used to evaluate the network. + OutputLayerType outputLayer; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class ConcatPerformance + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_performance_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance_impl.hpp b/src/mlpack/methods/ann/layer/concat_performance_impl.hpp new file mode 100644 index 00000000000..f014ac33f01 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_performance_impl.hpp @@ -0,0 +1,118 @@ +/** + * @file concat_performance_impl.hpp + * @author Marcus Edel + * + * Implementation of the ConcatPerformance class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_IMPL_HPP + +// In case it hasn't yet been included. +#include "concat_performance.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::ConcatPerformance(const size_t inSize, OutputLayerType&& outputLayer) : + inSize(inSize), + outputLayer(std::move(outputLayer)) +{ + // Nothing to do here. 
+} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +double ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Forward(const arma::Mat&& input, arma::Mat&& target) +{ + const size_t elements = input.n_elem / inSize; + + double output = 0; + for (size_t i = 0; i < input.n_elem; i+= elements) + { + arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); + output += outputLayer.Forward(std::move(subInput), std::move(target)); + } + + return output; +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +void ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + const size_t elements = input.n_elem / inSize; + + arma::mat subInput = input.submat(0, 0, elements - 1, 0); + arma::mat subOutput; + + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output = arma::zeros(subOutput.n_elem, inSize); + output.col(0) = subOutput; + + for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) + { + subInput = input.submat(i, 0, i + elements - 1, 0); + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output.col(j) = subOutput; + } +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +void ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Serialize(Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_performance_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/constant_layer.hpp b/src/mlpack/methods/ann/layer/constant.hpp similarity index 71% rename from src/mlpack/methods/ann/layer/constant_layer.hpp rename to src/mlpack/methods/ann/layer/constant.hpp index 31da87e7d9d..b24b44aa802 100644 --- a/src/mlpack/methods/ann/layer/constant_layer.hpp +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -1,8 +1,8 @@ /** - * @file constant_layer.hpp + * @file constant.hpp * @author Marcus Edel * - * Definition of the ConstantLayer class, which outputs a constant value given + * Definition of the Constant class, which outputs a constant value given * any input. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -10,8 +10,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_HPP +#define MLPACK_METHODS_ANN_LAYER_CONSTANT_HPP #include @@ -31,21 +31,17 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class ConstantLayer +class Constant { public: /** - * Create the ConstantLayer object that outputs a given constant scalar value + * Create the Constant object that outputs a given constant scalar value * given any input value. * * @param outSize The number of output units. * @param scalar The constant value used to create the constant output. 
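ConcatPerformance::Forward above splits the concatenated input into inSize equally sized blocks and sums the wrapped output layer's loss over them. A sketch using a hypothetical sum-of-squared-errors loss (ToyLoss) in place of the wrapped NegativeLogLikelihood layer:

#include <armadillo>
#include <iostream>

// Hypothetical stand-in for the wrapped output layer: a sum-of-squared-errors
// loss between one block and the target.
static double ToyLoss(const arma::mat& input, const arma::mat& target)
{
  return arma::accu(arma::square(input - target));
}

int main()
{
  const size_t inSize = 2;                         // Number of concatenated blocks.
  arma::mat input = {{1.0}, {2.0}, {3.0}, {4.0}};  // Two stacked 2-element blocks.
  arma::mat target = {{1.0}, {1.0}};

  // Mirror of ConcatPerformance::Forward: walk the input in blocks of
  // input.n_elem / inSize elements and accumulate the loss per block.
  const size_t elements = input.n_elem / inSize;
  double output = 0;
  for (size_t i = 0; i < input.n_elem; i += elements)
  {
    arma::mat block = input.submat(i, 0, i + elements - 1, 0);
    output += ToyLoss(block, target);
  }

  // Block one: (1-1)^2 + (2-1)^2 = 1; block two: (3-1)^2 + (4-1)^2 = 13.
  std::cout << "total loss: " << output << std::endl;  // 14
  return 0;
}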
*/ - ConstantLayer(const size_t outSize, const double scalar) - { - constantOutput = OutputDataType(outSize, 1); - constantOutput.fill(scalar); - } + Constant(const size_t outSize, const double scalar); /** * Ordinary feed forward pass of a neural network. The forward pass fills the @@ -54,11 +50,8 @@ class ConstantLayer * @param input Input data used for evaluating the specified function. * @param output Resulting output activation. */ - template - void Forward(const arma::Mat& /* input */, arma::Mat& output) - { - output = constantOutput; - } + template + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network. The backward pass of the @@ -68,14 +61,10 @@ class ConstantLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const arma::Mat& /* input */, - const arma::Mat& /* gy */, - arma::Mat& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols); - } + template + void Backward(const DataType&& /* input */, + DataType&& /* gy */, + DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -96,12 +85,15 @@ class ConstantLayer * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(constantOutput, "constantOutput"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + //! Locally-stored constant output matrix. OutputDataType constantOutput; @@ -115,7 +107,10 @@ class ConstantLayer OutputDataType outputParameter; }; // class ConstantLayer -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "constant_impl.hpp" -#endif +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/constant_impl.hpp b/src/mlpack/methods/ann/layer/constant_impl.hpp new file mode 100644 index 00000000000..09e0624c6e0 --- /dev/null +++ b/src/mlpack/methods/ann/layer/constant_impl.hpp @@ -0,0 +1,65 @@ +/** + * @file constant_impl.hpp + * @author Marcus Edel + * + * Implementation of the Constant class, which outputs a constant value given + * any input. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONSTANT_IMPL_HPP + +// In case it hasn't yet been included. +#include "constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +Constant::Constant( + const size_t outSize, + const double scalar) : + inSize(0), + outSize(outSize) +{ + constantOutput = OutputDataType(outSize, 1); + constantOutput.fill(scalar); +} + +template +template +void Constant::Forward( + const InputType&& input, OutputType&& output) +{ + if (inSize == 0) + { + inSize = input.n_elem; + } + + output = constantOutput; +} + +template +template +void Constant::Backward( + const DataType&& /* input */, DataType&& /* gy */, DataType&& g) +{ + g = arma::zeros(inSize, 1); +} + +template +template +void Constant::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(constantOutput, "constantOutput"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp deleted file mode 100644 index bbb918c8a49..00000000000 --- a/src/mlpack/methods/ann/layer/conv_layer.hpp +++ /dev/null @@ -1,324 +0,0 @@ -/** - * @file conv_layer.hpp - * @author Marcus Edel - * - * Definition of the ConvLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP - -#include -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the ConvLayer class. The ConvLayer class represents a - * single layer of a neural network. - * - * @tparam ForwardConvolutionRule Convolution to perform forward process. - * @tparam BackwardConvolutionRule Convolution to perform backward process. - * @tparam GradientConvolutionRule Convolution to calculate gradient. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename ForwardConvolutionRule = NaiveConvolution, - typename BackwardConvolutionRule = NaiveConvolution, - typename GradientConvolutionRule = NaiveConvolution, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class ConvLayer -{ - public: - /** - * Create the ConvLayer object using the specified number of input maps, - * output maps, filter size, stride and padding parameter. - * - * @param inMaps The number of input maps. - * @param outMaps The number of output maps. - * @param wfilter Width of the filter/kernel. - * @param wfilter Height of the filter/kernel. - * @param xStride Stride of filter application in the x direction. - * @param yStride Stride of filter application in the y direction. - * @param wPad Spatial padding width of the input. - * @param hPad Spatial padding height of the input. 
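The Constant module implemented above emits the same stored vector for any input and, since that output does not depend on the input, backpropagates a zero gradient. A short illustration; outSize = 3 and scalar = 7 are arbitrary.

#include <armadillo>

int main()
{
  // State of a hypothetical Constant module with outSize = 3 and scalar = 7.
  arma::mat constantOutput(3, 1);
  constantOutput.fill(7.0);

  // Forward ignores the input and emits the stored constant vector.
  arma::mat input = arma::randu<arma::mat>(5, 1);
  arma::mat output = constantOutput;

  // Backward: the output does not depend on the input, so the gradient
  // passed back to the previous module is a zero vector of the input's size.
  arma::mat g = arma::zeros<arma::mat>(input.n_elem, 1);

  output.print("constant output");  // 7, 7, 7
  return 0;
}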
- */ - ConvLayer(const size_t inMaps, - const size_t outMaps, - const size_t wfilter, - const size_t hfilter, - const size_t xStride = 1, - const size_t yStride = 1, - const size_t wPad = 0, - const size_t hPad = 0) : - wfilter(wfilter), - hfilter(hfilter), - inMaps(inMaps), - outMaps(outMaps), - xStride(xStride), - yStride(yStride), - wPad(wPad), - hPad(hPad) - { - weights.set_size(wfilter, hfilter, inMaps * outMaps); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - const size_t wConv = ConvOutSize(input.n_rows, wfilter, xStride, wPad); - const size_t hConv = ConvOutSize(input.n_cols, hfilter, yStride, hPad); - - output = arma::zeros >(wConv, hConv, outMaps); - for (size_t outMap = 0, outMapIdx = 0; outMap < outMaps; outMap++) - { - for (size_t inMap = 0; inMap < inMaps; inMap++, outMapIdx++) - { - arma::Mat convOutput; - ForwardConvolutionRule::Convolution(input.slice(inMap), - weights.slice(outMap), convOutput); - - output.slice(outMap) += convOutput; - } - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Cube& gy, - arma::Cube& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols, - inputParameter.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < inMaps; outMap++) - { - for (size_t inMap = 0; inMap < outMaps; inMap++, outMapIdx++) - { - arma::Mat rotatedFilter; - Rotate180(weights.slice(outMap * outMaps + inMap), rotatedFilter); - - arma::Mat output; - BackwardConvolutionRule::Convolution(gy.slice(inMap), rotatedFilter, - output); - - g.slice(outMap) += output; - } - } - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The input parameter used for calculating the gradient. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Cube& d, - arma::Cube& g) - { - g = arma::zeros >(weights.n_rows, weights.n_cols, - weights.n_slices); - - for (size_t outMap = 0; outMap < outMaps; outMap++) - { - for (size_t inMap = 0, s = outMap; inMap < inMaps; inMap++, s += outMaps) - { - arma::Cube inputSlices = input.slices(inMap, inMap); - arma::Cube deltaSlices = d.slices(outMap, outMap); - - arma::Cube output; - GradientConvolutionRule::Convolution(inputSlices, deltaSlices, output); - - for (size_t i = 0; i < output.n_slices; i++) - g.slice(s) += output.slice(i); - } - } - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. 
- OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(wfilter, "wfilter"); - ar & data::CreateNVP(hfilter, "hfilter"); - ar & data::CreateNVP(inMaps, "inMaps"); - ar & data::CreateNVP(outMaps, "outMaps"); - ar & data::CreateNVP(xStride, "xStride"); - ar & data::CreateNVP(yStride, "yStride"); - ar & data::CreateNVP(wPad, "wPad"); - ar & data::CreateNVP(hPad, "hPad"); - } - - private: - /* - * Rotates a 3rd-order tesor counterclockwise by 180 degrees. - * - * @param input The input data to be rotated. - * @param output The rotated output. - */ - template - void Rotate180(const arma::Cube& input, arma::Cube& output) - { - output = arma::Cube(input.n_rows, input.n_cols, input.n_slices); - - // * left-right flip, up-down flip */ - for (size_t s = 0; s < output.n_slices; s++) - output.slice(s) = arma::fliplr(arma::flipud(input.slice(s))); - } - - /* - * Rotates a dense matrix counterclockwise by 180 degrees. - * - * @param input The input data to be rotated. - * @param output The rotated output. - */ - template - void Rotate180(const arma::Mat& input, arma::Mat& output) - { - // * left-right flip, up-down flip */ - output = arma::fliplr(arma::flipud(input)); - } - - /* - * Return the convolution output size. - * - * @param size The size of the input (row or column). - * @param k The size of the filter (width or height). - * @param s The stride size (x or y direction). - * @param p The size of the padding (width or height). - * @return The convolution output size. - */ - size_t ConvOutSize(const size_t size, - const size_t k, - const size_t s, - const size_t p) - { - return std::floor(size + p * 2 - k) / s + 1; - } - - //! Locally-stored filter/kernel width. - size_t wfilter; - - //! Locally-stored filter/kernel height. - size_t hfilter; - - //! Locally-stored number of input maps. - size_t inMaps; - - //! Locally-stored number of output maps. - size_t outMaps; - - //! Locally-stored stride of the filter in x-direction. - size_t xStride; - - //! Locally-stored stride of the filter in y-direction. - size_t yStride; - - //! Locally-stored padding width. - size_t wPad; - - //! Locally-stored padding height. - size_t hPad; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class ConvLayer - -//! Layer traits for the convolution layer. 
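Both the removed ConvLayer and the new Convolution module size their output maps with ConvOutSize, i.e. floor((n + 2p - k) / s) + 1, and fill them by sliding the kernel over the input. A rough valid-mode sliding-window sketch (stride 1, no padding, kernel flipping omitted), with ConvOutSize reproduced as a free function for illustration:

#include <armadillo>
#include <iostream>

// Output size rule used by ConvOutSize above: floor((n + 2p - k) / s) + 1.
static size_t ConvOutSize(const size_t n, const size_t k,
                          const size_t s, const size_t p)
{
  return (n + 2 * p - k) / s + 1;
}

int main()
{
  // Valid-mode sliding-window sketch: stride 1, no padding, kernel flipping
  // omitted for brevity.
  arma::mat input = arma::randu<arma::mat>(28, 28);
  arma::mat kernel = arma::randu<arma::mat>(5, 5);

  const size_t outRows = ConvOutSize(input.n_rows, kernel.n_rows, 1, 0);  // 24
  const size_t outCols = ConvOutSize(input.n_cols, kernel.n_cols, 1, 0);  // 24

  arma::mat output(outRows, outCols);
  for (size_t i = 0; i < outRows; ++i)
  {
    for (size_t j = 0; j < outCols; ++j)
    {
      output(i, j) = arma::accu(kernel %
          input.submat(i, j, i + kernel.n_rows - 1, j + kernel.n_cols - 1));
    }
  }

  std::cout << "feature map: " << output.n_rows << " x " << output.n_cols
      << std::endl;  // 24 x 24
  return 0;
}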
-template< - typename ForwardConvolutionRule, - typename BackwardConvolutionRule, - typename GradientConvolutionRule, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp new file mode 100644 index 00000000000..a7f647ed226 --- /dev/null +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -0,0 +1,344 @@ +/** + * @file convolution.hpp + * @author Marcus Edel + * + * Definition of the Convolution module class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONVOLUTION_HPP +#define MLPACK_METHODS_ANN_LAYER_CONVOLUTION_HPP + +#include + +#include +#include +#include +#include + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Convolution class. The Convolution class represents a + * single layer of a neural network. + * + * @tparam ForwardConvolutionRule Convolution to perform forward process. + * @tparam BackwardConvolutionRule Convolution to perform backward process. + * @tparam GradientConvolutionRule Convolution to calculate gradient. + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename ForwardConvolutionRule = NaiveConvolution, + typename BackwardConvolutionRule = NaiveConvolution, + typename GradientConvolutionRule = NaiveConvolution, + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Convolution +{ +public: + //! Create the Convolution object. + Convolution(); + + /** + * Create the Convolution object using the specified number of input maps, + * output maps, filter size, stride and padding parameter. + * + * @param inSize The number of input maps. + * @param outSize The number of output maps. + * @param kW Width of the filter/kernel. + * @param kH Height of the filter/kernel. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. + * @param padW Padding width of the input. + * @param padH Padding height of the input. + * @param inputWidth The widht of the input data. + * @param inputHeight The height of the input data. + */ + Convolution(const size_t inSize, + const size_t outSize, + const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const size_t padW = 0, + const size_t padH = 0, + const size_t inputWidth = 0, + const size_t inputHeight = 0); + + /* + * Set the weight and bias term. + */ + void Reset(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
+ */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient); + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! Get the input width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify input the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the input height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the input height. + size_t& InputHeight() { return inputHeight; } + + //! Get the output width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the output width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the output height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the output height. + size_t& OutputHeight() { return outputHeight; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + /* + * Return the convolution output size. + * + * @param size The size of the input (row or column). + * @param k The size of the filter (width or height). + * @param s The stride size (x or y direction). + * @param p The size of the padding (width or height). + * @return The convolution output size. + */ + size_t ConvOutSize(const size_t size, + const size_t k, + const size_t s, + const size_t p) + { + return std::floor(size + p * 2 - k) / s + 1; + } + + /* + * Rotates a 3rd-order tensor counterclockwise by 180 degrees. + * + * @param input The input data to be rotated. + * @param output The rotated output. + */ + template + void Rotate180(const arma::Cube& input, arma::Cube& output) + { + output = arma::Cube(input.n_rows, input.n_cols, input.n_slices); + + // * left-right flip, up-down flip */ + for (size_t s = 0; s < output.n_slices; s++) + output.slice(s) = arma::fliplr(arma::flipud(input.slice(s))); + } + + /* + * Rotates a dense matrix counterclockwise by 180 degrees. 
+ * + * @param input The input data to be rotated. + * @param output The rotated output. + */ + template + void Rotate180(const arma::Mat& input, arma::Mat& output) + { + // * left-right flip, up-down flip */ + output = arma::fliplr(arma::flipud(input)); + } + + /* + * Pad the given input data. + * + * @param input The input to be padded. + * @param wPad Padding width of the input. + * @param hPad Padding height of the input. + * @param output The padded output data. + */ + template + void Pad(const arma::Mat& input, + size_t wPad, + size_t hPad, + arma::Mat& output) + { + if (output.n_rows != input.n_rows + wPad * 2 || + output.n_cols != input.n_cols + hPad * 2) + { + output = arma::zeros(input.n_rows + wPad * 2, input.n_cols + hPad * 2); + } + + output.submat(wPad, hPad, wPad + input.n_rows - 1, + hPad + input.n_cols - 1) = input; + } + + /* + * Pad the given input data. + * + * @param input The input to be padded. + * @param wPad Padding width of the input. + * @param hPad Padding height of the input. + * @param output The padded output data. + */ + template + void Pad(const arma::Cube& input, + size_t wPad, + size_t hPad, + arma::Cube& output) + { + output = arma::zeros(input.n_rows + wPad * 2, + input.n_cols + hPad * 2, input.n_slices); + + for (size_t i = 0; i < input.n_slices; ++i) + { + Pad(input.slice(i), wPad, hPad, output.slice(i)); + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored filter/kernel width. + size_t kW; + + //! Locally-stored filter/kernel height. + size_t kH; + + //! Locally-stored stride of the filter in x-direction. + size_t dW; + + //! Locally-stored stride of the filter in y-direction. + size_t dH; + + //! Locally-stored padding width. + size_t padW; + + //! Locally-stored padding height. + size_t padH; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight object. + arma::cube weight; + + //! Locally-stored bias term object. + arma::mat bias; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored transformed output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed padded input parameter. + arma::cube inputPaddedTemp; + + //! Locally-stored transformed error parameter. + arma::cube gTemp; + + //! Locally-stored transformed gradient parameter. + arma::cube gradientTemp; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Convolution + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "convolution_impl.hpp" + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution_impl.hpp b/src/mlpack/methods/ann/layer/convolution_impl.hpp new file mode 100644 index 00000000000..422c9947095 --- /dev/null +++ b/src/mlpack/methods/ann/layer/convolution_impl.hpp @@ -0,0 +1,333 @@ +/** + * @file convolution_impl.hpp + * @author Marcus Edel + * + * Implementation of the Convolution module class. 
+ * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONVOLUTION_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONVOLUTION_IMPL_HPP + +// In case it hasn't yet been included. +#include "convolution.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Convolution() +{ + // Nothing to do here. +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Convolution( + const size_t inSize, + const size_t outSize, + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const size_t padW, + const size_t padH, + const size_t inputWidth, + const size_t inputHeight) : + inSize(inSize), + outSize(outSize), + kW(kW), + kH(kH), + dW(dW), + dH(dH), + padW(padW), + padH(padH), + inputWidth(inputWidth), + inputHeight(inputHeight), + outputWidth(0), + outputHeight(0) +{ + weights.set_size((outSize * inSize * kW * kH) + outSize, 1); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Reset() +{ + weight = arma::cube(weights.memptr(), kW, kH, + outSize * inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Forward(const arma::Mat&& input, arma::Mat&& output) +{ + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); + + if (padW != 0 || padH != 0) + { + Pad(inputTemp, padW, padH, inputPaddedTemp); + } + + size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); + size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); + + outputTemp = arma::zeros >(wConv, hConv, outSize); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat convOutput; + + if (padW != 0 || padH != 0) + { + ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + else + { + ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + + outputTemp.slice(outMap) += convOutput; + } + + outputTemp.slice(outMap) += bias(outMap); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = 
outputTemp.n_rows; + outputHeight = outputTemp.n_cols; +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), + outputWidth, outputHeight, outSize); + gTemp = arma::zeros >(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat rotatedFilter; + Rotate180(weight.slice(outMapIdx), rotatedFilter); + + arma::Mat output; + BackwardConvolutionRule::Convolution(mappedError.slice(outMap), + rotatedFilter, output, dW, dH); + + if (padW != 0 || padH != 0) + { + gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, + rotatedFilter.n_cols / 2, + rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, + rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); + } + else + { + gTemp.slice(inMap) += output; + } + } + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Gradient( + const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) +{ + arma::cube mappedError; + if (padW != 0 && padH != 0) + { + mappedError = arma::cube(error.memptr(), outputWidth / padW, + outputHeight / padH, outSize); + } + else + { + mappedError = arma::cube(error.memptr(), outputWidth, + outputHeight, outSize); + } + + gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, + weight.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, + s += outSize) + { + arma::Cube inputSlices; + if (padW != 0 || padH != 0) + { + inputSlices = inputPaddedTemp.slices(inMap, inMap); + } + else + { + inputSlices = inputTemp.slices(inMap, inMap); + } + + arma::Cube deltaSlices = mappedError.slices(outMap, outMap); + + arma::Cube output; + GradientConvolutionRule::Convolution(inputSlices, deltaSlices, + output, dW, dH); + + if ((padW != 0 || padH != 0) && + (gradientTemp.n_rows < output.n_rows && + gradientTemp.n_cols < output.n_cols)) + { + for (size_t i = 0; i < output.n_slices; i++) + { + arma::mat subOutput = output.slice(i); + + gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, + subOutput.n_cols / 2, + subOutput.n_rows / 2 + gradientTemp.n_rows - 1, + subOutput.n_cols / 2 + gradientTemp.n_cols - 1); + } + } + else + { + for (size_t i = 0; i < output.n_slices; i++) + { + gradientTemp.slice(s) += output.slice(i); + } + } + } + + gradient.submat(weight.n_elem + outMap, 0, + weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( + outMap, outMap)); + } + + // gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::Mat( + gradientTemp.memptr(), gradientTemp.n_elem, 1, false, false); + + + // arma::vectorise(gradientTemp); +} + +template< + typename ForwardConvolutionRule, + 
typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + ar & data::CreateNVP(padW, "padW"); + ar & data::CreateNVP(padH, "padH"); + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(inputHeight, "inputHeight"); + ar & data::CreateNVP(outputWidth, "outputWidth"); + ar & data::CreateNVP(outputHeight, "outputHeight"); +} + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp new file mode 100644 index 00000000000..f74c8e6aaac --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -0,0 +1,209 @@ +/** + * @file dropconnect.hpp + * @author Palash Ahuja + * @author Marcus Edel + * + * Definition of the DropConnect class, which implements a regularizer + * that randomly sets connections to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_HPP + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "linear.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The DropConnect layer is a regularizer that randomly with probability + * ratio sets the connection values to zero and scales the remaining + * elements by factor 1 /(1 - ratio). The output is scaled with 1 / (1 - p) + * when deterministic is false. In the deterministic mode(during testing), + * the layer just computes the output. The output is computed according + * to the input layer. If no input layer is given, it will take a linear layer + * as default. + * + * Note: + * During training you should set deterministic to false and during testing + * you should set deterministic to true. + * + * For more information, see the following. + * + * @code + * @inproceedings{WanICML2013, + * title={Regularization of Neural Networks using DropConnect}, + * booktitle = {Proceedings of the 30th International Conference on Machine + * Learning(ICML - 13)}, + * author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and + * Rob Fergus}, + * year = {2013} + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class DropConnect +{ + public: + //! Create the DropConnect object. 
+ DropConnect(); + + /** + * Creates the DropConnect Layer as a Linear Object that takes input size, + * output size and ratio as parameter. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + * @param ratio The probability of setting a value to zero. + */ + DropConnect(const size_t inSize, + const size_t outSize, + const double ratio = 0.5); + + ~DropConnect(); + + /** + * Ordinary feed forward pass of the DropConnect layer. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of the DropConnect layer. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g); + + /** + * Calculate the gradient using the output delta and the input activation. + * + * @param input The propagated input. + * @param d The calculated error. + * @param g The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */); + + //! Get the model modules. + std::vector& Model() { return network; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + + //! Modify the value of the deterministic parameter. + bool &Deterministic() { return deterministic; } + + //! The probability of setting a value to zero. + double Ratio() const { return ratio; } + + //! Modify the probability of setting a value to zero. + void Ratio(const double r) + { + ratio = r; + scale = 1.0 / (1.0 - ratio); + } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + +private: + //! The probability of setting a value to zero. + double ratio; + + //! The scale fraction. + double scale; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored mask object. + OutputDataType mask; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Denoise mask for the weights. + OutputDataType denoise; + + //! Locally-stored layer module. + LayerTypes baseLayer; + + //! 
Locally-stored network modules. + std::vector network; +}; // class DropConnect. + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "dropconnect_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/dropconnect_impl.hpp b/src/mlpack/methods/ann/layer/dropconnect_impl.hpp new file mode 100644 index 00000000000..294952b9bcf --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropconnect_impl.hpp @@ -0,0 +1,118 @@ +/** + * @file dropconnect_impl.hpp + * @author Palash Ahuja + * @author Marcus Edel + * + * Implementation of the DropConnect class, which implements a regularizer + * that randomly sets connections to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_IMPL_HPP + +// In case it hasn't yet been included. +#include "dropconnect.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +DropConnect::DropConnect() +{ + // Nothing to do here. +} + +template +DropConnect::DropConnect( + const size_t inSize, + const size_t outSize, + const double ratio) : + ratio(ratio), + scale(1.0 / (1 - ratio)), + baseLayer(new Linear(inSize, outSize)) +{ + network.push_back(baseLayer); +} + +template +DropConnect::~DropConnect() +{ + boost::apply_visitor(DeleteVisitor(), baseLayer); +} + +template +template +void DropConnect::Forward( + arma::Mat&& input, + arma::Mat&& output) +{ + // The DropConnect mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + baseLayer); + } + else + { + // Save weights for denoising. + boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); + + // Scale with input / (1 - ratio) and set values to zero with + // probability ratio. + mask = arma::randu >(denoise.n_rows, denoise.n_cols); + mask.transform([&](double val) { return (val > ratio); }); + + boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), + baseLayer); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + baseLayer); + + output = output * scale; + } +} + +template +template +void DropConnect::Backward( + arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) +{ + boost::apply_visitor(BackwardVisitor(std::move(input), std::move(gy), + std::move(g)), baseLayer); +} + +template +template +void DropConnect::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + baseLayer); + + // Denoise the weights. 
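Forward() and Gradient() above temporarily swap the base layer's weights for a masked copy and restore ("denoise") them once the gradient has been computed. A standalone Armadillo sketch of that mask, rescale, and restore cycle, without the visitor machinery (the weight shape, seed, and ratio are illustrative):

#include <armadillo>

int main()
{
  arma::arma_rng::set_seed(42);

  const double ratio = 0.5;                  // probability of zeroing a connection
  const double scale = 1.0 / (1.0 - ratio);  // rescale the surviving connections

  arma::mat weights(3, 4, arma::fill::randn);
  arma::mat input(4, 1, arma::fill::randu);

  // Save the clean weights so they can be restored after the backward pass.
  arma::mat denoise = weights;

  // Keep each connection with probability (1 - ratio), as in Forward().
  arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
  mask.transform([&](double val) { return (val > ratio); });

  weights = weights % mask;                  // drop connections
  arma::mat output = scale * (weights * input);

  // ... forward/backward with the masked weights ...

  weights = denoise;                         // restore the clean weights
  output.print("output");
}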
+ boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); +} + +template +template +void DropConnect::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(scale, "scale"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp deleted file mode 100644 index fdb14cbf11c..00000000000 --- a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp +++ /dev/null @@ -1,361 +0,0 @@ -/** - * @file dropconnect_layer.hpp - * @author Palash Ahuja - * - * Definition of the DropConnectLayer class, which implements a regularizer - * that randomly sets connections to zero. Preventing units from co-adapting. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP - -#include - -#include "empty_layer.hpp" -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The DropConnect layer is a regularizer that randomly with probability - * ratio sets the connection values to zero and scales the remaining - * elements by factor 1 /(1 - ratio). The output is scaled with 1 / (1 - p) - * when deterministic is false. In the deterministic mode(during testing), - * the layer just computes the output. The output is computed according - * to the input layer. If no input layer is given, it will take a linear layer - * as default. - * - * Note: - * During training you should set deterministic to false and during testing - * you should set deterministic to true. - * - * For more information, see the following. - * - * @code - * @inproceedings{WanICML2013, - * title={Regularization of Neural Networks using DropConnect}, - * booktitle = {Proceedings of the 30th International Conference on Machine - * Learning(ICML - 13)}, - * author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and - * Rob Fergus}, - * year = {2013} - * } - * @endcode - * - * @tparam InputLayer Layer used instead of the internal linear layer. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template< - typename InputLayer = EmptyLayer, - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class DropConnectLayer -{ - public: - /** - * Creates the DropConnect Layer as a Linear Object that takes input size, - * output size and ratio as parameter. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - * @param ratio The probability of setting a value to zero. - */ - DropConnectLayer (const size_t inSize, - const size_t outSize, - const double ratio = 0.5) : - inSize(inSize), - outSize(outSize), - ratio(ratio), - scale(1.0 / (1 - ratio)), - uselayer(false) - { - weights.set_size(outSize, inSize); - } - - /** - * Create the DropConnectLayer object using the specified ratio and rescale - * parameter. This takes the - * - * @param ratio The probability of setting a connection to zero. 
- * @param inputLayer the layer object that the dropconnect connection would take. - */ - template - DropConnectLayer(InputLayerType &&inputLayer, - const double ratio = 0.5) : - baseLayer(std::forward(inputLayer)), - ratio(ratio), - scale(1.0 / (1 - ratio)), - uselayer(true) - { - static_assert(std::is_same::type, - InputLayer>::value, - "The type of the inputLayer must be InputLayerType"); - } - /** - * Ordinary feed forward pass of the DropConnect layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat &input, arma::Mat &output) - { - // The DropConnect mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (uselayer) - { - baseLayer.Forward(input, output); - } - else - { - output = weights * input; - } - } - else - { - if (uselayer) - { - // Scale with input / (1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(baseLayer.Weights().n_rows, - baseLayer.Weights().n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - // Save weights for denoising. - denoise = baseLayer.Weights(); - - baseLayer.Weights() = baseLayer.Weights() % mask; - - baseLayer.Forward(input, output); - } - else - { - // Scale the input / ( 1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(weights.n_rows, weights.n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - // Save weights for denoising. - denoise = weights; - - weights = weights % mask; - output = weights * input; - } - - output = output * scale; - } - } - - /** - * Ordinary feed backward pass of the DropConnect layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, const DataType& gy, DataType& g) - { - if (uselayer) - { - baseLayer.Backward(input, gy, g); - } - else - { - g = weights.t() * gy; - } - } - - /** - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Mat& d, - GradientDataType& g) - { - if (uselayer) - { - baseLayer.Gradient(input, d, g); - - // Denoise the weights. - baseLayer.Weights() = denoise; - } - else - { - g = d * input.t(); - - // Denoise the weights. - weights = denoise; - } - } - - //! Get the weights. - OutputDataType const& Weights() const - { - if (uselayer) - return baseLayer.Weights(); - - return weights; - } - - //! Modify the weights. - OutputDataType& Weights() - { - if (uselayer) - return baseLayer.Weights(); - - return weights; - } - - //! Get the input parameter. - InputDataType &InputParameter() const - { - if (uselayer) - return baseLayer.InputParameter(); - - return inputParameter; - } - - //! Modify the input parameter. - InputDataType &InputParameter() - { - if (uselayer) - return baseLayer.InputParameter(); - - return inputParameter; - } - - //! Get the output parameter. - OutputDataType &OutputParameter() const - { - if (uselayer) - return baseLayer.OutputParameter(); - - return outputParameter; - } - - //! Modify the output parameter. - OutputDataType &OutputParameter() - { - if (uselayer) - return baseLayer.OutputParameter(); - - return outputParameter; - } - - //! Get the delta. 
- OutputDataType const& Delta() const - { - if (uselayer) - return baseLayer.Delta(); - - return delta; - } - - //! Modify the delta. - OutputDataType& Delta() - { - if (uselayer) - return baseLayer.Delta(); - - return delta; - } - - //! Get the gradient. - OutputDataType const& Gradient() const - { - if (uselayer) - return baseLayer.Gradient(); - - return gradient; - } - - //! Modify the gradient. - OutputDataType& Gradient() - { - if (uselayer) - return baseLayer.Gradient(); - - return gradient; - } - - //! The value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - - //! Modify the value of the deterministic parameter. - bool &Deterministic() { return deterministic; } - - //! The probability of setting a value to zero. - double Ratio() const { return ratio; } - - //! Modify the probability of setting a value to zero. - void Ratio(const double r) - { - ratio = r; - scale = 1.0 / (1.0 - ratio); - } - -private: - //! Locally-stored layer object. - InputLayer baseLayer; - - //! Locally stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! The probability of setting a value to zero. - double ratio; - - //! The scale fraction. - double scale; - - //! If true the default layer is used otherwise a new layer will be created. - bool uselayer; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored mast object. - OutputDataType mask; - - //! If true dropout and scaling is disabled, see notes above. - bool deterministic; - - //! Denoise mask for the weights. - OutputDataType denoise; -}; // class DropConnectLayer. - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/dropout_layer.hpp b/src/mlpack/methods/ann/layer/dropout.hpp similarity index 59% rename from src/mlpack/methods/ann/layer/dropout_layer.hpp rename to src/mlpack/methods/ann/layer/dropout.hpp index 3ed0bd62a60..4371f21dad3 100644 --- a/src/mlpack/methods/ann/layer/dropout_layer.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -1,8 +1,8 @@ /** - * @file dropout_layer.hpp + * @file dropout.hpp * @author Marcus Edel * - * Definition of the DropoutLayer class, which implements a regularizer that + * Definition of the Dropout class, which implements a regularizer that * randomly sets units to zero. Preventing units from co-adapting. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -10,8 +10,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPOUT_HPP #include @@ -51,57 +51,17 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class DropoutLayer +class Dropout { public: - /** - * Create the DropoutLayer object using the specified ratio and rescale + * Create the Dropout object using the specified ratio and rescale * parameter. * * @param ratio The probability of setting a value to zero. 
* @param rescale If true the input is rescaled when deterministic is False. */ - DropoutLayer(const double ratio = 0.5, - const bool rescale = true) : - ratio(ratio), - scale(1.0 / (1.0 - ratio)), - rescale(rescale) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of the dropout layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } + Dropout(const double ratio = 0.5, const bool rescale = true); /** * Ordinary feed forward pass of the dropout layer. @@ -110,31 +70,7 @@ class DropoutLayer * @param output Resulting output activation. */ template - void Forward(const arma::Cube& input, arma::Cube& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols, - input.n_slices); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the dropout layer. @@ -143,13 +79,10 @@ class DropoutLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const DataType& /* unused */, - const DataType& gy, - DataType& g) - { - g = gy % mask * scale; - } + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -190,11 +123,7 @@ class DropoutLayer * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(ratio, "ratio"); - ar & data::CreateNVP(rescale, "rescale"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored delta object. @@ -220,33 +149,12 @@ class DropoutLayer //! If true the input is rescaled when deterministic is False. bool rescale; -}; // class DropoutLayer - -//! Layer traits for the bias layer. -template < - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -/** - * Standard Dropout-Layer2D. - */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -using DropoutLayer2D = DropoutLayer; +}; // class Dropout } // namespace ann } // namespace mlpack +// Include implementation. 
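In training mode, Dropout::Forward() masks units with probability ratio and rescales the survivors by 1 / (1 - ratio), so the expected activation is preserved. A standalone sketch of just that step (the vector size and seed are arbitrary):

#include <armadillo>
#include <iostream>

int main()
{
  arma::arma_rng::set_seed(7);

  const double ratio = 0.3;                  // probability of zeroing a unit
  const double scale = 1.0 / (1.0 - ratio);

  arma::vec input(10000, arma::fill::ones);

  // Bernoulli(1 - ratio) mask, built the same way as in Dropout::Forward().
  arma::vec mask = arma::randu<arma::vec>(input.n_elem);
  mask.transform([&](double val) { return (val > ratio); });

  arma::vec output = input % mask * scale;

  // The surviving units are scaled up by 1 / (1 - ratio), so the mean stays
  // close to the input mean of 1.0.
  std::cout << arma::mean(output) << std::endl;  // approximately 1.0
}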
+#include "dropout_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/dropout_impl.hpp b/src/mlpack/methods/ann/layer/dropout_impl.hpp new file mode 100644 index 00000000000..b805a94d464 --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropout_impl.hpp @@ -0,0 +1,84 @@ +/** + * @file dropout_impl.hpp + * @author Marcus Edel + * + * Implementation of the Dropout class, which implements a regularizer that + * randomly sets units to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPOUT_IMPL_HPP + +// In case it hasn't yet been included. +#include "dropout.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Dropout::Dropout( + const double ratio, const bool rescale) : + ratio(ratio), + scale(1.0 / (1.0 - ratio)), + rescale(rescale) +{ + // Nothing to do here. +} + +template +template +void Dropout::Forward( + const arma::Mat&& input, + arma::Mat&& output) +{ + // The dropout mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + if (!rescale) + { + output = input; + } + else + { + output = input * scale; + } + } + else + { + // Scale with input / (1 - ratio) and set values to zero with probability + // ratio. + mask = arma::randu >(input.n_rows, input.n_cols); + mask.transform( [&](double val) { return (val > ratio); } ); + output = input % mask * scale; + } +} + +template +template +void Dropout::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy % mask * scale; +} + +template +template +void Dropout::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(rescale, "rescale"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp similarity index 59% rename from src/mlpack/methods/ann/layer/glimpse_layer.hpp rename to src/mlpack/methods/ann/layer/glimpse.hpp index 3f1e9dffabb..e007fae30b9 100644 --- a/src/mlpack/methods/ann/layer/glimpse_layer.hpp +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -1,5 +1,5 @@ /** - * @file glimpse_layer.hpp + * @file glimpse.hpp * @author Marcus Edel * * Definition of the GlimpseLayer class, which takes an input image and a @@ -17,22 +17,54 @@ * year = {2014}, * } * @endcode - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_HPP +#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_HPP #include -#include + +#include "layer_types.hpp" #include namespace mlpack { namespace ann /** Artificial Neural Network. */ { + +/* + * The mean pooling rule for convolution neural networks. Average all values + * within the receptive block. 
+ */ +class MeanPoolingRule +{ + public: + /* + * Return the average value within the receptive block. + * + * @param input Input used to perform the pooling operation. + */ + template + double Pooling(const MatType& input) + { + return arma::mean(arma::mean(input)); + } + + /* + * Set the average value within the receptive block. + * + * @param input Input used to perform the pooling operation. + * @param value The unpooled value. + * @param output The unpooled output data. + */ + template + void Unpooling(const MatType& input, const double value, MatType& output) + { + output = arma::zeros(input.n_rows, input.n_cols); + const double mean = arma::mean(arma::mean(input)); + + output.elem(arma::find(mean == input, 1)).fill(value); + } +}; + /** * The glimpse layer returns a retina-like representation * (down-scaled cropped images) of increasing scale around a given location in a @@ -44,10 +76,10 @@ namespace ann /** Artificial Neural Network. */ { * arma::sp_mat or arma::cube). */ template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat > -class GlimpseLayer +class Glimpse { public: @@ -60,18 +92,15 @@ class GlimpseLayer * @param depth The number of patches to crop per glimpse. * @param scale The scaling factor used to create the increasing retina-like * representation. + * @param inputWidth The input width of the given input data. + * @param inputHeight The input height of the given input data. */ - GlimpseLayer(const size_t inSize, - const size_t size, - const size_t depth = 3, - const size_t scale = 2) : - inSize(inSize), - size(size), - depth(depth), - scale(scale) - { - // Nothing to do here. - } + Glimpse(const size_t inSize, + const size_t size, + const size_t depth = 3, + const size_t scale = 2, + const size_t inputWidth = 0, + const size_t inputHeight = 0); /** * Ordinary feed forward pass of the glimpse layer. @@ -80,68 +109,7 @@ class GlimpseLayer * @param output Resulting output activation. 
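MeanPoolingRule above reduces a receptive block to its average and, on the backward path, writes the unpooled value back at a position matching that average. A standalone usage sketch (the 2x2 block and the unpooled value are illustrative):

#include <armadillo>
#include <iostream>

int main()
{
  arma::mat block = { { 2.0, 4.0 },
                      { 4.0, 6.0 } };

  // Forward: average over the receptive block (MeanPoolingRule::Pooling()).
  const double pooled = arma::mean(arma::mean(block));
  std::cout << "pooled = " << pooled << std::endl;  // 4

  // Backward: write the unpooled value at the first entry equal to the block
  // mean (MeanPoolingRule::Unpooling()).
  arma::mat unpooled = arma::zeros(block.n_rows, block.n_cols);
  unpooled.elem(arma::find(block == pooled, 1)).fill(0.25);
  unpooled.print("unpooled");  // 0.25 at the first matching entry (column-major), zeros elsewhere
}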
*/ template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = arma::Cube(size, size, depth * input.n_slices); - - inputDepth = input.n_slices / inSize; - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - input.n_rows + padSize * 2, input.n_cols + padSize * 2, - input.n_slices / inSize); - - inputPadded.tube(padSize, padSize, padSize + input.n_rows - 1, - padSize + input.n_cols - 1) = input.subcube(0, 0, - inputIdx * inputDepth, input.n_rows - 1, input.n_cols - 1, - (inputIdx + 1) * inputDepth - 1); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < output.n_slices; j += (inSize * depth), paddedSlice++) - { - output.slice(j) = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < output.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingInput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Pooling(glimpseSize / size, poolingInput, output.slice(j)); - } - else - { - ReSampling(poolingInput, output.slice(j)); - } - } - } - } - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the glimpse layer. @@ -150,93 +118,10 @@ class GlimpseLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const InputType& input, - const ErrorType& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. 
- arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - g = arma::zeros(inputParameter.n_rows, inputParameter.n_cols, - inputParameter.n_slices); - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputParameter.n_rows + padSize * 2, inputParameter.n_cols + - padSize * 2, inputParameter.n_slices / inSize); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = mappedError.slice(j); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingOutput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Unpooling(inputParameter.slice(paddedSlice), mappedError.slice(j), - poolingOutput); - } - else - { - DownwardReSampling(inputParameter.slice(paddedSlice), - mappedError.slice(j), poolingOutput); - } - - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = poolingOutput; - } - } - - g += inputPadded.tube(padSize, padSize, padSize + - inputParameter.n_rows - 1, padSize + inputParameter.n_cols - 1); - } - } - - Transform(g); - } + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -260,6 +145,37 @@ class GlimpseLayer this->location = location; } + //! Get the input width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify input the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the input height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the input height. + size_t& InputHeight() { return inputHeight; } + + //! Get the output width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the output width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the output height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the output height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer. 
+ */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: /* * Transform the given input by changing rows to columns. @@ -447,6 +363,30 @@ class GlimpseLayer } } + //! The size of the input units. + size_t inSize; + + //! The used glimpse size (height = width). + size_t size; + + //! The number of patches to crop per glimpse. + size_t depth; + + //! The scale fraction. + size_t scale; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + //! Locally-stored delta object. OutputDataType delta; @@ -459,26 +399,32 @@ class GlimpseLayer //! Locally-stored depth of the input. size_t inputDepth; - //! The size of the input units. - size_t inSize; - - //! The used glimpse size (height = width). - size_t size; - - //! The number of patches to crop per glimpse. - size_t depth; + //! Locally-stored transformed input parameter. + arma::cube inputTemp; - //! The scale fraction. - size_t scale; + //! Locally-stored transformed output parameter. + arma::cube outputTemp; //! The x and y coordinate of the center of the output glimpse. arma::mat location; //! Locally-stored object to perform the mean pooling operation. - MeanPooling pooling; + MeanPoolingRule pooling; + + //! Location-stored module location parameter. + std::vector locationParameter; + + //! Location-stored transformed gradient paramter. + arma::cube gTemp; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; }; // class GlimpseLayer -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "glimpse_impl.hpp" -#endif +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/glimpse_impl.hpp b/src/mlpack/methods/ann/layer/glimpse_impl.hpp new file mode 100644 index 00000000000..40ec37c932d --- /dev/null +++ b/src/mlpack/methods/ann/layer/glimpse_impl.hpp @@ -0,0 +1,224 @@ +/** + * @file glimpse_impl.hpp + * @author Marcus Edel + * + * Implementation of the GlimpseLayer class, which takes an input image and a + * location to extract a retina-like representation of the input image at + * different increasing scales. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_IMPL_HPP + +// In case it hasn't yet been included. +#include "glimpse.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Glimpse::Glimpse( + const size_t inSize, + const size_t size, + const size_t depth, + const size_t scale, + const size_t inputWidth, + const size_t inputHeight) : + inSize(inSize), + size(size), + depth(depth), + scale(scale), + inputWidth(inputWidth), + inputHeight(inputHeight) +{ + // Nothing to do here. 
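The glimpse extraction (GlimpseLayer::Forward() above, Glimpse::Forward() below) maps a normalized location in [-1, 1]^2 onto pixel coordinates of the padded input before cropping. A standalone sketch of that coordinate mapping (all sizes and the location are illustrative):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>

int main()
{
  // A 28x28 input, glimpse size 8, and a normalized location (lx, ly).
  const std::size_t rows = 28, cols = 28, glimpseSize = 8;
  const double lx = 0.5, ly = -1.0;

  const std::size_t padSize = std::floor((glimpseSize - 1) / 2);   // 3
  const std::size_t h = (rows + 2 * padSize) - glimpseSize;        // 26
  const std::size_t w = (cols + 2 * padSize) - glimpseSize;        // 26

  // (-1, -1) maps to the top-left corner, (1, 1) to the bottom-right corner,
  // mirroring the x/y computation in Glimpse::Forward().
  const std::size_t x = std::min(h, (std::size_t) std::max(0.0, (lx + 1) / 2.0 * h));
  const std::size_t y = std::min(w, (std::size_t) std::max(0.0, (ly + 1) / 2.0 * w));

  std::cout << "crop starts at (" << x << ", " << y << ")" << std::endl;  // (19, 0)
}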
+} + +template +template +void Glimpse::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); + outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); + + location = input.submat(0, 1, 1, 1); + + if (!deterministic) + { + locationParameter.push_back(location); + } + + inputDepth = inputTemp.n_slices / inSize; + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, + inputTemp.n_slices / inSize); + + inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, + padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, + inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, + (inputIdx + 1) * inputDepth - 1); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + outputTemp.slice(j) = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingInput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); + } + else + { + ReSampling(poolingInput, outputTemp.slice(j)); + } + } + } + } + } + + for (size_t i = 0; i < outputTemp.n_slices; ++i) + { + outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; +} + +template +template +void Glimpse::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + // Generate a cube using the backpropagated error matrix. 
+ arma::Cube mappedError = arma::zeros(outputWidth, + outputHeight, 1); + + location = locationParameter.back(); + locationParameter.pop_back(); + + for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + { + for (size_t i = 0; i < gy.n_cols; i++) + { + mappedError.slice(s + i) = arma::Mat(gy.memptr(), + outputWidth, outputHeight); + } + } + + gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, + inputTemp.n_slices); + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + + padSize * 2, inputTemp.n_slices / inSize); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = mappedError.slice(j); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingOutput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), + poolingOutput); + } + else + { + DownwardReSampling(inputTemp.slice(paddedSlice), + mappedError.slice(j), poolingOutput); + } + + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = poolingOutput; + } + } + + gTemp += inputPadded.tube(padSize, padSize, padSize + + inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); + } + } + + Transform(gTemp); + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void Glimpse::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(size, "size"); + ar & data::CreateNVP(depth, "depth"); + ar & data::CreateNVP(scale, "scale"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(location, "location"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp new file mode 100644 index 00000000000..88c8ad2d853 --- /dev/null +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -0,0 +1,140 @@ +/** + * @file hard_tanh.hpp + * @author Dhawal Arora + * + * Definition and implementation of the HardTanH layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +/** + * The Hard Tanh activation function, defined by + * + * @f{eqnarray*}{ + * f(x) &=& \left\{ + * \begin{array}{lr} + * max & : x > maxValue \\ + * min & : x \le minValue \\ + * x & : otherwise + * \end{array} + * \right. \\ + * f'(x) &=& \left\{ + * \begin{array}{lr} + * 0 & : x > maxValue \\ + * 0 & : x \le minValue \\ + * 1 & : otherwise + * \end{array} + * \right. + * @f} + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class HardTanH +{ + public: + /** + * Create the HardTanH object using the specified parameters. The range + * of the linear region can be adjusted by specifying the maxValue and + * minValue. Default (maxValue = 1, minValue = -1). + * + * @param maxValue Range of the linear region maximum value. + * @param minValue Range of the linear region minimum value. + */ + HardTanH(const double maxValue = 1, const double minValue = -1); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& input, + DataType&& gy, + DataType&& g); + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the maximum value. + double const& MaxValue() const { return maxValue; } + //! Modify the maximum value. + double& MaxValue() { return maxValue; } + + //! Get the minimum value. + double const& MinValue() const { return minValue; } + //! Modify the minimum value. + double& MinValue() { return minValue; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Maximum value for the HardTanH function. + double maxValue; + + //! Minimum value for the HardTanH function. + double minValue; +}; // class HardTanH + +} // namespace ann +} // namespace mlpack + +// Include implementation. 
+#include "hard_tanh_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp b/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp new file mode 100644 index 00000000000..55b92812072 --- /dev/null +++ b/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp @@ -0,0 +1,72 @@ +/** + * @file hard_tanh_impl.hpp + * @author Dhawal Arora + * + * Implementation and implementation of the HardTanH layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_IMPL_HPP + +// In case it hasn't yet been included. +#include "hard_tanh.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +HardTanH::HardTanH( + const double maxValue, + const double minValue) : + maxValue(maxValue), + minValue(minValue) +{ + // Nothing to do here. +} + +template +template +void HardTanH::Forward( + const InputType&& input, OutputType&& output) +{ + output = input; + for (size_t i = 0; i < input.n_elem; i++) + { + output(i) = (output(i) > maxValue ? maxValue : + (output(i) < minValue ? minValue : output(i))); + } +} + +template +template +void HardTanH::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + g = gy; + for (size_t i = 0; i < input.n_elem; i++) + { + if (input(i) < minValue || input(i) > maxValue) + { + g(i) = 0; + } + } +} + +template +template +void HardTanH::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(maxValue, "maxValue"); + ar & data::CreateNVP(minValue, "minValue"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp b/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp deleted file mode 100644 index c707017fcbf..00000000000 --- a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp +++ /dev/null @@ -1,259 +0,0 @@ -/** - * @file hard_tanh_layer.hpp - * @author Dhawal Arora - * - * Definition and implementation of the HardTanHLayer layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The Hard Tanh activation function, defined by - * - * @f{eqnarray*}{ - * f(x) &=& \left\{ - * \begin{array}{lr} - * max & : x > maxValue \\ - * min & : x \le minValue \\ - * x & : otherwise - * \end{array} - * \right. \\ - * f'(x) &=& \left\{ - * \begin{array}{lr} - * 0 & : x > maxValue \\ - * 0 & : x \le minValue \\ - * 1 & : otherwise - * \end{array} - * \right. - * @f} - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). 
- */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class HardTanHLayer -{ - public: - /** - * Create the HardTanHLayer object using the specified parameters. The range - * of the linear region can be adjusted by specifying the maxValue and - * minValue. Default (maxValue = 1, minValue = -1). - * - * @param maxValue Range of the linear region maximum value. - * @param minValue Range of the linear region minimum value. - */ - HardTanHLayer(const double maxValue = 1, const double minValue = -1) : - maxValue(maxValue), minValue(minValue) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const InputType& input, OutputType& output) - { - Fn(input, output); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the maximum value. - double const& MaxValue() const { return maxValue; } - //! Modify the maximum value. - double& MaxValue() { return maxValue; } - - //! Get the minimum value. - double const& MinValue() const { return minValue; } - //! Modify the minimum value. - double& MinValue() { return minValue; } - - /** - * Serialize the layer. 
- */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(maxValue, "maxValue"); - ar & data::CreateNVP(minValue, "minValue"); - } - - private: - /** - * Computes the HardTanH function. - * - * @param x Input data. - * @return f(x). - */ - double Fn(const double x) - { - if (x > maxValue) - return maxValue; - else if (x < minValue) - return minValue; - return x; - } - - /** - * Computes the HardTanH function using a dense matrix as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - - template - void Fn(const arma::Mat& x, arma::Mat& y) - { - y = x; - y.transform( [&](eT val) { return std::min( - std::max( val, minValue ), maxValue ); } ); - } - - /** - * Computes the HardTanH function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - Fn(x.slice(s), y.slice(s)); - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param x Input data. - * @return f'(x) - */ - double Deriv(const double x) - { - return (x > maxValue || x < minValue) ? 0 : 1; - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param y Input activations. - * @param x The resulting derivatives. - */ - template - void Deriv(const InputType& x, OutputType& y) - { - y = x; - - for (size_t i = 0; i < x.n_elem; i++) - y(i) = Deriv(x(i)); - } - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Maximum value for the HardTanH function. - double maxValue; - - //! Minimum value for the HardTanH function. - double minValue; -}; // class HardTanHLayer - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/softmax_layer.hpp b/src/mlpack/methods/ann/layer/join.hpp similarity index 67% rename from src/mlpack/methods/ann/layer/softmax_layer.hpp rename to src/mlpack/methods/ann/layer/join.hpp index a2d3323eed8..bda31d787ab 100644 --- a/src/mlpack/methods/ann/layer/softmax_layer.hpp +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -1,16 +1,16 @@ /** - * @file softmax_layer.hpp + * @file join.hpp * @author Marcus Edel * - * Definition of the SoftmaxLayer class. + * Definition of the Join module. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_JOIN_HPP +#define MLPACK_METHODS_ANN_LAYER_JOIN_HPP #include @@ -18,28 +18,23 @@ namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of the softmax layer. The softmax loss layer computes the - * multinomial logistic loss of the softmax of its inputs. + * Implementation of the Join module class. The Join class accumulates + * the output of various modules. * * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). 
*/ -template < +template< typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class SoftmaxLayer +class Join { public: - /** - * Create the SoftmaxLayer object. - */ - SoftmaxLayer() - { - // Nothing to do here. - } + //! Create the Join object. + Join(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -48,13 +43,8 @@ class SoftmaxLayer * @param input Input data used for evaluating the specified function. * @param output Resulting output activation. */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = arma::trunc_exp(input - - arma::repmat(arma::max(input), input.n_rows, 1)); - output /= arma::accu(output); - } + template + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -66,12 +56,9 @@ class SoftmaxLayer * @param g The calculated gradient. */ template - void Backward(const arma::Mat& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy; - } + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -84,20 +71,23 @@ class SoftmaxLayer OutputDataType& OutputParameter() { return outputParameter; } //! Get the delta. - InputDataType const& Delta() const { return delta; } + OutputDataType const& Delta() const { return delta; } //! Modify the delta. - InputDataType& Delta() { return delta; } + OutputDataType& Delta() { return delta; } /** * Serialize the layer. */ template - void Serialize(Archive& /* ar */, const unsigned int /* version */) - { - /* Nothing to do here */ - } + void Serialize(Archive& ar, const unsigned int /* version */); private: + //! Locally-stored number of input rows. + size_t inSizeRows; + + //! Locally-stored number of input cols. + size_t inSizeCols; + //! Locally-stored delta object. OutputDataType delta; @@ -106,9 +96,12 @@ class SoftmaxLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class SoftmaxLayer +}; // class Join } // namespace ann } // namespace mlpack +// Include implementation. +#include "join_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/join_impl.hpp b/src/mlpack/methods/ann/layer/join_impl.hpp new file mode 100644 index 00000000000..47584324871 --- /dev/null +++ b/src/mlpack/methods/ann/layer/join_impl.hpp @@ -0,0 +1,60 @@ +/** + * @file join_impl.hpp + * @author Marcus Edel + * + * Implementation of the Join module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_JOIN_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_JOIN_IMPL_HPP + +// In case it hasn't yet been included. +#include "join.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Join::Join() +{ + // Nothing to do here. 
+} + +template +template +void Join::Forward( + const InputType&& input, OutputType&& output) +{ + inSizeRows = input.n_rows; + inSizeCols = input.n_cols; + output = arma::vectorise(input); +} + +template +template +void Join::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); +} + +template +template +void Join::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(inSizeRows, "inSizeRows"); + ar & data::CreateNVP(inSizeCols, "inSizeCols"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/layer.hpp b/src/mlpack/methods/ann/layer/layer.hpp new file mode 100644 index 00000000000..c1de77741e9 --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer.hpp @@ -0,0 +1,30 @@ +/** + * @file layer.hpp + * @author Marcus Edel + * + * This includes various layers to construct a model. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_HPP + +#include "add_merge.hpp" +#include "concat_performance.hpp" +#include "convolution.hpp" +#include "dropconnect.hpp" +#include "glimpse.hpp" +#include "layer_types.hpp" +#include "linear.hpp" +#include "linear_no_bias.hpp" +#include "lstm.hpp" +#include "recurrent.hpp" +#include "recurrent_attention.hpp" +#include "sequential.hpp" +#include "concat.hpp" +#include "vr_class_reward.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_traits.hpp b/src/mlpack/methods/ann/layer/layer_traits.hpp index a8671d62f5d..ff4fbf2d387 100644 --- a/src/mlpack/methods/ann/layer/layer_traits.hpp +++ b/src/mlpack/methods/ann/layer/layer_traits.hpp @@ -64,27 +64,42 @@ HAS_MEM_FUNC(Gradient, HasGradientCheck); // function. HAS_MEM_FUNC(Deterministic, HasDeterministicCheck); -// This gives us a HasRecurrentParameterCheck type (where U is a function -// pointer) we can use with SFINAE to catch when a type has a -// RecurrentParameter() function. -HAS_MEM_FUNC(RecurrentParameter, HasRecurrentParameterCheck); +// This gives us a HasParametersCheck type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Weights() function. +HAS_MEM_FUNC(Parameters, HasParametersCheck); -// This gives us a HasSeqLenCheck type (where U is a function pointer) we -// can use with SFINAE to catch when a type has a SeqLen() function. -HAS_MEM_FUNC(SeqLen, HasSeqLenCheck); +// This gives us a HasAddCheck type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Weights() function. +HAS_MEM_FUNC(Add, HasAddCheck); -// This gives us a HasWeightsCheck type (where U is a function pointer) we +// This gives us a HasModelCheck type (where U is a function pointer) we // can use with SFINAE to catch when a type has a Weights() function. -HAS_MEM_FUNC(Weights, HasWeightsCheck); +HAS_MEM_FUNC(Model, HasModelCheck); // This gives us a HasLocationCheck type (where U is a function pointer) // we can use with SFINAE to catch when a type has a Location() function. HAS_MEM_FUNC(Location, HasLocationCheck); +// This gives us a HasResetCheck type (where U is a function pointer) +// we can use with SFINAE to catch when a type has a Location() function. 
+HAS_MEM_FUNC(Reset, HasResetCheck); + // This gives us a HasRewardCheck type (where U is a function pointer) we // can use with SFINAE to catch when a type has a Reward() function. HAS_MEM_FUNC(Reward, HasRewardCheck); +// This gives us a HasInputWidth type (where U is a function pointer) we +// can use with SFINAE to catch when a type has an InputWidth() function. +HAS_MEM_FUNC(InputWidth, HasInputWidth); + +// This gives us a HasInputHeight type (where U is a function pointer) we +// can use with SFINAE to catch when a type has an InputHeight() function. +HAS_MEM_FUNC(InputHeight, HasInputHeight); + +// This gives us a HasRho type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Rho() function. +HAS_MEM_FUNC(Rho, HasRho); + } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/layer_types.hpp b/src/mlpack/methods/ann/layer/layer_types.hpp new file mode 100644 index 00000000000..156616864db --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_types.hpp @@ -0,0 +1,117 @@ +/** + * @file layer_types.hpp + * @author Marcus Edel + * + * This provides a list of all modules that can be used to construct a model. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_TYPES_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_TYPES_HPP + +#include + +// Layer modules. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Convolution modules.
+#include +#include +#include + +namespace mlpack { +namespace ann { + +template class AddMerge; +template class Concat; +template class DropConnect; +template class Glimpse; +template class Linear; +template class LinearNoBias; +template class LSTM; +template class Recurrent; +template class Sequential; +template class VRClassReward; + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +class ConcatPerformance; + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +class Convolution; + +template< + typename InputDataType, + typename OutputDataType +> +class RecurrentAttention; + +using LayerTypes = boost::variant< + Add*, + AddMerge*, + BaseLayer*, + BaseLayer*, + BaseLayer*, + BaseLayer*, + Concat*, + ConcatPerformance, + arma::mat, arma::mat>*, + Constant*, + Convolution, + NaiveConvolution, + NaiveConvolution, arma::mat, arma::mat>*, + DropConnect*, + Dropout*, + Glimpse*, + HardTanH*, + Join*, + LeakyReLU*, + Linear*, + LinearNoBias*, + LogSoftMax*, + Lookup*, + LSTM*, + MaxPooling*, + MeanPooling*, + MeanSquaredError*, + MultiplyConstant*, + NegativeLogLikelihood*, + Recurrent*, + RecurrentAttention*, + ReinforceNormal*, + Select*, + Sequential*, + VRClassReward* +>; + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_visitor.hpp b/src/mlpack/methods/ann/layer/layer_visitor.hpp new file mode 100644 index 00000000000..a311b0addbc --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_visitor.hpp @@ -0,0 +1,1034 @@ +/** + * @file layer_visitor.hpp + * @author Marcus Edel + * + * This file provides an easy way to serialize a layer, abstracts away the + * different types of layers, and also automatically directs any function to the + * right layer type. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_HPP + +#include +#include + +#include + +namespace mlpack { +namespace ann { + +/** + * LoadOutputParameterVisitor restores the output parameter using the given + * parameter set. + */ +class LoadOutputParameterVisitor : public boost::static_visitor +{ + public: + //! Restore the output parameter given a parameter set. + LoadOutputParameterVisitor(std::vector&& parameter); + + //! Restore the output parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The parameter set. + std::vector&& parameter; + + //! Restore the output parameter for a module which doesn't implement the + //! Model() function. + template + typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; + + //! Restore the output parameter for a module which implements the Model() + //! function. + template + typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; +}; + +/** + * SaveOutputParameterVisitor saves the output parameter into the given + * parameter set. + */ +class SaveOutputParameterVisitor : public boost::static_visitor +{ + public: + //! Save the output parameter into the given parameter set. 
+ SaveOutputParameterVisitor(std::vector&& parameter); + + //! Save the output parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The parameter set. + std::vector&& parameter; + + //! Save the output parameter for a module which doesn't implement the + //! Model() function. + template + typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; + + //! Save the output parameter for a module which implements the Model() + //! function. + template + typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; +}; + +/** + * DeleteVisitor executes the destructor of the instantiated object. + */ +class DeleteVisitor : public boost::static_visitor +{ + public: + //! Execute the destructor. + template + void operator()(LayerType* layer) const; +}; + +/** + * ForwardOutputVisitor executes the Forward() function given the input and + * output parameter. + */ +class ForwardOutputVisitor : public boost::static_visitor +{ + public: + //! Execute the Forward() function given the input and output parameter. + ForwardOutputVisitor(arma::mat&& input, arma::mat&& output); + + //! Execute the Forward() function. + template + double operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The output parameter set. + arma::mat&& output; +}; + +/** + * ForwardVisitor executes the Forward() function given the input and output + * parameter. + */ +class ForwardVisitor : public boost::static_visitor +{ + public: + //! Execute the Forward() function given the input and output parameter. + ForwardVisitor(arma::mat&& input, arma::mat&& output); + + //! Execute the Forward() function. + template + void operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The output parameter set. + arma::mat&& output; +}; + +/** + * BackwardVisitor executes the Backward() function given the input, error and + * delta parameter. + */ +class BackwardVisitor : public boost::static_visitor +{ + public: + //! Execute the Backward() function given the input, error and delta + //! parameter. + BackwardVisitor(arma::mat&& input, arma::mat&& error, arma::mat&& delta); + + //! Execute the Backward() function. + template + void operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The error parameter. + arma::mat&& error; + + //! The delta parameter. + arma::mat&& delta; +}; + +/** + * ResetVisitor executes the Reset() function. + */ +class ResetVisitor : public boost::static_visitor +{ + public: + //! Execute the Reset() function. + template + void operator()(LayerType* layer) const; + + private: + //! Execute the Reset() function for a module which implements the Reset() + //! function. + template + typename std::enable_if< + HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Execute the Reset() function for a module which implements the Model() + //! function. + template + typename std::enable_if< + !HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Execute the Reset() function for a module which implements the Reset() + //! and Model() function. + template + typename std::enable_if< + HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //!
Do not execute the Reset() function for a module which doesn't implement + // the Reset() or Model() function. + template + typename std::enable_if< + !HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; +}; + +/** + * InputParameterVisitor exposes the input parameter of the given module. + */ +class InputParameterVisitor : public boost::static_visitor +{ + public: + //! Return the input parameter set. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * OutputParameterVisitor exposes the output parameter of the given module. + */ +class OutputParameterVisitor : public boost::static_visitor +{ + public: + //! Return the output parameter set. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * DeltaVisitor exposes the delta parameter of the given module. + */ +class DeltaVisitor : public boost::static_visitor +{ + public: + //! Return the delta parameter. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * ParametersVisitor exposes the parameters set of the given module and stores + * the parameters set into the given matrix. + */ +class ParametersVisitor : public boost::static_visitor +{ + public: + //! Store the parameters set into the given parameters matrix. + ParametersVisitor(arma::mat&& parameters); + + //! Set the parameters set. + template + void operator()(LayerType* layer) const; + + private: + //! The parameters set. + arma::mat&& parameters; + + //! Do not set the parameters set if the module doesn't implement the + //! Parameters() function. + template + typename std::enable_if< + !HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; + + //! Set the parameters set if the module implements the Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; +}; + +/** + * ParametersSetVisitor update the parameters set using the given matrix. + */ +class ParametersSetVisitor : public boost::static_visitor +{ + public: + //! Update the parameters set given the parameters matrix. + ParametersSetVisitor(arma::mat&& parameters); + + //! Update the parameters set. + template + void operator()(LayerType *layer) const; + + private: + //! The parameters set. + arma::mat&& parameters; + + //! Do not update the parameters set if the module doesn't implement the + //! Parameters() function. + template + typename std::enable_if< + !HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; + + //! Update the parameters set if the module implements the Parameters() + //! function. + template + typename std::enable_if< + HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; +}; + +/** + * WeightSizeVisitor returns the number of weights of the given module. + */ +class WeightSizeVisitor : public boost::static_visitor +{ + public: + //! Return the number of weights. + template + size_t operator()(LayerType* layer) const; + + private: + //! If the module doesn't implement the Parameters() or Model() function + //! return 0. + template + typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the number of parameters if the module implements the Model() + //! function. 
+ template + typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the number of parameters if the module implements the Parameters() + //! function. + template + typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the accumulated number of parameters if the module implements the + //! Parameters() and Model() function. + template + typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; +}; + +/** + * SetInputWidthVisitor updates the input width parameter with the given input + * width. + */ +class SetInputWidthVisitor : public boost::static_visitor +{ + public: + //! Update the input width parameter with the given input width. + SetInputWidthVisitor(const size_t inputWidth = 0, const bool reset = false); + + //! Update the input width parameter. + template + bool operator()(LayerType* layer) const; + + private: + //! The input width parameter. + size_t inputWidth; + + //! If set reset the height parameter if already set. + bool reset; + + //! Do nothing if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the InputWidth() or + //! Model() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; +}; + +/** + * SetInputHeightVisitor updates the input height parameter with the given input + * height. + */ +class SetInputHeightVisitor : public boost::static_visitor +{ + public: + //! Update the input height parameter with the given input height. + SetInputHeightVisitor(const size_t inputHeight = 0, const bool reset = false); + + //! Update the input height parameter. + template + bool operator()(LayerType* layer) const; + + private: + //! The input height parameter. + size_t inputHeight; + + //! If set reset the height parameter if already set. + bool reset; + + //! Do nothing if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! 
Update the input height if the module implements the InputHeight() or + //! Model() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; +}; + +/** + * OutputWidthVisitor exposes the OutputWidth() method of the given module. + */ +class OutputWidthVisitor : public boost::static_visitor +{ + public: + //! Return the output width. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the InputWidth() + //! function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() or + //! InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; +}; + +/** + * OutputWidthVisitor exposes the OutputHeight() method of the given module. + */ +class OutputHeightVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; +}; + +/** + * LastOutputWidthVisitor exposes the OutputWidth() method of the given module. + */ +class LastOutputWidthVisitor : public boost::static_visitor +{ + public: + //! Return the output width. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the InputWidth() + //! function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() function. 
+ template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() or + //! InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; +}; + +/** + * LastOutputHeightVisitor exposes the OutputHeight() method of the given module. + */ +class LastOutputHeightVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; +}; + +/** + * WeightSetVisitor update the module parameters given the parameters set. + */ +class WeightSetVisitor : public boost::static_visitor +{ + public: + //! Update the parameters given the parameters set and offset. + WeightSetVisitor(arma::mat&& weight, const size_t offset = 0); + + //! Update the parameters set. + template + size_t operator()(LayerType* layer) const; + + private: + //! The parameters set. + arma::mat&& weight; + + //! The parameters offset. + const size_t offset; + + //! Do not update the parameters if the module doesn't implement the + //! Parameters() or Model() function. + template + typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Model() function. + template + typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Model() and + //! Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; +}; + +/** + * RhoVisitor exposes the Rho() method of the given module. + */ +class RhoVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. 
+ template + typename std::enable_if< + !HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; +}; + +/** + * DeterministicSetVisitor set the deterministic parameter given the + * deterministic value. + */ +class DeterministicSetVisitor : public boost::static_visitor +{ + public: + //! Set the deterministic parameter given the current deterministic value. + DeterministicSetVisitor(const bool deterministic = true); + + //! Set the deterministic parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The deterministic parameter. + const bool deterministic; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() and Model() function. + template + typename std::enable_if< + HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Model() function. + template + typename std::enable_if< + !HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() function. + template + typename std::enable_if< + HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Do not set the deterministic parameter if the module doesn't implement the + //! Deterministic() or Model() function. + template + typename std::enable_if< + !HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; +}; + +/** + * AddVisitor exposes the Add() method of the given module. + */ +class AddVisitor : public boost::static_visitor +{ + public: + //! Exposes the Add() method of the given module. + template + AddVisitor(T newLayer); + + //! Exposes the Add() method. + template + void operator()(LayerType* layer) const; + + private: + //! The layer that should be added. + LayerTypes newLayer; + + //! Only add the layer if the module implements the Add() function. + template + typename std::enable_if< + HasAddCheck::value, void>::type + LayerAdd(T* layer) const; + + //! Do not add the layer if the module doesn't implement the Add() function. + template + typename std::enable_if< + !HasAddCheck::value, void>::type + LayerAdd(T* layer) const; +}; + +/** + * GradientSetVisitor update the gradient parameter given the gradient set. + */ +class GradientSetVisitor : public boost::static_visitor +{ + public: + //! Update the gradient parameter given the gradient set. + GradientSetVisitor(arma::mat&& gradient, size_t offset = 0); + + //! Update the gradient parameter. + template + size_t operator()(LayerType* layer) const; + + private: + //! The gradient set. 
+ arma::mat&& gradient; + + //! The gradient offset. + size_t offset; + + //! Update the gradient if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Gradient() and Model() + //! function. + template + typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not update the gradient parameter if the module doesn't implement the + //! Gradient() or Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, P& input) const; +}; + + +/** + * GradientUpdateVisitor update the gradient parameter given the gradient set. + */ +class GradientUpdateVisitor : public boost::static_visitor +{ + public: + //! Update the gradient parameter given the gradient set. + GradientUpdateVisitor(arma::mat&& gradient, size_t offset = 0); + + //! Update the gradient parameter. + template + size_t operator()(LayerType* layer) const; + + private: + //! The gradient set. + arma::mat&& gradient; + + //! The gradient offset. + size_t offset; + + //! Update the gradient if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Gradient() and Model() + //! function. + template + typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not update the gradient parameter if the module doesn't implement the + //! Gradient() or Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, P& input) const; +}; + +/* + * GradientZeroVisitor set the gradient to zero for the given module. + */ +class GradientZeroVisitor : public boost::static_visitor +{ + public: + //! Set the gradient to zero for the given module. + GradientZeroVisitor(); + + //! Set the gradient to zero. + template + void operator()(LayerType* layer) const; + + private: + //! Set the gradient to zero if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value, void>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not set the gradient to zero if the module doesn't implement the + //! Gradient() function. + template + typename std::enable_if< + !HasGradientCheck::value, void>::type + LayerGradients(T* layer, P& input) const; +}; + +/** + * SearchModeVisitor executes the Gradient() method of the given module using + * the input and delta parameter. 
+ */ +class GradientVisitor : public boost::static_visitor +{ + public: + //! Executes the Gradient() method of the given module using the input and + //! delta parameter. + GradientVisitor(arma::mat&& input, arma::mat&& delta); + + //! Executes the Gradient() method. + template + void operator()(LayerType* layer) const; + + private: + //! The input set. + arma::mat&& input; + + //! The delta parameter. + arma::mat&& delta; + + //! Execute the Gradient() function if the module implements the Gradient() + //! function. + template + typename std::enable_if< + HasGradientCheck::value, void>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not execute the Gradient() function if the module doesn't implement + //! the Gradient() function. + template + typename std::enable_if< + !HasGradientCheck::value, void>::type + LayerGradients(T* layer, P& input) const; +}; + +/** + * RewardSetVisitor set the reward parameter given the reward value. + */ +class RewardSetVisitor : public boost::static_visitor +{ + public: + //! Set the reward parameter given the reward value. + RewardSetVisitor(const double reward); + + //! Set the reward parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The reward value. + const double reward; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() and Model() function. + template + typename std::enable_if< + HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Model() function. + template + typename std::enable_if< + !HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() function. + template + typename std::enable_if< + HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Do not set the deterministic parameter if the module doesn't implement the + //! Deterministic() or Model() function. + template + typename std::enable_if< + !HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; +}; + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "layer_visitor_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp b/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp new file mode 100644 index 00000000000..482b8e10ad3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp @@ -0,0 +1,1241 @@ +/** + * @file layer_visitor_impl.hpp + * @author Marcus Edel + * + * This file provides an easy way to serialize a layer, abstracts away the + * different types of layers, and also automatically directs any function to the + * right layer type. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_IMPL_HPP + +// In case it hasn't been included yet. +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann { + +//! LoadOutputParameterVisitor visitor class. 
+inline LoadOutputParameterVisitor::LoadOutputParameterVisitor( + std::vector&& parameter) : parameter(std::move(parameter)) +{ + /* Nothing to do here. */ +} + +template +inline void LoadOutputParameterVisitor::operator()(LayerType* layer) const +{ + OutputParameter(layer); +} + +template +inline typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type +LoadOutputParameterVisitor::OutputParameter(T* layer) const +{ + layer->OutputParameter() = parameter.back(); + parameter.pop_back(); +} + +template +inline typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type +LoadOutputParameterVisitor::OutputParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(LoadOutputParameterVisitor(std::move(parameter)), + layer->Model()[layer->Model().size() - i - 1]); + } + + layer->OutputParameter() = parameter.back(); + parameter.pop_back(); +} + +//! SaveOutputParameterVisitor visitor class. +inline SaveOutputParameterVisitor::SaveOutputParameterVisitor( + std::vector&& parameter) : parameter(std::move(parameter)) +{ + /* Nothing to do here. */ +} + +template +inline void SaveOutputParameterVisitor::operator()(LayerType* layer) const +{ + OutputParameter(layer); +} + +template +inline typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type +SaveOutputParameterVisitor::OutputParameter(T* layer) const +{ + parameter.push_back(layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type +SaveOutputParameterVisitor::OutputParameter(T* layer) const +{ + parameter.push_back(layer->OutputParameter()); + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SaveOutputParameterVisitor(std::move(parameter)), + layer->Model()[i]); + } +} + +//! DeleteVisitor visitor class. +template +inline void DeleteVisitor::operator()(LayerType* layer) const +{ + if (layer) + delete layer; +} + +//! ForwardOutputVisitor visitor class. +inline ForwardOutputVisitor::ForwardOutputVisitor(arma::mat&& input, + arma::mat&& output) : + input(std::move(input)), + output(std::move(output)) +{ + /* Nothing to do here. */ +} + +template +inline double ForwardOutputVisitor::operator()(LayerType* layer) const +{ + return layer->Forward(std::move(input), std::move(output)); +} + +//! ForwardVisitor visitor class. +inline ForwardVisitor::ForwardVisitor(arma::mat&& input, arma::mat&& output) : + input(std::move(input)), + output(std::move(output)) +{ + /* Nothing to do here. */ +} + +template +inline void ForwardVisitor::operator()(LayerType* layer) const +{ + layer->Forward(std::move(input), std::move(output)); +} + +//! BackwardVisitor visitor class. +inline BackwardVisitor::BackwardVisitor(arma::mat&& input, + arma::mat&& error, + arma::mat&& delta) : + input(std::move(input)), + error(std::move(error)), + delta(std::move(delta)) +{ + /* Nothing to do here. */ +} + +template +inline void BackwardVisitor::operator()(LayerType* layer) const +{ + layer->Backward(std::move(input), std::move(error), std::move(delta)); +} + +//! ResetVisitor visitor class. 
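Every visitor in this file relies on the same compile-time dispatch trick: a trait in the HasModelCheck/HasResetCheck family detects whether the layer type exposes a given member, and std::enable_if selects the matching overload. The standalone sketch below uses a simplified detection trait and two toy layer types to show the mechanism in isolation; it is an illustration of the technique, not mlpack's actual HasMethodForm machinery.

// Self-contained illustration of enable_if-based visitor dispatch.
#include <iostream>
#include <type_traits>
#include <utility>
#include <vector>

// Simplified stand-in for HasModelCheck: true if T has a Model() member.
template<typename T, typename = void>
struct HasModel : std::false_type { };

template<typename T>
struct HasModel<T, decltype(void(std::declval<T&>().Model()))>
    : std::true_type { };

struct Plain { };                        // No sub-model.
struct Container                         // Owns sub-modules.
{
  std::vector<int>& Model() { return modules; }
  std::vector<int> modules;
};

// Overload chosen for layers that expose Model(): recurse into sub-modules.
template<typename T>
typename std::enable_if<HasModel<T>::value, void>::type
Visit(T& layer)
{
  std::cout << "container with " << layer.Model().size() << " modules\n";
}

// Overload chosen for all other layers: nothing to recurse into.
template<typename T>
typename std::enable_if<!HasModel<T>::value, void>::type
Visit(T& /* layer */)
{
  std::cout << "plain layer\n";
}

int main()
{
  Plain p;
  Container c;
  Visit(p);   // prints "plain layer"
  Visit(c);   // prints "container with 0 modules"
  return 0;
}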
+template +inline void ResetVisitor::operator()(LayerType* layer) const +{ + ResetParameter(layer); +} + +template +inline typename std::enable_if< + HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + layer->Reset(); +} + +template +inline typename std::enable_if< + !HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(ResetVisitor(), layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(ResetVisitor(), layer->Model()[i]); + } + + layer->Reset(); +} + +template +inline typename std::enable_if< + !HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* /* layer */) const +{ + /* Nothing to do here. */ +} + +//! InputParameterVisitor visitor class. +template +inline arma::mat& InputParameterVisitor::operator()(LayerType *layer) const +{ + return layer->InputParameter(); +} + +//! OutputParameterVisitor visitor class. +template +inline arma::mat& OutputParameterVisitor::operator()(LayerType *layer) const +{ + return layer->OutputParameter(); +} + +//! DeltaVisitor visitor class. +template +inline arma::mat& DeltaVisitor::operator()(LayerType *layer) const +{ + return layer->Delta(); +} + +//! ParametersVisitor visitor class. +inline ParametersVisitor::ParametersVisitor(arma::mat&& parameters) : + parameters(std::move(parameters)) +{ + /* Nothing to do here. */ +} + +template +inline void ParametersVisitor::operator()(LayerType *layer) const +{ + LayerParameters(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value, void>::type +ParametersVisitor::LayerParameters(T* /* layer */, P& /* output */) const +{ + /* Nothing to do here. */ +} + +template +inline typename std::enable_if< + HasParametersCheck::value, void>::type +ParametersVisitor::LayerParameters(T* layer, P& /* output */) const +{ + parameters = layer->Parameters(); +} + +//! ParametersSetVisitor visitor class. +inline ParametersSetVisitor::ParametersSetVisitor(arma::mat&& parameters) : + parameters(std::move(parameters)) +{ + /* Nothing to do here. */ +} + +template +inline void ParametersSetVisitor::operator()(LayerType *layer) const +{ + LayerParameters(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value, void>::type +ParametersSetVisitor::LayerParameters(T* /* layer */, P& /* output */) const +{ + /* Nothing to do here. */ +} + +template +inline typename std::enable_if< + HasParametersCheck::value, void>::type +ParametersSetVisitor::LayerParameters(T* layer, P& /* output */) const +{ + layer->Parameters() = parameters; +} + +//! WeightSizeVisitor visitor class. 
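WeightSizeVisitor, implemented next, returns the number of trainable parameters of a module and recurses into Model(), so container layers report the total of their children. The network classes can then allocate one flat parameter matrix up front; WeightSetVisitor and GradientSetVisitor later walk the same offsets to alias each layer's Parameters() and Gradient() into that matrix. A usage sketch follows; the surrounding setup is assumed, only the visitor and layer names come from this patch.

// Assumed setup: LayerTypes is the variant of layer pointers from
// layer_types.hpp; Linear and LeakyReLU are the classes added in this patch.
std::vector<LayerTypes> network;
network.push_back(new Linear<>(10, 5));
network.push_back(new LeakyReLU<>(0.03));
network.push_back(new Linear<>(5, 2));

size_t weightSize = 0;
for (size_t i = 0; i < network.size(); ++i)
  weightSize += boost::apply_visitor(WeightSizeVisitor(), network[i]);

// Linear(10, 5) stores 10 * 5 + 5 = 55 elements, LeakyReLU stores none, and
// Linear(5, 2) stores 5 * 2 + 2 = 12, so weightSize is 67 here.
arma::mat parameter(weightSize, 1);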
+template +inline size_t WeightSizeVisitor::operator()(LayerType* layer) const +{ + return LayerSize(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* /* layer */, P& /* output */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + size_t weights = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + weights += boost::apply_visitor(WeightSizeVisitor(), layer->Model()[i]); + } + + return weights; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + size_t weights = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + weights += boost::apply_visitor(WeightSizeVisitor(), layer->Model()[i]); + } + + return weights; +} + +//! SetInputWidthVisitor visitor class. +inline SetInputWidthVisitor::SetInputWidthVisitor(const size_t inputWidth, + const bool reset) : + inputWidth(inputWidth), + reset(reset) +{ + /* Nothing to do here. */ +} + +template +inline bool SetInputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerInputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* /* layer */) const +{ + return false; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + if (layer->InputWidth() == 0 || reset) + { + layer->InputWidth() = inputWidth; + } + + return true; +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputWidthVisitor(inputWidth, reset), + layer->Model()[i]); + } + + return true; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + if (layer->InputWidth() == 0 || reset) + { + layer->InputWidth() = inputWidth; + } + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputWidthVisitor(inputWidth, reset), + layer->Model()[i]); + } + + return true; +} + +//! SetInputHeightVisitor visitor class. +inline SetInputHeightVisitor::SetInputHeightVisitor(const size_t inputHeight, + const bool reset) : + inputHeight(inputHeight), + reset(reset) +{ + /* Nothing to do here. 
*/ +} + +template +inline bool SetInputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerInputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* /* layer */) const +{ + return false; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + if (layer->InputHeight() == 0 || reset) + { + layer->InputHeight() = inputHeight; + } + + return true; +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputHeightVisitor(inputHeight, reset), + layer->Model()[i]); + } + + return true; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + if (layer->InputHeight() == 0 || reset) + { + layer->InputHeight() = inputHeight; + } + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputHeightVisitor(inputHeight, reset), + layer->Model()[i]); + } + + return true; +} + +//! OutputWidthVisitor visitor class. +template +inline size_t OutputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerOutputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + return layer->OutputWidth(); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + size_t outputWidth = layer->OutputWidth(); + + if (outputWidth == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + } + + return outputWidth; +} + +//! OutputHeightVisitor visitor class. 
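The width/height visitors in this stretch let layers that care about spatial shape (convolution, pooling) be wired together without the user spelling out every intermediate size: the network queries a layer's output width/height and pushes the result into the next layer as its input width/height, where a return value of 0 means "no opinion, keep the previous value". The loop below is a hedged sketch of that propagation; the driver code and the network object are assumptions.

// Assumed driver loop propagating spatial dimensions through the layer list.
size_t width = 28, height = 28;  // e.g. a 28x28 input image
for (size_t i = 0; i < network.size(); ++i)
{
  // Only layers with InputWidth()/InputHeight() react to these.
  boost::apply_visitor(SetInputWidthVisitor(width, false), network[i]);
  boost::apply_visitor(SetInputHeightVisitor(height, false), network[i]);

  // 0 means the layer does not change (or does not know) the spatial size.
  const size_t w = boost::apply_visitor(OutputWidthVisitor(), network[i]);
  const size_t h = boost::apply_visitor(OutputHeightVisitor(), network[i]);
  if (w != 0) width = w;
  if (h != 0) height = h;
}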
+template +inline size_t OutputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerOutputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + return layer->OutputHeight(); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + size_t outputHeight = layer->OutputHeight(); + + if (outputHeight == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + } + + return outputHeight; +} + +//! LastOutputWidthVisitor visitor class. +template +inline size_t LastOutputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerOutputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + return layer->OutputWidth(); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputWidth = boost::apply_visitor(LastOutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + size_t outputWidth = layer->OutputWidth(); + + if (outputWidth == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + } + + return outputWidth; +} + +//! LastOutputHeightVisitor visitor class. 
+template +inline size_t LastOutputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerOutputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + return layer->OutputHeight(); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputHeight = boost::apply_visitor(LastOutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + size_t outputHeight = layer->OutputHeight(); + + if (outputHeight == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + } + + return outputHeight; +} + +//! WeightSetVisitor visitor class. +inline WeightSetVisitor::WeightSetVisitor(arma::mat&& weight, + const size_t offset) : + weight(std::move(weight)), + offset(offset) +{ + /* Nothing to do here. */ +} + +template +inline size_t WeightSetVisitor::operator()(LayerType* layer) const +{ + return LayerSize(layer, std::move(layer->OutputParameter())); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* /* layer */, P&& /*output */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /*output */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(WeightSetVisitor( + std::move(weight), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /* output */) const +{ + layer->Parameters() = arma::mat(weight.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /* output */) const +{ + layer->Parameters() = arma::mat(weight.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(WeightSetVisitor( + std::move(weight), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +//! RhoVisitor visitor class. 
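RhoVisitor, implemented next, reports a module's backpropagation-through-time horizon (Rho()), searching sub-models until some recurrent module answers with a non-zero value; this lets the network discover how many time steps it has to unroll. A short assumed-usage fragment:

// Assumed lookup: take the first non-zero rho reported by any module.
size_t rho = 0;
for (size_t i = 0; i < network.size(); ++i)
{
  rho = boost::apply_visitor(RhoVisitor(), network[i]);
  if (rho != 0)
    break;
}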
+template +inline size_t RhoVisitor::operator()(LayerType* layer) const +{ + return LayerRho(layer); +} + +template +inline typename std::enable_if< + !HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + size_t moduleRho = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + moduleRho = boost::apply_visitor(RhoVisitor(), layer->Model()[i]); + if (moduleRho != 0) + { + return moduleRho; + } + } + + return moduleRho; +} + +template +inline typename std::enable_if< + HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + return layer->Rho(); +} + +template +inline typename std::enable_if< + HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + return layer->Rho(); +} + +//! DeterministicSetVisitor visitor class. +inline DeterministicSetVisitor::DeterministicSetVisitor( + const bool deterministic) : deterministic(deterministic) +{ + /* Nothing to do here. */ +} + +template +inline void DeterministicSetVisitor::operator()(LayerType* layer) const +{ + LayerDeterministic(layer); +} + +template +inline typename std::enable_if< + HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + layer->Deterministic() = deterministic; + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(DeterministicSetVisitor(deterministic), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + !HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(DeterministicSetVisitor(deterministic), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + layer->Deterministic() = deterministic; +} + +template +inline typename std::enable_if< + !HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* /* input */) const +{ + /* Nothing to do here. */ +} + +//! AddVisitor visitor class. +template +inline AddVisitor::AddVisitor(T newLayer) : + newLayer(std::move(newLayer)) +{ + /* Nothing to do here. */ +} + +template +inline void AddVisitor::operator()(LayerType* layer) const +{ + LayerAdd(layer); +} + +template +inline typename std::enable_if< + HasAddCheck::value, void>::type +AddVisitor::LayerAdd(T* layer) const +{ + layer->Add(newLayer); +} + +template +inline typename std::enable_if< + !HasAddCheck::value, void>::type +AddVisitor::LayerAdd(T* /* layer */) const +{ + /* Nothing to do here. */ +} + +//! GradientSetVisitor visitor class. +inline GradientSetVisitor::GradientSetVisitor(arma::mat&& gradient, + size_t offset) : + gradient(std::move(gradient)), + offset(offset) +{ + /* Nothing to do here. 
*/ +} + +template +inline size_t GradientSetVisitor::operator()(LayerType* layer) const +{ + return LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient() = arma::mat(gradient.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientSetVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient() = arma::mat(gradient.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientSetVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + return 0; +} + +//! GradientUpdateVisitor visitor class. +inline GradientUpdateVisitor::GradientUpdateVisitor(arma::mat&& gradient, + size_t offset) : + gradient(std::move(gradient)), + offset(offset) +{ + /* Nothing to do here. 
*/ +} + +template +inline size_t GradientUpdateVisitor::operator()(LayerType* layer) const +{ + return LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + if (layer->Parameters().n_elem != 0) + { + layer->Gradient() = gradient.submat(offset, 0, + offset + layer->Parameters().n_elem - 1, 0);; + } + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientUpdateVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + if (layer->Parameters().n_elem != 0) + { + layer->Gradient() = gradient.submat(offset, 0, + offset + layer->Parameters().n_elem - 1, 0);; + } + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientUpdateVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + return 0; +} + +//! GradientZeroVisitor visitor class. +inline GradientZeroVisitor::GradientZeroVisitor() +{ + /* Nothing to do here. */ +} + +template +inline void GradientZeroVisitor::operator()(LayerType* layer) const +{ + LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value, void>::type +GradientZeroVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient().zeros(); +} + +template +inline typename std::enable_if< + !HasGradientCheck::value, void>::type +GradientZeroVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + /* Nothing to do here. */ +} + +//! GradientVisitor visitor class. +inline GradientVisitor::GradientVisitor(arma::mat&& input, arma::mat&& delta) : + input(std::move(input)), + delta(std::move(delta)) +{ + /* Nothing to do here. */ +} + +template +inline void GradientVisitor::operator()(LayerType* layer) const +{ + LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value, void>::type +GradientVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient(std::move(input), std::move(delta), + std::move(layer->Gradient())); +} + +template +inline typename std::enable_if< + !HasGradientCheck::value, void>::type +GradientVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + /* Nothing to do here. */ +} + +//! RewardSetVisitor visitor class. +inline RewardSetVisitor::RewardSetVisitor(const double reward) : reward(reward) +{ + /* Nothing to do here. 
*/ +} + +template +inline void RewardSetVisitor::operator()(LayerType* layer) const +{ + LayerReward(layer); +} + +template +inline typename std::enable_if< + HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + layer->Reward() = reward; + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(RewardSetVisitor(reward), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + !HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(RewardSetVisitor(reward), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + layer->Reward() = reward; +} + +template +inline typename std::enable_if< + !HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* /* input */) const +{ + /* Nothing to do here. */ +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp similarity index 63% rename from src/mlpack/methods/ann/layer/leaky_relu_layer.hpp rename to src/mlpack/methods/ann/layer/leaky_relu.hpp index a87792e4295..d8160f1a50b 100644 --- a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp +++ b/src/mlpack/methods/ann/layer/leaky_relu.hpp @@ -1,9 +1,9 @@ /** - * @file leaky_relu_layer.hpp + * @file leaky_relu.hpp * @author Dhawal Arora * - * Definition and implementation of LeakyReLULayer layer first introduced - * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, + * Definition of LeakyReLU layer first introduced in the acoustic model, + * Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 * * mlpack is free software; you may redistribute it and/or modify it under the @@ -11,8 +11,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP +#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP #include @@ -41,20 +41,17 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class LeakyReLULayer +class LeakyReLU { public: /** - * Create the LeakyReLULayer object using the specified parameters. + * Create the LeakyReLU object using the specified parameters. * The non zero gradient can be adjusted by specifying tha parameter * alpha in the range 0 to 1. Default (alpha = 0.03) * * @param alpha Non zero gradient */ - LeakyReLULayer(const double alpha = 0.03) : alpha(alpha) - { - // Nothing to do here. - } + LeakyReLU(const double alpha = 0.03); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -64,10 +61,7 @@ class LeakyReLULayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) - { - Fn(input, output); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -79,50 +73,7 @@ class LeakyReLULayer * @param g The calculated gradient. 
*/ template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; - } + void Backward(const DataType&& input, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -148,10 +99,7 @@ class LeakyReLULayer * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(alpha, "alpha"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: /** @@ -177,20 +125,6 @@ class LeakyReLULayer y = arma::max(x, alpha * x); } - /** - * Computes the LeakyReLU function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - fn(x.slice(s), y.slice(s)); - } - /** * Computes the first derivative of the LeakyReLU function. * @@ -215,11 +149,11 @@ class LeakyReLULayer y = x; for (size_t i = 0; i < x.n_elem; i++) + { y(i) = Deriv(x(i)); + } } - - //! Locally-stored delta object. OutputDataType delta; @@ -232,9 +166,12 @@ class LeakyReLULayer //! Leakyness Parameter in the range 0 +LeakyReLU::LeakyReLU( + const double alpha) : alpha(alpha) +{ + // Nothing to do here. +} + +template +template +void LeakyReLU::Forward( + const InputType&& input, OutputType&& output) +{ + Fn(input, output); +} + +template +template +void LeakyReLU::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + DataType derivative; + Deriv(input, derivative); + g = gy % derivative; +} + +template +template +void LeakyReLU::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(alpha, "alpha"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_layer.hpp b/src/mlpack/methods/ann/layer/linear.hpp similarity index 50% rename from src/mlpack/methods/ann/layer/recurrent_layer.hpp rename to src/mlpack/methods/ann/layer/linear.hpp index 5e231a7480b..0f6be78b647 100644 --- a/src/mlpack/methods/ann/layer/recurrent_layer.hpp +++ b/src/mlpack/methods/ann/layer/linear.hpp @@ -1,27 +1,28 @@ /** - * @file recurrent_layer.hpp + * @file linear.hpp * @author Marcus Edel * - * Definition of the RecurrentLayer class. + * Definition of the Linear layer class also known as fully-connected layer or + * affine transformation. 
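The LeakyReLU implementation above reduces to two element-wise rules: forward f(x) = max(x, alpha * x) and backward g = gy % f'(x) with f'(x) = 1 for x >= 0 and alpha otherwise. The small standalone Armadillo program below checks those rules on a few values; it is illustrative only and not part of the patch.

#include <armadillo>

int main()
{
  const double alpha = 0.03;
  arma::mat x = { { -2.0, -0.5, 0.0, 1.5 } };

  // Forward: max(x, alpha * x) -> [-0.06, -0.015, 0, 1.5].
  arma::mat y = arma::max(x, alpha * x);

  // Backward: incoming error times the derivative, which is 1 for x >= 0 and
  // alpha otherwise -> [0.03, 0.03, 1, 1] for a unit incoming error.
  arma::mat gy = arma::ones<arma::mat>(arma::size(x));
  arma::mat deriv = x;
  deriv.transform([alpha](double v) { return (v >= 0.0) ? 1.0 : alpha; });
  arma::mat g = gy % deriv;

  y.print("forward");
  g.print("backward");
  return 0;
}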
* * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_HPP #include -#include + +#include "layer_types.hpp" namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of the RecurrentLayer class. Recurrent layers can be used - * similarly to feed-forward layers except that the input isn't stored in the - * inputParameter, instead it's in stored in the recurrentParameter. + * Implementation of the Linear layer class. The Linear class represents a + * single layer of a neural network. * * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). @@ -32,35 +33,24 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class RecurrentLayer +class Linear { public: + //! Create the Linear object. + Linear(); + /** - * Create the RecurrentLayer object using the specified number of units. + * Create the Linear layer object using the specified number of units. * * @param inSize The number of input units. * @param outSize The number of output units. */ - RecurrentLayer(const size_t inSize, const size_t outSize) : - inSize(outSize), - outSize(outSize), - recurrentParameter(arma::zeros(inSize, 1)) - { - weights.set_size(outSize, inSize); - } + Linear(const size_t inSize, const size_t outSize);; - /** - * Create the RecurrentLayer object using the specified number of units. - * - * @param outSize The number of output units. + /* + * Reset the layer parameter. */ - RecurrentLayer(const size_t outSize) : - inSize(outSize), - outSize(outSize), - recurrentParameter(arma::zeros(outSize, 1)) - { - weights.set_size(outSize, inSize); - } + void Reset(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -70,10 +60,7 @@ class RecurrentLayer * @param output Resulting output activation. */ template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + weights * recurrentParameter; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -84,44 +71,33 @@ class RecurrentLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::mat& g) - { - g = (weights).t() * gy; - } + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. * - * @param input The propagated input activation. - * @param d The calculated error. - * @param g The calculated gradient. + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& d, - GradientDataType& g) - { - g = d * recurrentParameter.t(); - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. 
- OutputDataType& Weights() { return weights; } + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient); + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } //! Modify the input parameter. InputDataType& InputParameter() { return inputParameter; } - //! Get the input parameter. - InputDataType const& RecurrentParameter() const { return recurrentParameter; } - //! Modify the input parameter. - InputDataType& RecurrentParameter() { return recurrentParameter; } - //! Get the output parameter. OutputDataType const& OutputParameter() const { return outputParameter; } //! Modify the output parameter. @@ -138,14 +114,10 @@ class RecurrentLayer OutputDataType& Gradient() { return gradient; } /** - * Serialize the layer. + * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(recurrentParameter, "recurrentParameter"); - ar & data::CreateNVP(weights, "weights"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of input units. @@ -157,6 +129,12 @@ class RecurrentLayer //! Locally-stored weight object. OutputDataType weights; + //! Locally-stored weight paramters. + OutputDataType weight; + + //! Locally-stored bias term parameters. + OutputDataType bias; + //! Locally-stored delta object. OutputDataType delta; @@ -168,25 +146,12 @@ class RecurrentLayer //! Locally-stored output parameter object. OutputDataType outputParameter; - - //! Locally-stored recurrent parameter object. - InputDataType recurrentParameter; -}; // class RecurrentLayer - -//! Layer traits for the recurrent layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; +}; // class Linear } // namespace ann } // namespace mlpack +// Include implementation. +#include "linear_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/linear_impl.hpp b/src/mlpack/methods/ann/layer/linear_impl.hpp new file mode 100644 index 00000000000..8b726e5d255 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_impl.hpp @@ -0,0 +1,87 @@ +/** + * @file linear_impl.hpp + * @author Marcus Edel + * + * Implementation of the Linear layer class also known as fully-connected layer + * or affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Linear::Linear() +{ + // Nothing to do here. 
+} + +template +Linear::Linear( + const size_t inSize, + const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize * inSize + outSize, 1); +} + +template +void Linear::Reset() +{ + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); +} + +template +template +void Linear::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = (weight * input) + bias; +} + +template +template +void Linear::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = weight.t() * gy; +} + +template +template +void Linear::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; +} + +template +template +void Linear::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp deleted file mode 100644 index b3b3dbf0266..00000000000 --- a/src/mlpack/methods/ann/layer/linear_layer.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * @file linear_layer.hpp - * @author Marcus Edel - * - * Definition of the LinearLayer class also known as fully-connected layer or - * affine transformation. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the LinearLayer class. The LinearLayer class represents a - * single layer of a neural network. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LinearLayer -{ - public: - /** - * Create the LinearLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - LinearLayer(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. 
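To make the new Linear implementation concrete: all parameters live in one (outSize * inSize + outSize) x 1 column, Reset() aliases weight (outSize x inSize) and bias (outSize x 1) into that memory, and Gradient() fills the same flattened layout, weight part first, bias part last. The fragment below is a hedged usage sketch; the driver code is assumed, only the Linear API comes from this patch.

// Assumed driver code exercising Linear<> directly, outside the FFN class.
Linear<> layer(3, 2);                  // weights: 3 * 2 + 2 = 8 elements
layer.Parameters().randu();
layer.Reset();                         // weight is now 2x3, bias 2x1, both views

arma::mat input = arma::randu<arma::mat>(3, 1);
arma::mat output;
layer.Forward(std::move(input), std::move(output));  // weight * input + bias

// Gradient layout matches the parameter layout: 6 weight entries
// (vectorise(error * input.t())) followed by the 2 bias entries (error).
arma::mat error = arma::ones<arma::mat>(2, 1);
arma::mat gradient = arma::zeros<arma::mat>(8, 1);
layer.Gradient(std::move(input), std::move(error), std::move(gradient));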
- */ - template - void Forward(const arma::Cube& input, arma::Mat& output) - { - arma::Mat data(input.n_elem, 1); - - for (size_t s = 0, c = 0; s < input.n_slices / data.n_cols; s++) - { - for (size_t i = 0; i < data.n_cols; i++, c++) - { - data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) * - input.n_rows * input.n_cols - 1) = arma::trans(arma::vectorise( - input.slice(c), 1)); - } - } - - output = weights * data; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = weights.t() * gy; - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param error The calculated error. - * @param gradient The calculated gradient. - */ - template - void Gradient(const InputType& input, - const ErrorType& error, - GradientType& gradient) - { - GradientDelta(input, error, gradient); - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - } - - private: - /* - * Calculate the gradient using the output delta (3rd order tensor) and the - * input activation (3rd order tensor). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Cube& input, - const arma::Mat& d, - arma::Cube& g) - { - g = arma::Cube(weights.n_rows, weights.n_cols, 1); - arma::Mat data = arma::Mat(d.n_cols, - input.n_elem / d.n_cols); - - for (size_t s = 0, c = 0; s < input.n_slices / - data.n_rows; s++) - { - for (size_t i = 0; i < data.n_rows; i++, c++) - { - data.row(i).subvec(s * input.n_rows * - input.n_cols, (s + 1) * - input.n_rows * - input.n_cols - 1) = arma::vectorise( - input.slice(c), 1); - } - } - - g.slice(0) = d * data / d.n_cols; - } - - /* - * Calculate the gradient (3rd order tensor) using the output delta - * (dense matrix) and the input activation (dense matrix). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. 
- */ - template - void GradientDelta(const arma::Mat& input, - const arma::Mat& d, - arma::Cube& g) - { - g = arma::Cube(weights.n_rows, weights.n_cols, 1); - Gradient(input, d, g.slice(0)); - } - - /* - * Calculate the gradient (dense matrix) using the output delta - * (dense matrix) and the input activation (3rd order tensor). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Cube& input, - const arma::Mat& d, - arma::Mat& g) - { - arma::Cube grad = arma::Cube(weights.n_rows, weights.n_cols, 1); - Gradient(input, d, grad); - g = grad.slice(0); - } - - /* - * Calculate the gradient (dense matrix) using the output delta - * (dense matrix) and the input activation (dense matrix). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Mat& input, - const arma::Mat& d, - arma::Mat& g) - { - g = d * input.t(); - } - - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class LinearLayer - -/** - * Linear Mapping layer to map between 3rd order tensors and dense matrices. - */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::mat -> -using LinearMappingLayer = LinearLayer; - -//! Layer traits for the linear layer. -template< - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp new file mode 100644 index 00000000000..972f72db8ec --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -0,0 +1,154 @@ +/** + * @file linear.hpp + * @author Marcus Edel + * + * Definition of the LinearNoBias class also known as fully-connected layer or + * affine transformation without the bias term. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_HPP + +#include + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the LinearNoBias class. The LinearNoBias class represents a + * single layer of a neural network. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LinearNoBias +{ + public: + //! Create the LinearNoBias object. + LinearNoBias(); + /** + * Create the LinearNoBias object using the specified number of units. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + */ + LinearNoBias(const size_t inSize, const size_t outSize); + + /* + * Reset the layer parameter. + */ + void Reset(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient); + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight parameter. + OutputDataType weight; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LinearNoBias + +} // namespace ann +} // namespace mlpack + +// Include implementation. 
+#include "linear_no_bias_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp b/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp new file mode 100644 index 00000000000..15f96cdb537 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp @@ -0,0 +1,83 @@ +/** + * @file linear_no_bias_impl.hpp + * @author Marcus Edel + * + * Implementation of the LinearNoBias class also known as fully-connected layer + * or affine transformation without the bias term. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear_no_bias.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LinearNoBias::LinearNoBias() +{ + // Nothing to do here. +} + +template +LinearNoBias::LinearNoBias( + const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize * inSize, 1); +} + +template +void LinearNoBias::Reset() +{ + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); +} + +template +template +void LinearNoBias::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = weight * input; +} + +template +template +void LinearNoBias::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = weight.t() * gy; +} + +template +template +void LinearNoBias::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); +} + +template +template +void LinearNoBias::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp similarity index 62% rename from src/mlpack/methods/ann/layer/log_softmax_layer.hpp rename to src/mlpack/methods/ann/layer/log_softmax.hpp index 2b417e32b61..df9872d6c8a 100644 --- a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -1,16 +1,16 @@ /** - * @file log_softmax_layer.hpp + * @file log_softmax.hpp * @author Marcus Edel * - * Definition of the LogSoftmaxLayer class. + * Definition of the LogSoftmax class. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_HPP +#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_HPP #include @@ -33,13 +33,13 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class LogSoftmaxLayer +class LogSoftMax { public: /** - * Create the LogSoftmaxLayer object. + * Create the LogSoftmax object. 
*/ - LogSoftmaxLayer() { /* Nothing to do here. */ } + LogSoftMax(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -48,39 +48,8 @@ class LogSoftmaxLayer * @param input Input data used for evaluating the specified function. * @param output Resulting output activation. */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); - output = (maxInput - input); - - // Approximation of the hyperbolic tangent. The acuracy however is - // about 0.00001 lower as using tanh. Credits go to Leon Bottou. - output.transform( [](double x) - { - //! Fast approximation of exp(-x) for x positive. - static constexpr double A0 = 1.0; - static constexpr double A1 = 0.125; - static constexpr double A2 = 0.0078125; - static constexpr double A3 = 0.00032552083; - static constexpr double A4 = 1.0172526e-5; - - if (x < 13.0) - { - double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); - y *= y; - y *= y; - y *= y; - y = 1 / y; - - return y; - } - - return 0.0; - } ); - - output = input - (maxInput + std::log(arma::accu(output))); - } + template + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -92,12 +61,9 @@ class LogSoftmaxLayer * @param g The calculated gradient. */ template - void Backward(const arma::Mat& input, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy - arma::exp(input) * arma::accu(gy); - } + void Backward(const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -114,6 +80,12 @@ class LogSoftmaxLayer //! Modify the delta. InputDataType& Delta() { return delta; } + /** + * Serialize the layer. + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -123,9 +95,12 @@ class LogSoftmaxLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class LogSoftmaxLayer +}; // class LogSoftmax + +} // namespace ann +} // namespace mlpack -}; // namespace ann -}; // namespace mlpack +// Include implementation. +#include "log_softmax_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/log_softmax_impl.hpp b/src/mlpack/methods/ann/layer/log_softmax_impl.hpp new file mode 100644 index 00000000000..68fba8d8459 --- /dev/null +++ b/src/mlpack/methods/ann/layer/log_softmax_impl.hpp @@ -0,0 +1,85 @@ +/** + * @file log_softmax_impl.hpp + * @author Marcus Edel + * + * Implementation of the LogSoftmax class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_IMPL_HPP + +// In case it hasn't yet been included. +#include "log_softmax.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LogSoftMax::LogSoftMax() +{ + // Nothing to do here. +} + +template +template +void LogSoftMax::Forward( + const InputType&& input, OutputType&& output) +{ + arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); + output = (maxInput - input); + + // Approximation of the hyperbolic tangent. 
The acuracy however is + // about 0.00001 lower as using tanh. Credits go to Leon Bottou. + output.transform( [](double x) + { + //! Fast approximation of exp(-x) for x positive. + static constexpr double A0 = 1.0; + static constexpr double A1 = 0.125; + static constexpr double A2 = 0.0078125; + static constexpr double A3 = 0.00032552083; + static constexpr double A4 = 1.0172526e-5; + + if (x < 13.0) + { + double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); + y *= y; + y *= y; + y *= y; + y = 1 / y; + + return y; + } + + return 0.0; + } ); + + output = input - (maxInput + std::log(arma::accu(output))); +} + +template +template +void LogSoftMax::Backward( + const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy - arma::exp(input) * arma::accu(gy); +} + +template +template +void LogSoftMax::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp b/src/mlpack/methods/ann/layer/lookup.hpp similarity index 55% rename from src/mlpack/methods/ann/layer/sparse_input_layer.hpp rename to src/mlpack/methods/ann/layer/lookup.hpp index 6b1d9d118f9..080d4b5746e 100644 --- a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -1,29 +1,27 @@ /** - * @file sparse_input_layer.hpp - * @author Tham Ngap Wei + * @file lookup.hpp + * @author Marcus Edel * - * Definition of the sparse input class which serve as the first layer - * of the sparse autoencoder + * Definition of the Lookup class a particular convolution, where the width of + * the convolution is 1. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LOOKUP_HPP +#define MLPACK_METHODS_ANN_LAYER_LOOKUP_HPP #include #include -#include - namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of the SparseInputLayer. The SparseInputLayer class represents - * the first layer of sparse autoencoder + * Implementation of the Lookup class. The Lookup class is a particular + * convolution, where the width of the convolution is 1. * * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). @@ -33,26 +31,18 @@ namespace ann /** Artificial Neural Network. */ { template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat - > -class SparseInputLayer +> +class Lookup { public: /** - * Create the SparseInputLayer object using the specified number of units. + * Create the Lookup object using the specified number of input and output + * units. * * @param inSize The number of input units. * @param outSize The number of output units. - * @param lambda L2-regularization parameter. 
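A note on the LogSoftMax::Forward() implementation above: it is the standard log-sum-exp trick, log softmax(x)_i = x_i - (max(x) + log(sum_j exp(x_j - max(x)))), with exp(-(max(x) - x_j)) replaced by the cited polynomial approximation for speed. A minimal sketch of the underlying equivalence in plain Armadillo (small inputs, so approximation error is irrelevant here):

    arma::vec x = arma::randu<arma::vec>(5);

    arma::vec naive  = x - std::log(arma::accu(arma::exp(x)));
    arma::vec stable = x - (x.max() + std::log(arma::accu(arma::exp(x - x.max()))));

    // naive and stable agree to machine precision, but only the max-shifted
    // form avoids overflow when the entries of x are large.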
*/ - SparseInputLayer(const size_t inSize, - const size_t outSize, - const double lambda = 0.0001) : - inSize(inSize), - outSize(outSize), - lambda(lambda) - { - weights.set_size(outSize, inSize); - } + Lookup(const size_t inSize, const size_t outSize); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -62,10 +52,7 @@ class SparseInputLayer * @param output Resulting output activation. */ template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -76,34 +63,27 @@ class SparseInputLayer * @param gy The backpropagated error. * @param g The calculated gradient. */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy; - } + template + void Backward(const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. */ - template - void Gradient(const InputType& input, - const arma::Mat& d, - GradientDataType& g) - { - g = d * input.t() / static_cast( - input.n_cols) + lambda * weights; - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient); + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -126,25 +106,19 @@ class SparseInputLayer OutputDataType& Gradient() { return gradient; } /** - * Serialize the layer. + * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(lambda, "lambda"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: + //! Locally-stored number of input units. size_t inSize; //! Locally-stored number of output units. size_t outSize; - //! L2-regularization parameter. - double lambda; - //! Locally-stored weight object. OutputDataType weights; @@ -159,22 +133,12 @@ class SparseInputLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class SparseInputLayer - -//! Layer traits for the SparseInputLayer. -template -class LayerTraits > -{ -public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; +}; // class Lookup } // namespace ann } // namespace mlpack +// Include implementation. 
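The implementation that follows makes the Lookup layer an embedding table: Parameters() is an outSize x inSize matrix and Forward() selects the columns named by the (one-based) indices in the input. A small standalone sketch with hypothetical sizes:

    using namespace mlpack::ann;

    Lookup<> lookup(5, 3);          // Vocabulary of 5 symbols, 3-dimensional embeddings.
    lookup.Parameters().randu();    // 3x5 embedding table.

    arma::mat indices("2; 4");      // Look up symbols 2 and 4 (one-based).
    arma::mat output;
    lookup.Forward(std::move(indices), std::move(output));   // output is 3x2: columns 2 and 4.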
+#include "lookup_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/lookup_impl.hpp b/src/mlpack/methods/ann/layer/lookup_impl.hpp new file mode 100644 index 00000000000..cbe24468ed3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lookup_impl.hpp @@ -0,0 +1,74 @@ +/** + * @file lookup_impl.hpp + * @author Marcus Edel + * + * Implementation of the Lookup class a particular convolution, where the width + * of the convolution is 1. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOOKUP_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LOOKUP_IMPL_HPP + +// In case it hasn't yet been included. +#include "lookup.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Lookup::Lookup( + const size_t inSize, + const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize, inSize); +} + +template +template +void Lookup::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = weights.cols(arma::conv_to::from(input) - 1); +} + +template +template +void Lookup::Backward( + const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy; +} + +template +template +void Lookup::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient = arma::zeros >(weights.n_rows, weights.n_cols); + gradient.cols(arma::conv_to::from(input) - 1) = error; +} + +template +template +void Lookup::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp new file mode 100644 index 00000000000..cf70e344d94 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -0,0 +1,237 @@ +/** + * @file lstm.hpp + * @author Marcus Edel + * + * Definition of the LSTM class, which implements a lstm network + * layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_HPP +#define MLPACK_METHODS_ANN_LAYER_LSTM_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * An implementation of a lstm network layer. + * + * This class allows specification of the type of the activation functions used + * for the gates and cells and also of the type of the function used to + * initialize and update the peephole weights. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LSTM +{ + public: + //! Create the LSTM object. 
+ LSTM(); + + /** + * Create the LSTM layer object using the specified parameters. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + LSTM(const size_t inSize, const size_t outSize, const size_t rho); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */); + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the maximum number of steps to backpropagate through time (BPTT). + size_t Rho() const { return rho; } + //! Modify the maximum number of steps to backpropagate through time (BPTT). + size_t& Rho() { return rho; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! Get the model modules. + std::vector& Model() { return network; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored previous output. + arma::mat prevOutput; + + //! Locally-stored previous cell state. + arma::mat prevCell; + + //! Locally-stored input 2 gate module. + LayerTypes input2GateModule; + + //! Locally-stored output 2 gate module. + LayerTypes output2GateModule; + + //! Locally-stored input gate module. 
+ LayerTypes inputGateModule; + + //! Locally-stored hidden state module. + LayerTypes hiddenStateModule; + + //! Locally-stored forget gate module. + LayerTypes forgetGateModule; + + //! Locally-stored output gate module. + LayerTypes outputGateModule; + + //! Locally-stored cell module. + LayerTypes cellModule; + + //! Locally-stored cell activation module. + LayerTypes cellActivationModule; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored list of network modules. + std::vector network; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! Locally-stored number of gradient steps. + size_t gradientStep; + + //! Locally-stored cell parameters. + std::vector cellParameter; + + //! Locally-stored output parameters. + std::vector outParameter; + + //! Locally-stored previous error. + arma::mat prevError; + + //! Locally-stored cell activation error. + arma::mat cellActivationError; + + //! Locally-stored foget gate error. + arma::mat forgetGateError; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LSTM + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "lstm_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm_impl.hpp b/src/mlpack/methods/ann/layer/lstm_impl.hpp new file mode 100644 index 00000000000..6ebe89b2907 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lstm_impl.hpp @@ -0,0 +1,273 @@ +/** + * @file lstm_impl.hpp + * @author Marcus Edel + * + * Implementation of the LSTM class, which implements a lstm network + * layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LSTM_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LSTM::LSTM() +{ + // Nothing to do here. 
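For orientation while reading the constructor and Forward() below: the Linear (input2GateModule) and LinearNoBias (output2GateModule) modules jointly produce a single (4 * outSize) x 1 pre-activation from the current input and the previous output, and its four outSize-sized slices become the usual LSTM gates. A self-contained Armadillo sketch of one step under that reading (standalone code with made-up weights, not part of this patch):

    const size_t inSize = 8, outSize = 4;
    arma::mat Wx(4 * outSize, inSize,  arma::fill::randu);   // input2GateModule weight.
    arma::mat Wh(4 * outSize, outSize, arma::fill::randu);   // output2GateModule weight.
    arma::mat b(4 * outSize, 1, arma::fill::zeros);          // input2GateModule bias.

    arma::mat x(inSize, 1, arma::fill::randu);               // Current input.
    arma::mat hPrev(outSize, 1, arma::fill::zeros);          // Previous output.
    arma::mat cPrev(outSize, 1, arma::fill::zeros);          // Previous cell state.

    auto sigmoid = [](const arma::mat& a)
    { return arma::mat(arma::exp(a) / (1.0 + arma::exp(a))); };

    arma::mat z = Wx * x + Wh * hPrev + b;
    arma::mat i = sigmoid(z.rows(0, outSize - 1));                  // Input gate.
    arma::mat g = arma::tanh(z.rows(outSize, 2 * outSize - 1));     // Candidate (hidden state module).
    arma::mat f = sigmoid(z.rows(2 * outSize, 3 * outSize - 1));    // Forget gate.
    arma::mat o = sigmoid(z.rows(3 * outSize, 4 * outSize - 1));    // Output gate.

    arma::mat c = i % g + f % cPrev;    // nextCell in Forward().
    arma::mat h = o % arma::tanh(c);    // The layer output.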
+} + +template +LSTM::LSTM( + const size_t inSize, + const size_t outSize, + const size_t rho) : + inSize(inSize), + outSize(outSize), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) +{ + input2GateModule = new Linear<>(inSize, 4 * outSize); + output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); + + network.push_back(input2GateModule); + network.push_back(output2GateModule); + + inputGateModule = new SigmoidLayer<>(); + hiddenStateModule = new TanHLayer<>(); + forgetGateModule = new SigmoidLayer<>(); + outputGateModule = new SigmoidLayer<>(); + + network.push_back(inputGateModule); + network.push_back(hiddenStateModule); + network.push_back(forgetGateModule); + network.push_back(outputGateModule); + + cellModule = new IdentityLayer<>(); + cellActivationModule = new TanHLayer<>(); + + network.push_back(cellModule); + network.push_back(cellActivationModule); + + prevOutput = arma::zeros(outSize, 1); + prevCell = arma::zeros(outSize, 1); + prevError = arma::zeros(4 * outSize, 1); + cellActivationError = arma::zeros(outSize, 1); +} + +template +template +void LSTM::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + if (!deterministic) + { + cellParameter.push_back(prevCell); + outParameter.push_back(prevOutput); + } + + arma::mat output1; + arma::mat output2; + arma::mat output3; + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, input2GateModule))), + input2GateModule); + + boost::apply_visitor(ForwardVisitor(std::move(prevOutput), std::move( + boost::apply_visitor(outputParameterVisitor, output2GateModule))), + output2GateModule); + + output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + + boost::apply_visitor(outputParameterVisitor, output2GateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 0, 0, 1 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, inputGateModule))), inputGateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 1 * outSize, 0, 2 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, hiddenStateModule))), hiddenStateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 2 * outSize, 0, 3 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, forgetGateModule))), forgetGateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 3 * outSize, 0, 4 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, outputGateModule))), outputGateModule); + + arma::mat cell = prevCell; + + // Input gate * hidden state. + arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % boost::apply_visitor(outputParameterVisitor, + hiddenStateModule); + + // Forget gate * cell. 
+ arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cell; + + arma::mat nextCell = cmul1 + cmul2; + + boost::apply_visitor(ForwardVisitor(std::move(nextCell), std::move( + boost::apply_visitor(outputParameterVisitor, cellModule))), cellModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, cellModule)), std::move(boost::apply_visitor( + outputParameterVisitor, cellActivationModule))), cellActivationModule); + + output = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % boost::apply_visitor(outputParameterVisitor, + outputGateModule); + + prevCell = nextCell; + prevOutput = output; + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + prevOutput.zeros(); + prevCell.zeros(); + } +} + +template +template +void LSTM::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + if (backwardStep > 0) + { + gy += boost::apply_visitor(deltaVisitor, output2GateModule); + } + + arma::mat g1 = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % gy; + + arma::mat g2 = boost::apply_visitor(outputParameterVisitor, + outputGateModule) % gy; + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, cellActivationModule)), std::move(g2), + std::move(boost::apply_visitor(deltaVisitor, cellActivationModule))), + cellActivationModule); + + cellActivationError = boost::apply_visitor(deltaVisitor, + cellActivationModule); + + if (backwardStep > 0) + { + cellActivationError += forgetGateError; + } + + arma::mat g4 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % cellActivationError; + + arma::mat g5 = boost::apply_visitor(outputParameterVisitor, + hiddenStateModule) % cellActivationError; + + forgetGateError = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cellActivationError; + + arma::mat g7 = cellParameter[cellParameter.size() - + backwardStep - 1] % cellActivationError; + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputGateModule)), std::move(g5), + std::move(boost::apply_visitor(deltaVisitor, inputGateModule))), + inputGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, hiddenStateModule)), std::move(g4), + std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule))), + hiddenStateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, forgetGateModule)), std::move(g7), + std::move(boost::apply_visitor(deltaVisitor, forgetGateModule))), + forgetGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, outputGateModule)), std::move(g1), + std::move(boost::apply_visitor(deltaVisitor, outputGateModule))), + outputGateModule); + + prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, inputGateModule); + prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, hiddenStateModule); + prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, forgetGateModule); + prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, outputGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, input2GateModule)), std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, 
input2GateModule))), + input2GateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, output2GateModule)), std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, output2GateModule))), + output2GateModule); + + backwardStep++; + if (backwardStep == rho) + { + backwardStep = 0; + cellParameter.clear(); + } + + g = boost::apply_visitor(deltaVisitor, input2GateModule); +} + +template +template +void LSTM::Gradient( + arma::Mat&& input, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(prevError)), + input2GateModule); + + boost::apply_visitor(GradientVisitor( + std::move(outParameter[outParameter.size() - gradientStep - 1]), + std::move(prevError)), output2GateModule); + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + outParameter.clear(); + } +} + +template +template +void LSTM::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(rho, "rho"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp deleted file mode 100644 index 6ccd2fc65fd..00000000000 --- a/src/mlpack/methods/ann/layer/lstm_layer.hpp +++ /dev/null @@ -1,418 +0,0 @@ -/** - * @file lstm_layer.hpp - * @author Marcus Edel - * - * Definition of the LSTMLayer class, which implements a lstm network - * layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a lstm network layer. - * - * This class allows specification of the type of the activation functions used - * for the gates and cells and also of the type of the function used to - * initialize and update the peephole weights. - * - * @tparam GateActivationFunction Activation function used for the gates. - * @tparam StateActivationFunction Activation function used for the state. - * @tparam OutputActivationFunction Activation function used for the output. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - class GateActivationFunction = LogisticFunction, - class StateActivationFunction = TanhFunction, - class OutputActivationFunction = TanhFunction, - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LSTMLayer -{ - public: - /** - * Create the LSTMLayer object using the specified parameters. - * - * @param outSize The number of output units. - * @param peepholes The flag used to indicate if peephole connections should - * be used (Default: false). - * @param WeightInitRule The weight initialization rule used to initialize the - * weight matrix. 
- */ - LSTMLayer(const size_t outSize, const bool peepholes = false) : - outSize(outSize), - peepholes(peepholes), - seqLen(1), - offset(0) - { - if (peepholes) - { - peepholeWeights.set_size(outSize, 3); - peepholeDerivatives = arma::zeros(outSize, 3); - } - else - { - peepholeWeights.set_size(0, 0); - } - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - if (inGate.n_cols < seqLen) - { - inGate = arma::zeros(outSize, seqLen); - inGateAct = arma::zeros(outSize, seqLen); - inGateError = arma::zeros(outSize, seqLen); - outGate = arma::zeros(outSize, seqLen); - outGateAct = arma::zeros(outSize, seqLen); - outGateError = arma::zeros(outSize, seqLen); - forgetGate = arma::zeros(outSize, seqLen); - forgetGateAct = arma::zeros(outSize, seqLen); - forgetGateError = arma::zeros(outSize, seqLen); - state = arma::zeros(outSize, seqLen); - stateError = arma::zeros(outSize, seqLen); - cellAct = arma::zeros(outSize, seqLen); - } - - // Split up the inputactivation into the 3 parts (inGate, forgetGate, - // outGate). - inGate.col(offset) = input.submat(0, 0, outSize - 1, 0); - - forgetGate.col(offset) = input.submat(outSize, 0, (outSize * 2) - 1, 0); - outGate.col(offset) = input.submat(outSize * 3, 0, (outSize * 4) - 1, 0); - - if (peepholes && offset > 0) - { - inGate.col(offset) += peepholeWeights.col(0) % state.col(offset - 1); - forgetGate.col(offset) += peepholeWeights.col(1) % - state.col(offset - 1); - } - - arma::Col inGateActivation = inGateAct.unsafe_col(offset); - GateActivationFunction::fn(inGate.unsafe_col(offset), inGateActivation); - - arma::Col forgetGateActivation = forgetGateAct.unsafe_col(offset); - GateActivationFunction::fn(forgetGate.unsafe_col(offset), - forgetGateActivation); - - arma::Col cellActivation = cellAct.unsafe_col(offset); - StateActivationFunction::fn(input.submat(outSize * 2, 0, - (outSize * 3) - 1, 0), cellActivation); - - state.col(offset) = inGateAct.col(offset) % cellActivation; - - if (offset > 0) - state.col(offset) += forgetGateAct.col(offset) % state.col(offset - 1); - - if (peepholes) - outGate.col(offset) += peepholeWeights.col(2) % state.col(offset); - - arma::Col outGateActivation = outGateAct.unsafe_col(offset); - GateActivationFunction::fn(outGate.unsafe_col(offset), outGateActivation); - - OutputActivationFunction::fn(state.unsafe_col(offset), output); - output = outGateAct.col(offset) % output; - - offset = (offset + 1) % seqLen; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. 
- */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - queryOffset = seqLen - offset - 1; - - arma::Col outGateDerivative; - GateActivationFunction::deriv(outGateAct.unsafe_col(queryOffset), - outGateDerivative); - - arma::Col stateActivation; - StateActivationFunction::fn(state.unsafe_col(queryOffset), stateActivation); - - outGateError.col(queryOffset) = outGateDerivative % gy % stateActivation; - - arma::Col stateDerivative; - StateActivationFunction::deriv(stateActivation, stateDerivative); - - stateError.col(queryOffset) = gy % outGateAct.col(queryOffset) % - stateDerivative; - - if (queryOffset < (seqLen - 1)) - { - stateError.col(queryOffset) += stateError.col(queryOffset + 1) % - forgetGateAct.col(queryOffset + 1); - - if (peepholes) - { - stateError.col(queryOffset) += inGateError.col(queryOffset + 1) % - peepholeWeights.col(0); - stateError.col(queryOffset) += forgetGateError.col(queryOffset + 1) % - peepholeWeights.col(1); - } - } - - if (peepholes) - { - stateError.col(queryOffset) += outGateError.col(queryOffset) % - peepholeWeights.col(2); - } - - arma::Col cellDerivative; - StateActivationFunction::deriv(cellAct.col(queryOffset), cellDerivative); - - arma::Col cellError = inGateAct.col(queryOffset) % cellDerivative % - stateError.col(queryOffset); - - if (queryOffset > 0) - { - arma::Col forgetGateDerivative; - GateActivationFunction::deriv(forgetGateAct.col(queryOffset), - forgetGateDerivative); - - forgetGateError.col(queryOffset) = forgetGateDerivative % - stateError.col(queryOffset) % state.col(queryOffset - 1); - } - - arma::Col inGateDerivative; - GateActivationFunction::deriv(inGateAct.col(queryOffset), inGateDerivative); - - inGateError.col(queryOffset) = inGateDerivative % - stateError.col(queryOffset) % cellAct.col(queryOffset); - - if (peepholes) - { - peepholeDerivatives.col(2) += outGateError.col(queryOffset) % - state.col(queryOffset); - - if (queryOffset > 0) - { - peepholeDerivatives.col(0) += inGateError.col(queryOffset) % - state.col(queryOffset - 1); - peepholeDerivatives.col(1) += forgetGateError.col(queryOffset) % - state.col(queryOffset - 1); - } - } - - g = arma::zeros >(outSize * 4, 1); - g.submat(0, 0, outSize - 1, 0) = inGateError.col(queryOffset); - g.submat(outSize, 0, (outSize * 2) - 1, 0) = - forgetGateError.col(queryOffset); - g.submat(outSize * 2, 0, (outSize * 3) - 1, 0) = cellError; - g.submat(outSize * 3, 0, (outSize * 4) - 1, 0) = - outGateError.col(queryOffset); - - offset = (offset + 1) % seqLen; - } - - /** - * Ordinary feed backward pass of the lstm layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& /* gy */, - GradientDataType& /* g */) - { - if (peepholes && offset == 0) - { - peepholeGradient.col(0) = arma::trans((peepholeWeights.col(0).t() * - (inGateError.col(queryOffset) % peepholeDerivatives.col(0))) * - inGate.col(queryOffset).t()); - - peepholeGradient.col(1) = arma::trans((peepholeWeights.col(1).t() * - (forgetGateError.col(queryOffset) % peepholeDerivatives.col(1))) * - forgetGate.col(queryOffset).t()); - - peepholeGradient.col(2) = arma::trans((peepholeWeights.col(2).t() * - (outGateError.col(queryOffset) % peepholeDerivatives.col(2))) * - outGate.col(queryOffset).t()); - - peepholeDerivatives.zeros(); - } - } - - //! Get the peephole weights. 
- OutputDataType const& Weights() const { return peepholeWeights; } - //! Modify the peephole weights. - OutputDataType& Weights() { return peepholeWeights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the peephole gradient. - OutputDataType const& Gradient() const { return peepholeGradient; } - //! Modify the peephole gradient. - OutputDataType& Gradient() { return peepholeGradient; } - - //! Get the sequence length. - size_t SeqLen() const { return seqLen; } - //! Modify the sequence length. - size_t& SeqLen() { return seqLen; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(peepholes, "peepholes"); - - if (peepholes) - { - ar & data::CreateNVP(peepholeWeights, "peepholeWeights"); - - if (Archive::is_loading::value) - { - peepholeDerivatives = arma::zeros( - peepholeWeights.n_rows, 3); - } - } - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored peephole indication flag. - bool peepholes; - - //! Locally-stored length of the the input sequence. - size_t seqLen; - - //! Locally-stored sequence offset. - size_t offset; - - //! Locally-stored query offset. - size_t queryOffset; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored ingate object. - InputDataType inGate; - - //! Locally-stored ingate activation object. - InputDataType inGateAct; - - //! Locally-stored ingate error object. - InputDataType inGateError; - - //! Locally-stored outgate object. - InputDataType outGate; - - //! Locally-stored outgate activation object. - InputDataType outGateAct; - - //! Locally-stored outgate error object. - InputDataType outGateError; - - //! Locally-stored forget object. - InputDataType forgetGate; - - //! Locally-stored forget activation object. - InputDataType forgetGateAct; - - //! Locally-stored forget error object. - InputDataType forgetGateError; - - //! Locally-stored state object. - InputDataType state; - - //! Locally-stored state erro object. - InputDataType stateError; - - //! Locally-stored cell activation object. - InputDataType cellAct; - - //! Locally-stored peephole weight object. - OutputDataType peepholeWeights; - - //! Locally-stored derivatives object. - OutputDataType peepholeDerivatives; - - //! Locally-stored peephole gradient object. - OutputDataType peepholeGradient; -}; // class LSTMLayer - -//! Layer traits for the lstm layer. 
-template< - class GateActivationFunction, - class StateActivationFunction, - class OutputActivationFunction, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = true; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp new file mode 100644 index 00000000000..6bf179b7a5a --- /dev/null +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -0,0 +1,280 @@ +/** + * @file max_pooling.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Definition of the MaxPooling class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MAX_POOLING_HPP +#define MLPACK_METHODS_ANN_LAYER_MAX_POOLING_HPP + +#include +// #include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/* + * The max pooling rule for convolution neural networks. Take the maximum value + * within the receptive block. + */ +class MaxPoolingRule +{ + public: + /* + * Return the maximum value within the receptive block. + * + * @param input Input used to perform the pooling operation. + */ + template + size_t Pooling(const MatType& input) + { + return arma::as_scalar(arma::find(input.max() == input, 1)); + } +}; + +/** + * Implementation of the MaxPooling layer. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MaxPooling +{ +public: + //! Create the MaxPooling object. + MaxPooling(); + + /** + * Create the MaxPooling object using the specified number of units. + * + * @param kW Width of the pooling window. + * @param kH Height of the pooling window. + * @param dW Width of the stride operation. + * @param dH Width of the stride operation. + * @param floor Rounding operator (floor or ceil). + */ + MaxPooling(const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! 
Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the height. + size_t& InputHeight() { return inputHeight; } + + //! Get the width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + /** + * Apply pooling to the input and store the results. + * + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + * @param poolingIndices The pooled indices. + */ + template + void PoolingOperation(const arma::Mat& input, + arma::Mat& output, + arma::Mat& poolingIndices) + { + for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += dW) + { + for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += dH) + { + arma::mat subInput = input(arma::span(rowidx, rowidx + kW - 1 - offset), + arma::span(colidx, colidx + kH - 1 - offset)); + + const size_t idx = pooling.Pooling(subInput); + output(i, j) = subInput(idx); + + if (!deterministic) + { + arma::Mat subIndices = indices(arma::span(rowidx, + rowidx + kW - 1 - offset), + arma::span(colidx, colidx + kH - 1 - offset)); + + poolingIndices(i, j) = subIndices(idx); + } + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param error The backward error. + * @param output The pooled result. + * @param poolingIndices The pooled indices. + */ + template + void Unpooling(const arma::Mat& error, + arma::Mat& output, + arma::Mat& poolingIndices) + { + for (size_t i = 0; i < poolingIndices.n_elem; ++i) + { + output(poolingIndices(i)) += error(i); + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored width of the pooling window. + size_t kW; + + //! Locally-stored height of the pooling window. + size_t kH; + + //! Locally-stored width of the stride operation. + size_t dW; + + //! Locally-stored height of the stride operation. + size_t dH; + + //! Locally-stored reset parameter used to initialize the module once. + bool reset; + + //! Rounding operation used. + bool floor; + + //! Locally-stored stored rounding offset. + size_t offset; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! 
If true use maximum a posteriori during the forward pass. + bool deterministic; + + //! Locally-stored output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube gTemp; + + //! Locally-stored pooling strategy. + MaxPoolingRule pooling; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored indices matrix parameter. + arma::Mat indices; + + //! Locally-stored indices column parameter. + arma::Col indicesCol; + + //! Locally-stored pooling indicies. + std::vector poolingIndices; +}; // class MaxPooling + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "max_pooling_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling_impl.hpp b/src/mlpack/methods/ann/layer/max_pooling_impl.hpp new file mode 100644 index 00000000000..95aeea86049 --- /dev/null +++ b/src/mlpack/methods/ann/layer/max_pooling_impl.hpp @@ -0,0 +1,149 @@ +/** + * @file max_pooling_impl.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Implementation of the MaxPooling class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MAX_POOLING_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MAX_POOLING_IMPL_HPP + +// In case it hasn't yet been included. +#include "max_pooling.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MaxPooling::MaxPooling() +{ + // Nothing to do here. +} + +template +MaxPooling::MaxPooling( + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const bool floor) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + reset(false), + floor(floor), + offset(0), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + deterministic(false) +{ + // Nothing to do here. 
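For reference when reading Forward() below: with floor rounding the pooled width is floor((inputWidth - kW) / dW + 1) (and analogously for the height), while ceil rounding uses ceil() and sets offset = 1 so that pooling windows are clipped at the border. A standalone sketch with hypothetical sizes:

    using namespace mlpack::ann;

    MaxPooling<> pool(2, 2, 2, 2);   // 2x2 window, stride 2, floor rounding.
    pool.InputWidth() = 28;
    pool.InputHeight() = 28;

    arma::mat input = arma::randu<arma::mat>(28 * 28, 1);   // One 28x28 slice, flattened.
    arma::mat output;
    pool.Forward(std::move(input), std::move(output));
    // outputWidth = floor((28 - 2) / 2 + 1) = 14, so output has 14 * 14 = 196 rows.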
+} + +template +template +void MaxPooling::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + const size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + if (!deterministic) + { + poolingIndices.push_back(outputTemp); + } + + if (!reset) + { + size_t elements = inputWidth * inputHeight; + indicesCol = arma::linspace >(0, (elements - 1), + elements); + + indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); + + reset = true; + } + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + if (!deterministic) + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + poolingIndices.back().slice(s)); + } + else + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + inputTemp.slice(s)); + } + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; +} + +template +template +void MaxPooling::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(mappedError.slice(s), gTemp.slice(s), + poolingIndices.back().slice(s)); + } + + poolingIndices.pop_back(); + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void MaxPooling::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp new file mode 100644 index 00000000000..70e061c7e2b --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -0,0 +1,252 @@ +/** + * @file mean_pooling.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Definition of the MeanPooling layer class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the MeanPooling. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MeanPooling +{ +public: + //! Create the MeanPooling object. 
+ MeanPooling(); + + /** + * Create the MeanPooling object using the specified number of units. + * + * @param kW Width of the pooling window. + * @param kH Height of the pooling window. + * @param dW Width of the stride operation. + * @param dH Width of the stride operation. + */ + MeanPooling(const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the height. + size_t& InputHeight() { return inputHeight; } + + //! Get the width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + + /** + * Apply pooling to the input and store the results. + * + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + */ + template + void Pooling(const arma::Mat& input, arma::Mat& output) + { + const size_t rStep = kW; + const size_t cStep = kH; + + for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += dH) + { + for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += dW) + { + arma::mat subInput = input( + arma::span(rowidx, rowidx + rStep - 1 - offset), + arma::span(colidx, colidx + cStep - 1 - offset)); + + output(i, j) = arma::mean(arma::mean(subInput)); + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param input The input to be apply the unpooling rule. + * @param output The pooled result. 
+ */ + template + void Unpooling(const arma::Mat& input, + const arma::Mat& error, + arma::Mat& output) + { + const size_t rStep = input.n_rows / error.n_rows - offset; + const size_t cStep = input.n_cols / error.n_cols - offset; + + arma::Mat unpooledError; + for (size_t j = 0; j < input.n_cols - cStep; j += cStep) + { + for (size_t i = 0; i < input.n_rows - rStep; i += rStep) + { + const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), + arma::span(j, j + cStep - 1)); + + unpooledError = arma::Mat(inputArea.n_rows, inputArea.n_cols); + unpooledError.fill(error(i / rStep, j / cStep) / inputArea.n_elem); + + output(arma::span(i, i + rStep - 1 - offset), + arma::span(j, j + cStep - 1 - offset)) += unpooledError; + } + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored width of the pooling window. + size_t kW; + + //! Locally-stored height of the pooling window. + size_t kH; + + //! Locally-stored width of the stride operation. + size_t dW; + + //! Locally-stored height of the stride operation. + size_t dH; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored reset parameter used to initialize the module once. + bool reset; + + //! Rounding operation used. + bool floor; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; + + //! Locally-stored stored rounding offset. + size_t offset; + + //! Locally-stored output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube gTemp; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MeanPooling + + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "mean_pooling_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp b/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp new file mode 100644 index 00000000000..5008763752b --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp @@ -0,0 +1,126 @@ +/** + * @file mean_pooling_impl.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Implementation of the MeanPooling layer class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_IMPL_HPP + +// In case it hasn't yet been included. +#include "mean_pooling.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MeanPooling::MeanPooling() +{ + // Nothing to do here. 
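A note on the gradient flow implemented by Unpooling() above and used by Backward() below: each pooled error value is spread uniformly over the input window it came from, scaled by the window size. A tiny sketch of that rule with hypothetical numbers:

    arma::mat window(2, 2, arma::fill::randu);   // Input region behind one output cell.
    double e = 0.8;                              // Backpropagated error for that cell.

    arma::mat spread(2, 2);
    spread.fill(e / window.n_elem);              // Every input cell receives 0.8 / 4 = 0.2.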
+} + +template +MeanPooling::MeanPooling( + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const bool floor) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + reset(false), + floor(floor), + deterministic(false), + offset(0) +{ + // Nothing to do here. +} + +template +template +void MeanPooling::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + + Pooling(inputTemp.slice(s), outputTemp.slice(s)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; +} + +template +template +void MeanPooling::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void MeanPooling::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp new file mode 100644 index 00000000000..280b6e79b11 --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -0,0 +1,100 @@ +/** + * @file mean_squared_error.hpp + * @author Marcus Edel + * + * Definition of the mean squared error performance function. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The mean squared error performance function measures the network's + * performance according to the mean of squared errors. + * + * @tparam ActivationFunction Activation function used for the embedding layer. + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
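+ *
+ * Concretely, Forward(input, target) returns mean((input - target)^2) taken
+ * over all elements, and Backward() returns the unscaled elementwise
+ * difference (input - target). An illustrative call (the values are chosen
+ * only for the example):
+ *
+ * @code
+ * arma::mat input = {1.0, 3.0};
+ * arma::mat target = {0.0, 1.0};
+ * MeanSquaredError<> mse;
+ * const double error = mse.Forward(std::move(input), std::move(target));
+ * // error == (1 + 4) / 2 == 2.5
+ * @endcode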
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MeanSquaredError +{ + public: + /** + * Create the MeanSquaredError object. + */ + MeanSquaredError(); + + /* + * Computes the mean squared error function. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + double Forward(const arma::Mat&& input, const arma::Mat&& target); + /** + * Ordinary feed backward pass of a neural network. + * + * @param input The propagated input activation. + * @param target The target vector. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output); + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MeanSquaredError + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "mean_squared_error_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp b/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp new file mode 100644 index 00000000000..037a9445295 --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp @@ -0,0 +1,57 @@ +/** + * @file mean_squared_error_impl.hpp + * @author Marcus Edel + * + * Implementation of the mean squared error performance function. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_IMPL_HPP + +// In case it hasn't yet been included. +#include "mean_squared_error.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MeanSquaredError::MeanSquaredError() +{ + // Nothing to do here. +} + +template +template +double MeanSquaredError::Forward( + const arma::Mat&& input, const arma::Mat&& target) +{ + return arma::mean(arma::mean(arma::square(input - target))); +} + +template +template +void MeanSquaredError::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = (input - target); +} + +template +template +void MeanSquaredError::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. 
+} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp b/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp deleted file mode 100644 index 7705b52205a..00000000000 --- a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/** - * @file multiclass_classification_layer.hpp - * @author Marcus Edel - * - * Definition of the MulticlassClassificationLayer class, which implements a - * multiclass classification layer that can be used as output layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a multiclass classification layer that can be used as - * output layer. - * - * A convenience typedef is given: - * - * - ClassificationLayer - */ -class MulticlassClassificationLayer -{ - public: - /** - * Create the MulticlassClassificationLayer object. - */ - MulticlassClassificationLayer() - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - } - - /** - * Serialize the layer - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - } -}; // class MulticlassClassificationLayer - -//! Layer traits for the multiclass classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsConnection = false; -}; - -/*** - * Alias ClassificationLayer. 
- */ -using ClassificationLayer = MulticlassClassificationLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp similarity index 69% rename from src/mlpack/methods/ann/layer/multiply_constant_layer.hpp rename to src/mlpack/methods/ann/layer/multiply_constant.hpp index afa0f42e63c..b2985b27b83 100644 --- a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -1,17 +1,12 @@ /** - * @file multiply_constant_layer.hpp + * @file multiply_constant.hpp * @author Marcus Edel * * Definition of the MultiplyConstantLayer class, which multiplies the input by * a (non-learnable) constant. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_HPP +#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_HPP #include @@ -31,16 +26,13 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class MultiplyConstantLayer +class MultiplyConstant { public: /** - * Create the BaseLayer object. + * Create the MultiplyConstant object. */ - MultiplyConstantLayer(const double scalar) : scalar(scalar) - { - // Nothing to do here. - } + MultiplyConstant(const double scalar); /** * Ordinary feed forward pass of a neural network. Multiply the input with the @@ -50,10 +42,7 @@ class MultiplyConstantLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) - { - output = input * scalar; - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network. The backward pass @@ -64,10 +53,7 @@ class MultiplyConstantLayer * @param g The calculated gradient. */ template - void Backward(const DataType& /* input */, const DataType& gy, DataType& g) - { - g = gy * scalar; - } + void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -88,10 +74,7 @@ class MultiplyConstantLayer * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(scalar, "scalar"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored constant scalar value. @@ -105,9 +88,12 @@ class MultiplyConstantLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class MultiplyConstantLayer +}; // class MultiplyConstant + +} // namespace ann +} // namespace mlpack -}; // namespace ann -}; // namespace mlpack +// Include implementation. 
+#include "multiply_constant_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp b/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp new file mode 100644 index 00000000000..07dc4799779 --- /dev/null +++ b/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp @@ -0,0 +1,51 @@ +/** + * @file multiply_constant_impl.hpp + * @author Marcus Edel + * + * Implementation of the MultiplyConstantLayer class, which multiplies the + * input by a (non-learnable) constant. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_IMPL_HPP + +// In case it hasn't yet been included. +#include "multiply_constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MultiplyConstant::MultiplyConstant( + const double scalar) : scalar(scalar) +{ + // Nothing to do here. +} + +template +template +void MultiplyConstant::Forward( + const InputType&& input, OutputType&& output) +{ + output = input * scalar; +} + +template +template +void MultiplyConstant::Backward( + const DataType&& /* input */, DataType&& gy, DataType&& g) +{ + g = gy * scalar; +} + +template +template +void MultiplyConstant::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(scalar, "scalar"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp similarity index 54% rename from src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp rename to src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 6c08698f790..84c6a2d7904 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -1,16 +1,16 @@ /** - * @file negative_log_likelihood_layer.hpp + * @file negative_log_likelihood.hpp * @author Marcus Edel * - * Definition of the NegativeLogLikelihoodLayer class. + * Definition of the NegativeLogLikelihood class. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP -#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_HPP +#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_HPP #include @@ -19,11 +19,10 @@ namespace ann /** Artificial Neural Network. */ { /** * Implementation of the negative log likelihood layer. The negative log - * likelihood layer expects that the input contains log-probabilities for each + * likelihood layer expectes that the input contains log-probabilities for each * class. The layer also expects a class index, in the range between 1 and the * number of classes, as target when calling the Forward function. * - * @tparam ActivationFunction Activation function used for the embedding layer. * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). 
* @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, @@ -33,44 +32,26 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class NegativeLogLikelihoodLayer +class NegativeLogLikelihood { public: /** * Create the NegativeLogLikelihoodLayer object. */ - NegativeLogLikelihoodLayer() { /* Nothing to do here. */ } + NegativeLogLikelihood(); - /** - * Ordinary feed forward pass of a neural network. The negative log - * likelihood layer expects that the input contains log-probabilities for - * each class. The layer also expects a class index, in the range between 1 - * and the number of classes, as target when calling the Forward function. + /* + * Computes the Negative log likelihood. * - * @param input Input data that contains the log-probabilities for each class. - * @param target The target vector, that contains the class index in the range - * between 1 and the number of classes. + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. */ template - double Forward(const arma::Mat& input, const arma::Mat& target) - { - double output = 0; - - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output -= input(currentTarget, i); - } - - return output; - } + double Forward(const arma::Mat&& input, arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. The negative log - * likelihood layer expects that the input contains log-probabilities for + * likelihood layer expectes that the input contains log-probabilities for * each class. The layer also expects a class index, in the range between 1 * and the number of classes, as target when calling the Forward function. * @@ -80,20 +61,9 @@ class NegativeLogLikelihoodLayer * @param output The calculated error. */ template - void Backward(const arma::Mat& input, - const arma::Mat& target, - arma::Mat& output) - { - output = arma::zeros >(input.n_rows, input.n_cols); - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output(currentTarget, i) = -1; - } - } + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -110,6 +80,12 @@ class NegativeLogLikelihoodLayer //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -119,9 +95,12 @@ class NegativeLogLikelihoodLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class NegativeLogLikelihoodLayer +}; // class NegativeLogLikelihood + +} // namespace ann +} // namespace mlpack -}; // namespace ann -}; // namespace mlpack +// Include implementation. 
+#include "negative_log_likelihood_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp new file mode 100644 index 00000000000..2ec7799efe0 --- /dev/null +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp @@ -0,0 +1,76 @@ +/** + * @file negative_log_likelihood_impl.hpp + * @author Marcus Edel + * + * Implementation of the NegativeLogLikelihood class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_IMPL_HPP + +// In case it hasn't yet been included. +#include "negative_log_likelihood.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +NegativeLogLikelihood::NegativeLogLikelihood() +{ + // Nothing to do here. +} + +template +template +double NegativeLogLikelihood::Forward( + const arma::Mat&& input, arma::Mat&& target) +{ + double output = 0; + + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + return output; +} + +template +template +void NegativeLogLikelihood::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } +} + +template +template +void NegativeLogLikelihood::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/one_hot_layer.hpp b/src/mlpack/methods/ann/layer/one_hot_layer.hpp deleted file mode 100644 index 63200b2c335..00000000000 --- a/src/mlpack/methods/ann/layer/one_hot_layer.hpp +++ /dev/null @@ -1,96 +0,0 @@ -/** - * @file one_hot_layer.hpp - * @author Shangtong Zhang - * - * Definition of the OneHotLayer class, which implements a standard network - * layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a one hot classification layer that can be used as - * output layer. - */ -class OneHotLayer -{ - public: - /** - * Create the OneHotLayer object. - */ - OneHotLayer() - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. 
- * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - output.zeros(); - - arma::uword maxIndex = 0; - inputActivations.max(maxIndex); - output(maxIndex) = 1; - } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& /* ar */, const unsigned int /* version */) - { - /* Nothing to do here */ - } -}; // class OneHotLayer - -//! Layer traits for the one-hot class classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = true; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - - -#endif diff --git a/src/mlpack/methods/ann/layer/pooling_layer.hpp b/src/mlpack/methods/ann/layer/pooling_layer.hpp deleted file mode 100644 index e8a205f44f8..00000000000 --- a/src/mlpack/methods/ann/layer/pooling_layer.hpp +++ /dev/null @@ -1,267 +0,0 @@ -/** - * @file pooling_layer.hpp - * @author Marcus Edel - * @author Nilay Jain - * - * Definition of the PoolingLayer class, which attaches various pooling - * functions to the embedding layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the pooling layer. The pooling layer works as a metaclass - * which attaches various functions to the embedding layer. - * - * @tparam PoolingRule Pooling function used for the embedding layer. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename PoolingRule = MeanPooling, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class PoolingLayer -{ - public: - /** - * Create the PoolingLayer object using the specified number of units. - * - * @param kSize Size of the pooling window. - * @param stride The stride of the convolution operation. - * @param pooling The pooling strategy. - */ - PoolingLayer(const size_t kSize, - const size_t stride = 1, - PoolingRule pooling = PoolingRule()) : - kSize(kSize), - stride(stride), - pooling(pooling) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. 
- */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - Pooling(input, output); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = arma::zeros >((input.n_rows - kSize) / stride + 1, - (input.n_cols - kSize) / stride + 1, input.n_slices); - - for (size_t s = 0; s < input.n_slices; s++) - Pooling(input.slice(s), output.slice(s)); - } - - /** - * Ordinary feed backward pass of a neural network, using 3rd-order tensors as - * input, calculating the function f(x) by propagating x backwards through f. - * Using the results from the feed forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Cube& gy, - arma::Cube& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols, inputParameter.n_slices); - - for (size_t s = 0; s < gy.n_slices; s++) - { - Unpooling(inputParameter.slice(s), gy.slice(s), g.slice(s)); - } - } - - /** - * Ordinary feed backward pass of a neural network, using 3rd-order tensors as - * input, calculating the function f(x) by propagating x backwards through f. - * Using the results from the feed forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube from the error matrix. - arma::Cube mappedError = arma::zeros(outputParameter.n_rows, - outputParameter.n_cols, outputParameter.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * outputParameter.n_rows * outputParameter.n_cols, - (j + 1) * outputParameter.n_rows * outputParameter.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - outputParameter.n_rows, outputParameter.n_cols); - } - } - - Backward(inputParameter, mappedError, g); - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - InputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - InputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kSize, "kSize"); - ar & data::CreateNVP(pooling, "pooling"); - ar & data::CreateNVP(stride, "stride"); - } - - private: - /** - * Apply pooling to the input and store the results. - * - * @param input The input to be apply the pooling rule. - * @param output The pooled result. 
- */ - template - void Pooling(const arma::Mat& input, arma::Mat& output) - { - const size_t rStep = kSize; - const size_t cStep = kSize; - - for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += stride) - { - for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += stride) - { - output(i, j) += pooling.Pooling(input( - arma::span(rowidx, rowidx + rStep - 1), - arma::span(colidx, colidx + cStep - 1))); - } - } - } - - /** - * Apply unpooling to the input and store the results. - * - * @param input The input to be apply the unpooling rule. - * @param output The pooled result. - */ - template - void Unpooling(const arma::Mat& input, - const arma::Mat& error, - arma::Mat& output) - { - const size_t rStep = input.n_rows / error.n_rows; - const size_t cStep = input.n_cols / error.n_cols; - - arma::Mat unpooledError; - for (size_t j = 0; j < input.n_cols; j += cStep) - { - for (size_t i = 0; i < input.n_rows; i += rStep) - { - const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)); - - pooling.Unpooling(inputArea, error(i / rStep, j / cStep), - unpooledError); - - output(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)) += unpooledError; - } - } - } - - //! Locally-stored size of the pooling window. - size_t kSize; - - //! Locally-stored stride value by which we move filter. - size_t stride; - - //! Locally-stored pooling strategy. - PoolingRule pooling; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class PoolingLayer - -//! Layer traits for the pooling layer. -template< - typename PoolingRule, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - - -} // namespace ann -} // namespace mlpack - -#endif - diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp new file mode 100644 index 00000000000..297127a6ae8 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -0,0 +1,213 @@ +/** + * @file recurrent.hpp + * @author Marcus Edel + * + * Definition of the LinearLayer class also known as fully-connected layer or + * affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_HPP + +#include +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the RecurrentLayer class. Recurrent layers can be used + * similarly to feed-forward layers. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
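+ *
+ * In terms of the modules given to the constructor, the forward recurrence
+ * sketched by the implementation in recurrent_impl.hpp is roughly:
+ *
+ * @code
+ * h(0) = transfer(start(input(x(0))))
+ * h(t) = transfer(input(x(t)) + feedback(h(t - 1)))
+ * @endcode
+ *
+ * where rho bounds the number of time steps that are unrolled for BPTT.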
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Recurrent +{ + public: + /** + * Create the Recurrent object using the specified modules. + * + * @param start The start module. + * @param input The input module. + * @param feedback The feedback module. + * @param transfer The transfer module. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + template + Recurrent(const StartModuleType& start, + const InputModuleType& input, + const FeedbackModuleType& feedback, + const TransferModuleType& transfer, + const size_t rho); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f, using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */); + + //! Get the model modules. + std::vector& Model() { return network; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored start module. + LayerTypes startModule; + + //! Locally-stored input module. + LayerTypes inputModule; + + //! Locally-stored feedback module. + LayerTypes feedbackModule; + + //! Locally-stored transfer module. + LayerTypes transferModule; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! Locally-stored number of gradient steps.
+ size_t gradientStep; + + //! If true, the deterministic (testing) mode is used. + bool deterministic; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored initial module. + LayerTypes initialModule; + + //! Locally-stored recurrent module. + LayerTypes recurrentModule; + + //! Locally-stored model modules. + std::vector network; + + //! Locally-stored merge module. + LayerTypes mergeModule; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored feedback output parameters. + std::vector feedbackOutputParameter; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored recurrent error parameter. + arma::mat recurrentError; +}; // class Recurrent + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "recurrent_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp new file mode 100644 index 00000000000..ffb7320b232 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -0,0 +1,263 @@ +/** + * @file recurrent_attention.hpp + * @author Marcus Edel + * + * Definition of the RecurrentAttention class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_HPP + +#include +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * This class implements the Recurrent Model for Visual Attention, using a + * variety of possible layer implementations. + * + * For more information, see the following paper. + * + * @code + * @article{MnihHGK14, + * title={Recurrent Models of Visual Attention}, + * author={Volodymyr Mnih and Nicolas Heess and Alex Graves and Koray Kavukcuoglu}, + * journal={CoRR}, + * volume={abs/1406.6247}, + * year={2014} + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class RecurrentAttention +{ + public: + /** + * Create the RecurrentAttention object using the specified modules. + * + * @param outSize The module output size. + * @param rnn The recurrent neural network module. + * @param action The action module. + * @param rho Maximum number of steps to backpropagate through time (BPTT).
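+ *
+ * A minimal construction sketch, where rnn is any module that maps a glimpse
+ * to a hidden state of size outSize and action is any module that maps that
+ * hidden state to the next attention location (both are hypothetical
+ * placeholders, not modules defined in this patch):
+ *
+ * @code
+ * RecurrentAttention<> ram(outSize, rnn, action, rho);
+ * @endcode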
+ */ + template + RecurrentAttention(const size_t outSize, + const RNNModuleType& rnn, + const ActionModuleType& action, + const size_t rho); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& /* input */, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */); + + //! Get the model modules. + std::vector& Model() { return network; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Calculate the gradient of the attention module. + void IntermediateGradient() + { + intermediateGradient.zeros(); + + // Gradient of the action module. + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(initialInput), + std::move(actionError)), actionModule); + } + else + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError)), + actionModule); + } + + // Gradient of the recurrent module. + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError)), + rnnModule); + + attentionGradient += intermediateGradient; + } + + //! Locally-stored module output size. + size_t outSize; + + //! Locally-stored start module. + LayerTypes rnnModule; + + //! Locally-stored input module. + LayerTypes actionModule; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! 
Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored initial module. + LayerTypes initialModule; + + //! Locally-stored recurrent module. + LayerTypes recurrentModule; + + //! Locally-stored model modules. + std::vector network; + + //! Locally-stored merge module. + LayerTypes mergeModule; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored feedback output parameters. + std::vector feedbackOutputParameter; + + //! List of all module parameters for the backward pass (BBTT). + std::vector moduleOutputParameter; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored recurrent error parameter. + arma::mat recurrentError; + + //! Locally-stored action error parameter. + arma::mat actionError; + + //! Locally-stored action delta. + arma::mat actionDelta; + + //! Locally-stored recurrent delta. + arma::mat rnnDelta; + + //! Locally-stored initial action input. + arma::mat initialInput; + + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; + + //! Locally-stored attention gradient. + arma::mat attentionGradient; + + //! Locally-stored intermediate gradient for the attention module. + arma::mat intermediateGradient; +}; // class RecurrentAttention + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "recurrent_attention_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp b/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp new file mode 100644 index 00000000000..6642894fa88 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp @@ -0,0 +1,204 @@ +/** + * @file recurrent_attention_impl.hpp + * @author Marcus Edel + * + * Implementation of the RecurrentAttention class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_IMPL_HPP + +// In case it hasn't yet been included. +#include "recurrent_attention.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +template +RecurrentAttention::RecurrentAttention( + const size_t outSize, + const RNNModuleType& rnn, + const ActionModuleType& action, + const size_t rho) : + outSize(outSize), + rnnModule(new RNNModuleType(rnn)), + actionModule(new ActionModuleType(action)), + rho(rho), + forwardStep(0), + backwardStep(0), + deterministic(false) +{ + network.push_back(rnnModule); + network.push_back(actionModule); +} + +template +template +void RecurrentAttention::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + // Initialize the action input. + if (initialInput.is_empty()) + { + initialInput = arma::zeros(outSize, input.n_cols); + } + + // Propagate through the action and recurrent module. + for (forwardStep = 0; forwardStep < rho; ++forwardStep) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( + boost::apply_visitor(outputParameterVisitor, actionModule))), + actionModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( + outputParameterVisitor, actionModule))), actionModule); + } + + // Initialize the glimpse input. + arma::mat glimpseInput = arma::zeros(input.n_elem, 2); + glimpseInput.col(0) = input; + glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, + actionModule).n_elem - 1, 1) = boost::apply_visitor( + outputParameterVisitor, actionModule); + + boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), + std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), + rnnModule); + + // Save the output parameter when training the module. + if (!deterministic) + { + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } + } + } + + output = boost::apply_visitor(outputParameterVisitor, rnnModule); + + forwardStep = 0; + backwardStep = 0; +} + +template +template +void RecurrentAttention::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + if (intermediateGradient.is_empty() && backwardStep == 0) + { + // Initialize the attention gradients. + size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + + boost::apply_visitor(weightSizeVisitor, actionModule); + + intermediateGradient = arma::zeros(weights, 1); + attentionGradient = arma::zeros(weights, 1); + + // Initialize the action error. + actionError = arma::zeros( + boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, + boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); + } + + // Propagate the attention gradients. + if (backwardStep == 0) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), rnnModule); + boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), actionModule); + + attentionGradient.zeros(); + } + + // Back-propagate through time. 
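+  // Each iteration of the loop below restores the module outputs that were
+  // saved during Forward(), routes the error for this time step through the
+  // action and recurrent modules, and accumulates their parameter gradients
+  // into attentionGradient via IntermediateGradient(). On the first iteration
+  // (backwardStep == 0) the error is the incoming gy; on later iterations it
+  // is the delta produced by the action module (actionDelta).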
+ for (; backwardStep < rho; backwardStep++) + { + if (backwardStep == 0) + { + recurrentError = gy; + } + else + { + recurrentError = actionDelta; + } + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError), + std::move(actionDelta)), actionModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(initialInput), + std::move(actionError), std::move(actionDelta)), actionModule); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError), + std::move(rnnDelta)), rnnModule); + + if (backwardStep == 0) + { + g = rnnDelta.col(1); + } + else + { + g += rnnDelta.col(1); + } + + IntermediateGradient(); + } +} + +template +template +void RecurrentAttention::Gradient( + arma::Mat&& /* input */, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) +{ + size_t offset = 0; + offset += boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), rnnModule); + boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), actionModule); +} + +template +template +void RecurrentAttention::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(forwardStep, "forwardStep"); + ar & data::CreateNVP(backwardStep, "backwardStep"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_impl.hpp b/src/mlpack/methods/ann/layer/recurrent_impl.hpp new file mode 100644 index 00000000000..8f1525c4551 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_impl.hpp @@ -0,0 +1,206 @@ +/** + * @file recurrent_impl.hpp + * @author Marcus Edel + * + * Implementation of the LinearLayer class also known as fully-connected layer + * or affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_IMPL_HPP + +// In case it hasn't yet been included. +#include "recurrent.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +template< + typename StartModuleType, + typename InputModuleType, + typename FeedbackModuleType, + typename TransferModuleType +> +Recurrent::Recurrent( + const StartModuleType& start, + const InputModuleType& input, + const FeedbackModuleType& feedback, + const TransferModuleType& transfer, + const size_t rho) : + startModule(new StartModuleType(start)), + inputModule(new InputModuleType(input)), + feedbackModule(new FeedbackModuleType(feedback)), + transferModule(new TransferModuleType(transfer)), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) +{ + initialModule = new Sequential<>(); + mergeModule = new AddMerge<>(); + recurrentModule = new Sequential<>(false); + + boost::apply_visitor(AddVisitor(inputModule), initialModule); + boost::apply_visitor(AddVisitor(startModule), initialModule); + boost::apply_visitor(AddVisitor(transferModule), initialModule); + + boost::apply_visitor(weightSizeVisitor, startModule); + boost::apply_visitor(weightSizeVisitor, inputModule); + boost::apply_visitor(weightSizeVisitor, feedbackModule); + boost::apply_visitor(weightSizeVisitor, transferModule); + + boost::apply_visitor(AddVisitor(inputModule), mergeModule); + boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); + boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); + boost::apply_visitor(AddVisitor(transferModule), recurrentModule); + + network.push_back(initialModule); + network.push_back(mergeModule); + network.push_back(feedbackModule); + network.push_back(recurrentModule); +} + +template +template +void Recurrent::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + initialModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, inputModule))), + inputModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, transferModule)), std::move( + boost::apply_visitor(outputParameterVisitor, feedbackModule))), + feedbackModule); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + recurrentModule); + } + + output = boost::apply_visitor(outputParameterVisitor, transferModule); + + // Save the feedback output parameter when training the module. 
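+  // The stored copies of the output are consumed later by Gradient(), which
+  // needs h(t - 1) as the input of the feedback module for each BPTT step; in
+  // deterministic (prediction) mode no gradients are computed, so nothing
+  // needs to be stored.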
+ if (!deterministic) + { + feedbackOutputParameter.push_back(output); + } + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + backwardStep = 0; + + if (!recurrentError.is_empty()) + { + recurrentError.zeros(); + } + } +} + +template +template +void Recurrent::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + if (!recurrentError.is_empty()) + { + recurrentError += gy; + } + else + { + recurrentError = gy; + } + + if (backwardStep < (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, recurrentModule)), std::move(recurrentError), + std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), + recurrentModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), + inputModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, feedbackModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( + boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, initialModule)), std::move(recurrentError), + std::move(g)), initialModule); + } + + recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); + backwardStep++; +} + +template +template +void Recurrent::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + if (gradientStep < (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + recurrentModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); + + boost::apply_visitor(GradientVisitor(std::move( + feedbackOutputParameter[feedbackOutputParameter.size() - 2 - + gradientStep]), std::move(boost::apply_visitor(deltaVisitor, + mergeModule))), feedbackModule); + } + else + { + boost::apply_visitor(GradientZeroVisitor(), recurrentModule); + boost::apply_visitor(GradientZeroVisitor(), inputModule); + boost::apply_visitor(GradientZeroVisitor(), feedbackModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, startModule))), initialModule); + } + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + feedbackOutputParameter.clear(); + } +} + +template +template +void Recurrent::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(rho, "rho"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp similarity index 69% rename from src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp rename to src/mlpack/methods/ann/layer/reinforce_normal.hpp index 655e443b1e5..fd192f0a1f2 100644 --- a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -1,17 +1,12 @@ /** - * @file reinforce_normal_layer.hpp + * @file reinforce_normal.hpp * @author Marcus Edel * * Definition of the ReinforceNormalLayer class, which implements the REINFORCE * algorithm for the normal distribution. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. 
You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_HPP +#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_HPP #include @@ -31,18 +26,15 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class ReinforceNormalLayer +class ReinforceNormal { public: /** - * Create the ReinforceNormalLayer object. + * Create the ReinforceNormal object. * * @param stdev Standard deviation used during the forward and backward pass. */ - ReinforceNormalLayer(const double stdev) : stdev(stdev) - { - // Nothing to do here. - } + ReinforceNormal(const double stdev); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -52,20 +44,7 @@ class ReinforceNormalLayer * @param output Resulting output activation. */ template - void Forward(const arma::Mat& input, arma::Mat& output) - { - if (!deterministic) - { - // Multiply by standard deviations and re-center the means to the mean. - output = arma::randn >(input.n_rows, input.n_cols) * - stdev + input; - } - else - { - // Use maximum a posteriori. - output = input; - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -77,16 +56,7 @@ class ReinforceNormalLayer * @param g The calculated gradient. */ template - void Backward(const DataType& input, - const DataType& /* gy */, - DataType& g) - { - g = (input - inputParameter) / std::pow(stdev, 2.0); - - // Multiply by reward and multiply by -1. - g *= -reward; - } - + void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -113,6 +83,12 @@ class ReinforceNormalLayer //! Modify the value of the deterministic parameter. double& Reward() { return reward; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Standard deviation used during the forward and backward pass. const double stdev; @@ -129,11 +105,17 @@ class ReinforceNormalLayer //! Locally-stored output parameter object. OutputDataType outputParameter; + //! Locally-stored output module parameter parameters. + std::vector moduleInputParameter; + //! If true use maximum a posteriori during the forward pass. bool deterministic; -}; // class ReinforceNormalLayer +}; // class ReinforceNormal + +} // namespace ann +} // namespace mlpack -}; // namespace ann -}; // namespace mlpack +// Include implementation. +#include "reinforce_normal_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp new file mode 100644 index 00000000000..1eaa25d8fae --- /dev/null +++ b/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp @@ -0,0 +1,69 @@ +/** + * @file reinforce_normal_impl.hpp + * @author Marcus Edel + * + * Implementation of the ReinforceNormalLayer class, which implements the + * REINFORCE algorithm for the normal distribution. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_IMPL_HPP + +// In case it hasn't yet been included. 
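// To make the behaviour implemented below concrete: during training,
// Forward() returns a sample drawn around the input mean,
//   output = input + stdev * randn(size(input)),
// and remembers that mean; Backward() then returns the REINFORCE estimate
//   g = -reward * (sample - mean) / stdev^2,
// i.e. the gradient of the log-density of the drawn sample with respect to
// the mean, scaled by the negated reward set through Reward().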
+#include "reinforce_normal.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +ReinforceNormal::ReinforceNormal( + const double stdev) : stdev(stdev) +{ + // Nothing to do here. +} + +template +template +void ReinforceNormal::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + if (!deterministic) + { + // Multiply by standard deviations and re-center the means to the mean. + output = arma::randn >(input.n_rows, input.n_cols) * + stdev + input; + + moduleInputParameter.push_back(input); + } + else + { + // Use maximum a posteriori. + output = input; + } +} + +template +template +void ReinforceNormal::Backward( + const DataType&& input, DataType&& /* gy */, DataType&& g) +{ + g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); + + // Multiply by reward and multiply by -1. + g *= reward; + g *= -1; + + moduleInputParameter.pop_back(); +} + +template +template +void ReinforceNormal::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp new file mode 100644 index 00000000000..d3c42a008c9 --- /dev/null +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -0,0 +1,111 @@ +/** + * @file select.hpp + * @author Marcus Edel + * + * Definition of the Select module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SELECT_HPP +#define MLPACK_METHODS_ANN_LAYER_SELECT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The select module selects the specified column from a given input matrix. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Select +{ + public: + /** + * Create the Select object. + * + * @param index The column which should be extracted from the given input. + * @param elements The number of elements that should be used. + */ + Select(const size_t index, const size_t elements = 0); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. 
+ OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + + private: + //! Locally-stored column index. + size_t index; + + //! Locally-stored number of elements selected. + size_t elements; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Select + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "select_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/select_impl.hpp b/src/mlpack/methods/ann/layer/select_impl.hpp new file mode 100644 index 00000000000..a40cb968461 --- /dev/null +++ b/src/mlpack/methods/ann/layer/select_impl.hpp @@ -0,0 +1,75 @@ +/** + * @file select_impl.hpp + * @author Marcus Edel + * + * Implementation of the Select module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SELECT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_SELECT_IMPL_HPP + +// In case it hasn't yet been included. +#include "constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Select::Select( + const size_t index, + const size_t elements) : + index(index), + elements(elements) + { + // Nothing to do here. + } + +template +template +void Select::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + if (elements == 0) + { + output = input.col(index); + } + else + { + output = input.submat(0, index, elements - 1, index); + } +} + +template +template +void Select::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + if (elements == 0) + { + g = gy; + } + else + { + g = gy.submat(0, 0, elements - 1, 0); + } +} + +template +template +void Select::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(index, "index"); + ar & data::CreateNVP(elements, "elements"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp new file mode 100644 index 00000000000..ca729c9da13 --- /dev/null +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -0,0 +1,203 @@ +/** + * @file sequential.hpp + * @author Marcus Edel + * + * Definition of the Sequential class, which acts as a feed-forward fully + * connected network container. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_HPP +#define MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Sequential class. The sequential class works as a + * feed-forward fully connected network container which plugs various layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Sequential +{ + public: + + /** + * Create the Sequential object using the specified parameters. + * + * @param model Expose the all network modules. + */ + Sequential(const bool model = true); + + //! Destroy the Sequential object. + ~Sequential(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output); + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */); + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + //! Return the model modules. + std::vector& Model() + { + if (model) + { + return network; + } + + return empty; + } + + //! Return the initial point for the optimization. + const arma::mat& Parameters() const { return parameters; } + //! Modify the initial point for the optimization. + arma::mat& Parameters() { return parameters; } + + arma::mat const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + arma::mat& InputParameter() { return inputParameter; } + + //! Get the output parameter. + arma::mat const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + arma::mat& OutputParameter() { return outputParameter; } + + //! Get the delta.e + arma::mat const& Delta() const { return delta; } + //! Modify the delta. + arma::mat& Delta() { return delta; } + + //! Get the gradient. + arma::mat const& Gradient() const { return gradient; } + //! Modify the gradient. 
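  // Typical construction (an illustrative sketch, not taken from this patch):
  // a Sequential built with model == false owns its modules and deletes them
  // in its destructor, e.g.
  //
  //   Sequential<> net(false);
  //   net.Add<Select<> >(0);   // construct and append a new module
  //   net.Add(layer);          // or append an existing LayerTypes instance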
+ arma::mat& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + + private: + //! Parameter which indicates if the modules should be exposed. + bool model; + + //! Indicator if we already initialized the model. + bool reset; + + //! Locally-stored network modules. + std::vector network; + + //! Locally-stored model parameters. + arma::mat parameters; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! Locally-stored empty list of modules. + std::vector empty; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient object. + arma::mat gradient; + + //! Locally-stored output width visitor. + OutputWidthVisitor outputWidthVisitor; + + //! Locally-stored output height visitor. + OutputHeightVisitor outputHeightVisitor; + + //! The input width. + size_t width; + + //! The input height. + size_t height; +}; // class Sequential + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "sequential_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/sequential_impl.hpp b/src/mlpack/methods/ann/layer/sequential_impl.hpp new file mode 100644 index 00000000000..df20a7a7668 --- /dev/null +++ b/src/mlpack/methods/ann/layer/sequential_impl.hpp @@ -0,0 +1,154 @@ +/** + * @file sequential_impl.hpp + * @author Marcus Edel + * + * Implementation of the Sequential class, which acts as a feed-forward fully + * connected network container. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_IMPL_HPP + +// In case it hasn't yet been included. +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Sequential::Sequential( + const bool model) : model(model), reset(false) +{ + // Nothing to do here. +} + +template +Sequential::~Sequential() +{ + if (!model) + { + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } + } +} + +template +template +void Sequential::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); + + // Set the input height. 
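      // On the first pass through the container (reset == false), each
      // layer's reported output width/height is forwarded as the next
      // layer's input width/height, so modules with spatial semantics can
      // size themselves; once the pass completes, reset is set and the
      // shapes are treated as fixed.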
+ boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. + if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + +if (!reset) +{ + reset = true; +} + + output = boost::apply_visitor(outputParameterVisitor, network.back()); +} + +template +template +void Sequential::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(gy), + std::move(boost::apply_visitor(deltaVisitor, network.back()))), + network.back()); + + for (size_t i = 2; i < network.size() + 1; ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } + + g = boost::apply_visitor(deltaVisitor, network.front()); +} + +template +template +void Sequential::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } +} + +template +template +void Sequential::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp deleted file mode 100644 index c3b723f17d9..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/** - * @file sparse_bias_layer.hpp - * @author Tham Ngap Wei - * - * Definition of the SparseBiasLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a bias layer design for sparse autoencoder. - * The BiasLayer class represents a single layer of a neural network. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). 
- */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SparseBiasLayer -{ - public: - /** - * Create the SparseBiasLayer object using the specified number of units and - * bias parameter. - * - * @param outSize The number of output units. - * @param batchSize The batch size used to train the network. - * @param bias The bias value. - */ - SparseBiasLayer(const size_t outSize, const size_t batchSize) : - outSize(outSize), - batchSize(batchSize) - { - weights.set_size(outSize, 1); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + arma::repmat(weights, 1, input.n_cols); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const ErrorType& gy, - ErrorType& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the bias. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& d, - InputDataType& g) - { - g = arma::sum(d, 1) / static_cast( - batchSize); - } - - //! Get the batch size - size_t BatchSize() const { return batchSize; } - //! Modify the batch size - size_t& BatchSize() { return batchSize; } - - //! Get the weights. - InputDataType const& Weights() const { return weights; } - //! Modify the weights. - InputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - InputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - InputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(batchSize, "batchSize"); - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! The batch size used to train the network. - size_t batchSize; - - //! Locally-stored weight object. - InputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - InputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseBiasLayer - -//! Layer traits for the bias layer. 
-template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = true; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp deleted file mode 100644 index 33a2a72f7f3..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp +++ /dev/null @@ -1,227 +0,0 @@ -/** - * @file sparse_output_layer.hpp - * @author Tham Ngap Wei - * - * This is the fourth layer of sparse autoencoder. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the SparseOutputLayer class. The SparseOutputLayer class - * represents the fourth layer of the sparse autoencoder. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SparseOutputLayer -{ - public: - /** - * Create the SparseLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - SparseOutputLayer(const size_t inSize, - const size_t outSize, - const double lambda = 0.0001, - const double beta = 3, - const double rho = 0.01) : - inSize(inSize), - outSize(outSize), - lambda(lambda), - beta(beta), - rho(rho) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - // Average activations of the hidden layer. - rhoCap = arma::sum(input, 1) / static_cast(input.n_cols); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& input, - const arma::Mat& gy, - arma::Mat& g) - { - const arma::mat klDivGrad = beta * (-(rho / rhoCap) + (1 - rho) / - (1 - rhoCap)); - - // NOTE: if the armadillo version high enough, find_nonfinite can prevents - // overflow value: - // klDivGrad.elem(arma::find_nonfinite(klDivGrad)).zeros(); - g = weights.t() * gy + - arma::repmat(klDivGrad, 1, input.n_cols); - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. 
- * @param g The calculated gradient. - */ - template - void Gradient(const InputType input, const arma::Mat& d, arma::Mat& g) - { - g = d * input.t() / static_cast( - input.n_cols) + lambda * weights; - } - - //! Sets the KL divergence parameter. - void Beta(const double b) - { - beta = b; - } - - //! Gets the KL divergence parameter. - double Beta() const - { - return beta; - } - - //! Sets the sparsity parameter. - void Rho(const double r) - { - rho = r; - } - - //! Gets the sparsity parameter. - double Rho() const - { - return rho; - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the RhoCap. - OutputDataType const& RhoCap() const { return rhoCap; } - //! Modify the RhoCap. - OutputDataType& RhoCap() { return rhoCap; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(lambda, "lambda"); - ar & data::CreateNVP(beta, "beta"); - ar & data::CreateNVP(rho, "rho"); - } - - private: - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! L2-regularization parameter. - double lambda; - - //! KL divergence parameter. - double beta; - - //! Sparsity parameter. - double rho; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Average activations of the hidden layer. - OutputDataType rhoCap; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseOutputLayer - -//! Layer traits for the SparseOutputLayer. 
-template -class LayerTraits > -{ -public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp similarity index 52% rename from src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp rename to src/mlpack/methods/ann/layer/vr_class_reward.hpp index 5b4da8ed0b7..f820e351aa8 100644 --- a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -1,20 +1,18 @@ /** - * @file vr_class_reward_layer.hpp + * @file vr_class_reward.hpp * @author Marcus Edel * - * Definition of the VRClassRewardLayer class, which implements the variance + * Definition of the VRClassReward class, which implements the variance * reduced classification reinforcement layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_HPP +#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_HPP #include +#include "layer_types.hpp" +#include "layer_visitor.hpp" + namespace mlpack { namespace ann /** Artificial Neural Network. */ { @@ -30,24 +28,19 @@ namespace ann /** Artificial Neural Network. */ { * arma::sp_mat or arma::cube). */ template < - typename InputDataType = arma::field, - typename OutputDataType = arma::field + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat > -class VRClassRewardLayer +class VRClassReward { public: /** - * Create the VRClassRewardLayer object. + * Create the VRClassReward object. * * @param scale Parameter used to scale the reward. * @param sizeAverage Take the average over all batches. */ - VRClassRewardLayer(const double scale = 1, const bool sizeAverage = true) : - scale(scale), - sizeAverage(sizeAverage) - { - // Nothing to do here. - } + VRClassReward(const double scale = 1, const bool sizeAverage = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -58,69 +51,23 @@ class VRClassRewardLayer * between 1 and the number of classes. */ template - double Forward(const arma::field >& input, - const arma::Mat& target) - { - return Forward(input(0, 0), target); - } + double Forward(const arma::Mat&& input, const arma::Mat&& target); /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. + * Ordinary feed backward pass of a neural network. The negative log + * likelihood layer expectes that the input contains log-probabilities for + * each class. The layer also expects a class index, in the range between 1 + * and the number of classes, as target when calling the Forward function. * - * @param input Input data that contains the log-probabilities for each class. + * @param input The propagated input activation. * @param target The target vector, that contains the class index in the range * between 1 and the number of classes. + * @param output The calculated error. 
*/ template - double Forward(const arma::Mat& input, const arma::Mat& target) - { - reward = 0; - arma::uword index = 0; - - for (size_t i = 0; i < input.n_cols; i++) - { - input.unsafe_col(i).max(index); - reward = ((index + 1) == target(i)) * scale; - } - - if (sizeAverage) - { - return -reward / input.n_cols; - } - - return -reward; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - double Backward(const arma::field >& input, - const arma::Mat& /* gy */, - arma::field >& g) - { - g = arma::field >(2, 1); - g(0, 0) = arma::zeros(input(0, 0).n_rows, input(0, 0).n_cols); - - double vrReward = reward - arma::as_scalar(input(1, 0)); - if (sizeAverage) - { - vrReward /= input(0, 0).n_cols; - } - - const double norm = sizeAverage ? 2.0 / input.n_cols : 2.0; - - g(1, 0) = norm * (input(1, 0) - reward); - - return vrReward; - } + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -142,6 +89,27 @@ class VRClassRewardLayer //! Modify the value of the deterministic parameter. bool& Deterministic() { return deterministic; } + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored value to scale the reward. const double scale; @@ -163,9 +131,15 @@ class VRClassRewardLayer //! If true dropout and scaling is disabled, see notes above. bool deterministic; -}; // class VRClassRewardLayer -}; // namespace ann -}; // namespace mlpack + //! Locally-stored network modules. + std::vector network; +}; // class VRClassReward + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "vr_class_reward_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp new file mode 100644 index 00000000000..a4a2703545b --- /dev/null +++ b/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp @@ -0,0 +1,101 @@ +/** + * @file vr_class_reward_impl.hpp + * @author Marcus Edel + * + * Implementation of the VRClassReward class, which implements the variance + * reduced classification reinforcement layer. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_IMPL_HPP + +// In case it hasn't yet been included. +#include "vr_class_reward.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +VRClassReward::VRClassReward( + const double scale, + const bool sizeAverage) : + scale(scale), + sizeAverage(sizeAverage) +{ + // Nothing to do here. 
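  // In brief (describing the Forward()/Backward() implementations that
  // follow): Forward() accumulates the negative log-likelihood of the target
  // classes and subtracts the scaled reward earned by the argmax prediction;
  // Backward() writes the usual NLL gradient into the class entries, puts
  // the baseline gradient into the entry reserved for the predicted reward,
  // and hands the variance-reduced reward (reward minus baseline) to the
  // last module of the attached network via RewardSetVisitor.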
+} + +template +template +double VRClassReward::Forward( + const arma::Mat&& input, const arma::Mat&& target) +{ + double output = 0; + + for (size_t i = 0; i < input.n_cols - 1; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + reward = 0; + arma::uword index = 0; + + for (size_t i = 0; i < input.n_cols - 1; i++) + { + input.unsafe_col(i).max(index); + reward = ((index + 1) == target(i)) * scale; + } + + if (sizeAverage) + { + return output - reward / (input.n_cols - 1); + } + + return output - reward; +} + +template +template +void VRClassReward::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < (input.n_cols - 1); ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } + + double vrReward = reward - input(0, 1); + if (sizeAverage) + { + vrReward /= input.n_cols - 1; + } + + const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; + + output(0, 1) = norm * (input(0, 1) - reward); + boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); +} + +template +template +void VRClassReward::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/network_traits.hpp b/src/mlpack/methods/ann/network_traits.hpp deleted file mode 100644 index 5aa91e8ae60..00000000000 --- a/src/mlpack/methods/ann/network_traits.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/** - * @file network_traits.hpp - * @author Marcus Edel - * - * NetworkTraits class, a template class to get information about various - * networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP -#define MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP - -namespace mlpack { -namespace ann { - -/** - * This is a template class that can provide information about various - * networks. By default, this class will provide the weakest possible - * assumptions on networks, and each network should override values as - * necessary. If a network doesn't need to override a value, then there's no - * need to write a NetworkTraits specialization for that class. - */ -template -class NetworkTraits -{ - public: - /** - * This is true if the network is a feed forward neural network. - */ - static const bool IsFNN = false; - - /** - * This is true if the network is a recurrent neural network. - */ - static const bool IsRNN = false; - - /** - * This is true if the network is a convolutional neural network. - */ - static const bool IsCNN = false; - - /** - * This is true if the network is a sparse autoencoder. 
- */ - static const bool IsSAE = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif - diff --git a/src/mlpack/methods/ann/network_util.hpp b/src/mlpack/methods/ann/network_util.hpp deleted file mode 100644 index 93bdf044355..00000000000 --- a/src/mlpack/methods/ann/network_util.hpp +++ /dev/null @@ -1,247 +0,0 @@ -/** - * @file network_util.hpp - * @author Marcus Edel - * - * Neural network utilities. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_HPP -#define MLPACK_METHODS_ANN_NETWORK_UTIL_HPP - -#include - -#include - -/** - * Neural network utility functions. - */ -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Auxiliary function to get the number of weights of the specified network. - * - * @param network The network used for specifying the number of weights. - * @return The number of weights. - */ -template -typename std::enable_if::type -NetworkSize(std::tuple& network); - -template -typename std::enable_if::type -NetworkSize(std::tuple& network); - -/** - * Auxiliary function to get the number of weights of the specified layer. - * - * @param layer The layer used for specifying the number of weights. - * @param output The layer output parameter. - * @return The number of weights. - */ -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& output); - -/** - * Auxiliary function to set the weights of the specified network. - * - * @param weights The weights used to set the weights of the network. - * @param network The network used to set the weights. - * @param offset The memory offset of the weights. - */ -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the weights of the specified layer. - * - * @param layer The layer used to set the weights. - * @param weights The weights used to set the weights of the layer. - * @param offset The memory offset of the weights. - * @param output The output parameter of the layer. - * @return The number of weights. - */ -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::mat& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::cube& output); - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, P& output); - -/** - * Auxiliary function to set the gradients of the specified network. - * - * @param gradients The gradients used to set the gradient of the network. - * @param network The network used to set the gradients. - * @param offset The memory offset of the gradients. - * return The number of gradients. 
- */ -template -typename std::enable_if::type -NetworkGradients(arma::mat& gradients, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkGradients(arma::mat& gradients, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the gradients of the specified layer. - * - * @param layer The layer used to set the gradients. - * @param gradients The gradients used to set the gradient of the layer. - * @param offset The memory offset of the gradients. - * @param output The output parameter of the layer. - * @return The number of gradients. - */ -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::mat& output); - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::cube& output); - -template -typename std::enable_if< - !HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, arma::mat& gradients, size_t offset, P& output); - -/** - * Auxiliary function to get the input size of the specified network. - * - * @param network The network used for specifying the input size. - * @return The input size. - */ -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network); - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network); - -/** - * Auxiliary function to get the input size of the specified layer. - * - * @param layer The layer used for specifying the input size. - * @param output The layer output parameter. - * @return The input size. - */ -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& output); - -/** - * Auxiliary function to set the weights of the specified network using a given - * initialize rule. - * - * @param initializeRule The rule used to initialize the network weights. - * @param weights The weights used to set the weights of the network. - * @param network The network used to set the weights. - * @param offset The memory offset of the weights. - */ -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the weights of the specified layer using the given - * initialize rule. - * - * @param initializeRule The rule used to initialize the layer weights. - * @param layer The layer used to set the weights. - * @param weights The weights used to set the weights of the layer. - * @param offset The memory offset of the weights. - * @param output The output parameter of the layer. - * @return The number of weights. 
- */ -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::mat& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::cube& output); - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - P& output); - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "network_util_impl.hpp" - -#endif diff --git a/src/mlpack/methods/ann/network_util_impl.hpp b/src/mlpack/methods/ann/network_util_impl.hpp deleted file mode 100644 index 32034576290..00000000000 --- a/src/mlpack/methods/ann/network_util_impl.hpp +++ /dev/null @@ -1,286 +0,0 @@ -/** - * @file network_util_impl.hpp - * @author Marcus Edel - * - * Implementation of the network auxiliary functions. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP -#define MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP - -#include "network_util_impl.hpp" - -#include - -namespace mlpack { -namespace ann { - -template -typename std::enable_if::type -NetworkSize(std::tuple& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkSize(std::tuple& network) -{ - return LayerSize(std::get(network), std::get( - network).OutputParameter()) + NetworkSize(network); -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& /* unused */) -{ - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerSize(T& /* unused */, P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset) -{ - NetworkWeights(weights, network, - offset + LayerWeights(std::get(network), weights, - offset, std::get(network).OutputParameter())); - -} - -template -typename std::enable_if::type -NetworkWeights(arma::mat& /* unused */, - std::tuple& /* unused */, - size_t /* unused */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, - arma::mat& weights, - size_t offset, - arma::mat& /* unused */) -{ - layer.Weights() = arma::mat(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, - arma::mat& weights, - size_t offset, - arma::cube& /* unused */) -{ - layer.Weights() = arma::cube(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(T& /* unused */, - arma::mat& /* unused */, - size_t /* unused */, - P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkGradients(arma::mat& 
gradients, - std::tuple& network, - size_t offset) -{ - NetworkGradients(gradients, network, - offset + LayerGradients(std::get(network), gradients, - offset, std::get(network).OutputParameter())); -} - -template -typename std::enable_if::type -NetworkGradients(arma::mat& /* unused */, - std::tuple& /* unused */, - size_t /* unused */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::mat& /* unused */) -{ - layer.Gradient() = arma::mat(gradients.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::cube& /* unused */) -{ - layer.Gradient() = arma::cube(gradients.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasGradientCheck::value, size_t>::type -LayerGradients(T& /* unused */, - arma::mat& /* unused */, - size_t /* unused */, - P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network) -{ - const size_t inputSize = LayerInputSize(std::get(network), std::get( - network).OutputParameter()); - - if (inputSize) - { - return inputSize; - } - - return NetworkInputSize(network); -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& /* unused */) -{ - return layer.Weights().n_cols; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerInputSize(T& /* unused */, P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset) -{ - NetworkWeights(initializeRule, weights, - network, offset + LayerWeights(initializeRule, std::get(network), - weights, offset, std::get(network).OutputParameter())); -} - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& /* initializeRule */, - arma::mat& /* weights */, - std::tuple& /* network */, - size_t /* offset */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::mat& /* output */) -{ - layer.Weights() = arma::mat(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows, - layer.Weights().n_cols); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::cube& /* output */) -{ - layer.Weights() = arma::cube(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows, - layer.Weights().n_cols); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - 
!HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& /* initializeRule */, - T& /* layer */, - arma::mat& /* weights */, - size_t /* offset */, - P& /* output */) -{ - return 0; -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt b/src/mlpack/methods/ann/performance_functions/CMakeLists.txt deleted file mode 100644 index c64f7263cea..00000000000 --- a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - mse_function.hpp - sse_function.hpp - cee_function.hpp - sparse_function.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/ann/performance_functions/cee_function.hpp b/src/mlpack/methods/ann/performance_functions/cee_function.hpp deleted file mode 100644 index 34244521dc8..00000000000 --- a/src/mlpack/methods/ann/performance_functions/cee_function.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/** - * @file cee_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the cross-entropy error performance - * function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The cross-entropy error performance function measures the network's - * performance according to the cross entropy errors. The log in the cross- - * entropy take sinto account the closeness of a prediction and is a more - * granular way to calculate the error. - * - * @tparam Layer The layer that is connected with the output layer. - */ -template< - class Layer = LinearLayer< > -> -class CrossEntropyErrorFunction -{ - public: - /** - * Computes the cross-entropy error function.. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the cross-entropy error function. - * - * @param input Input data. - * @param target Target data. - * @return cross-entropy error. 
- */ - template - static double Error(const DataType& input, const DataType& target, const DataType&) - { - if (LayerTraits::IsBinary) - return -arma::dot(arma::trunc_log(arma::abs(target - input)), target); - - return -arma::dot(arma::trunc_log(input), target); - } - -}; // class CrossEntropyErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/mse_function.hpp b/src/mlpack/methods/ann/performance_functions/mse_function.hpp deleted file mode 100644 index d2f19334d6f..00000000000 --- a/src/mlpack/methods/ann/performance_functions/mse_function.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/** - * @file mse_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the mean squared error performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The mean squared error performance function measures the network's - * performance according to the mean of squared errors. - */ -class MeanSquaredErrorFunction -{ - public: - /** - * Computes the mean squared error function. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the mean squared error function. - * - * @param input Input data. - * @param target Target data. - * @return mean of squared errors. - */ - template - static double Error(const DataType& input, const DataType& target, const DataType&) - { - return arma::mean(arma::mean(arma::square(target - input))); - } - -}; // class MeanSquaredErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp b/src/mlpack/methods/ann/performance_functions/sparse_function.hpp deleted file mode 100644 index 145a0b64aff..00000000000 --- a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp +++ /dev/null @@ -1,141 +0,0 @@ -/** - * @file sparse_function.hpp - * @author Siddharth Agrawal - * @author Tham Ngap Wei - * - * Definition and implementation of the sparse performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ - -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The cost function design for the sparse autoencoder. - */ -template -class SparseErrorFunction -{ - public: - /** - * Computes the cost of sparse autoencoder. - * - * @param lambda L2-regularization parameter. 
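// Illustrative sketch, not part of the patch: stripped of the network plumbing, the
// CrossEntropyErrorFunction and MeanSquaredErrorFunction removed here reduce to the
// two free functions below (assuming Armadillo matrices of predictions and targets).

#include <armadillo>

// Cross-entropy error: -sum_i target_i * log(prediction_i); trunc_log() avoids
// -inf when a prediction is exactly zero, matching the removed implementation.
inline double CrossEntropyError(const arma::mat& prediction, const arma::mat& target)
{
  return -arma::accu(target % arma::trunc_log(prediction));
}

// Mean squared error: mean over all elements of the squared difference.
inline double MeanSquaredError(const arma::mat& prediction, const arma::mat& target)
{
  return arma::accu(arma::square(target - prediction)) / target.n_elem;
}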
- * @param beta KL divergence parameter. - * @param rho Sparsity parameter. - */ - SparseErrorFunction(const double lambda = 0.0001, - const double beta = 3, - const double rho = 0.01) : - lambda(lambda), beta(beta), rho(rho) - { - // Nothing to do here. - } - - SparseErrorFunction(SparseErrorFunction &&layer) noexcept - { - *this = std::move(layer); - } - - SparseErrorFunction& operator=(SparseErrorFunction &&layer) noexcept - { - lambda = layer.lambda; - beta = layer.beta; - rho = layer.rho; - - return *this; - } - - //! Get the KL divergence parameter. - double Beta() const { return beta; } - //! Modify the KL divergence parameter. - void Beta(double value) { beta = value;} - - //! Get the L2-regularization parameter. - double Lambda() const { return lambda; } - //! Modify the L2-regularization parameter. - void Lambda(double value) { lambda = value;} - - //! Get the sparsity parameter. - double Rho() const { return rho; } - //! Modify the sparsity parameter. - void Rho(double value) { rho = value;} - - /** - * Computes the cost of sparse autoencoder. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error different between output and the input - * @return sum of squared errors. - */ - template - double Error(const Tp& network, - const InType& target, const InType &error) - { - return Error(std::get<0>(network).Weights(), std::get<3>(network).Weights(), - std::get<3>(network).RhoCap(), target, error); - } - - /** - * Computes the cost of sparse autoencoder. - * - * @param w1 weights of hidden layer - * @param w2 weights of output layer - * @param rhoCap Average activations of the hidden layer - * @param target Target data. - * @param error different between output and the input - * @return sum of squared errors. - */ - template - double Error(const InType& w1, const InType& w2, - const InType& rhoCap, const InType& target, - const InType& error) - { - // Calculate squared L2-norms of w1 and w2. - const double wL2SquaredNorm = - arma::accu(w1 % w1) + arma::accu(w2 % w2); - - // Calculate the reconstruction error, the regularization cost and the KL - // divergence cost terms. 'sumOfSquaresError' is the average squared l2-norm - // of the reconstructed data difference. 'weightDecay' is the squared l2-norm - // of the weights w1 and w2. 'klDivergence' is the cost of the hidden layer - // activations not being low. It is given by the following formula: - // KL = sum_over_hSize(rho*log(rho/rhoCaq) + (1-rho)*log((1-rho)/(1-rhoCap))) - const double sumOfSquaresError = - 0.5 * arma::accu(error % error) / target.n_cols; - - const double weightDecay = 0.5 * lambda * wL2SquaredNorm; - const double klDivergence = - beta * arma::accu(rho * arma::trunc_log(rho / rhoCap) + (1 - rho) * - arma::trunc_log((1 - rho) / (1 - rhoCap))); - - // The cost is the sum of the terms calculated above. - return sumOfSquaresError + weightDecay + klDivergence; - } - - private: - //! Locally stored L2-regularization parameter. - double lambda; - - //! Locally stored KL divergence parameter. - double beta; - - //! Locally stored sparsity parameter. 
- double rho; - -}; // class SparseErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/sse_function.hpp b/src/mlpack/methods/ann/performance_functions/sse_function.hpp deleted file mode 100644 index 34055fb74c0..00000000000 --- a/src/mlpack/methods/ann/performance_functions/sse_function.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/** - * @file sse_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the sum squared error performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The sum squared error performance function measures the network's performance - * according to the sum of squared errors. - */ -class SumSquaredErrorFunction -{ - public: - /** - * Computes the sum squared error function. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, - const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the sum squared error function. - * - * @param input Input data. - * @param target Target data. - * @return sum of squared errors. - */ - template - static double Error(const DataType& input, - const DataType& target, - const DataType&) - { - return arma::sum(arma::square(target - input)); - } - -}; // class SumSquaredErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt b/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt deleted file mode 100644 index 99b6b803bb1..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - max_pooling.hpp - mean_pooling.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp deleted file mode 100644 index f50b0419fe3..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file max_pooling.hpp - * @author Shangtong Zhang - * - * Definition of the MaxPooling class, which implements max pooling. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
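// Illustrative sketch, not part of the patch: the cost computed by the removed
// SparseErrorFunction is the sum of a reconstruction term, an L2 weight-decay term
// and a KL-divergence sparsity penalty. A standalone version with the same names
// and default hyperparameters:

#include <armadillo>

double SparseAutoencoderCost(const arma::mat& w1,      // hidden-layer weights
                             const arma::mat& w2,      // output-layer weights
                             const arma::mat& rhoCap,  // mean hidden activations
                             const arma::mat& error,   // reconstruction minus input
                             const double lambda = 0.0001,
                             const double beta = 3,
                             const double rho = 0.01)
{
  const double sumOfSquaresError = 0.5 * arma::accu(error % error) / error.n_cols;
  const double weightDecay = 0.5 * lambda *
      (arma::accu(w1 % w1) + arma::accu(w2 % w2));
  const double klDivergence = beta * arma::accu(
      rho * arma::trunc_log(rho / rhoCap) +
      (1 - rho) * arma::trunc_log((1 - rho) / (1 - rhoCap)));

  return sumOfSquaresError + weightDecay + klDivergence;
}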
- */ -#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP -#define MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/* - * The max pooling rule for convolution neural networks. Take the maximum value - * within the receptive block. - */ -class MaxPooling -{ - public: - /* - * Return the maximum value within the receptive block. - * - * @param input Input used to perform the pooling operation. - */ - template - double Pooling(const MatType& input) - { - return input.max(); - } - - /* - * Set the maximum value within the receptive block. - * - * @param input Input used to perform the pooling operation. - * @param value The unpooled value. - * @param output The unpooled output data. - */ - template - void Unpooling(const MatType& input, const double value, MatType& output) - { - output = MatType(input.n_rows, input.n_cols); - output.fill(value / input.n_elem); - } -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp deleted file mode 100644 index 7ab88c329f4..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file mean_pooling.hpp - * @author Shangtong Zhang - * - * Definition of the MeanPooling class, which implements mean pooling. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP -#define MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/* - * The mean pooling rule for convolution neural networks. Average all values - * within the receptive block. - */ -class MeanPooling -{ - public: - /* - * Return the average value within the receptive block. - * - * @param input Input used to perform the pooling operation. - */ - template - double Pooling(const MatType& input) - { - return arma::mean(arma::mean(input)); - } - - /* - * Set the average value within the receptive block. - * - * @param input Input used to perform the pooling operation. - * @param value The unpooled value. - * @param output The unpooled output data. - */ - template - void Unpooling(const MatType& input, const double value, MatType& output) - { - output = MatType(input.n_rows, input.n_cols); - output.fill(value / input.n_elem); - } -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp index 6b9483cd831..c071f052483 100644 --- a/src/mlpack/methods/ann/rnn.hpp +++ b/src/mlpack/methods/ann/rnn.hpp @@ -14,39 +14,29 @@ #include -#include - -#include -#include -#include -#include +#include +#include +#include #include namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of a standard recurrent neural network. + * Implementation of a standard recurrent neural network container. * - * @tparam LayerTypes Contains all layer modules used to construct the network. * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. 
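// Illustrative sketch, not part of the patch: the MaxPooling and MeanPooling rules
// removed above collapse a receptive block to one value on the forward pass and, on
// the backward pass, spread a value uniformly back over the block. Standalone
// versions, assuming an Armadillo matrix as the block:

#include <armadillo>

inline double MaxPool(const arma::mat& block) { return block.max(); }

inline double MeanPool(const arma::mat& block) { return arma::accu(block) / block.n_elem; }

// Unpooling used by both rules: distribute the value evenly over the block shape.
inline void Unpool(const arma::mat& block, const double value, arma::mat& output)
{
  output.set_size(block.n_rows, block.n_cols);
  output.fill(value / block.n_elem);
}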
- * @tparam PerformanceFunction Performance strategy used to calculate the error. */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> +template< + typename OutputLayerType = NegativeLogLikelihood<>, + typename InitializationRuleType = RandomInitialization > class RNN { public: //! Convenience typedef for the internal model construction. - using NetworkType = RNN; + using NetworkType = RNN; /** * Create the RNN object with the given predictors and responses set (this is @@ -54,71 +44,44 @@ class RNN * Optionally, specify which initialize rule and performance function should * be used. * - * @param network Network modules used to construct the network. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + * @param single Predict only the last element of the input sequence. * @param outputLayer Output layer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template class OptimizerType> - RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + RNN(const size_t rho, + const bool single = false, + OutputLayerType outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); /** * Create the RNN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. + * the set that is used to train the network) and the given optimizer. + * Optionally, specify which initialize rule and performance function should + * be used. * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. + * @param responses Outputs results from input training variables. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + * @param single Predict only the last element of the input sequence. + * @param outputLayer Output layer used to evaluate the network. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. 
*/ - template - RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, + RNN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + const size_t rho, + const bool single = false, + OutputLayerType outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); - /** - * Create the RNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. - */ - template - RNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + //! Destructor to release allocated memory. + ~RNN(); /** - * Train the recurrent neural network on the given input data. By default, the - * SGD optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::RMSprop). + * Train the recurrent neural network on the given input data using the given + * optimizer. * * This will use the existing model parameters as a starting point for the * optimization. If this is not what you want, then you should access the @@ -127,31 +90,19 @@ class RNN * @tparam OptimizerType Type of optimizer to use to train the model. * @param predictors Input training variables. * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::SGD - > - void Train(const arma::mat& predictors, const arma::mat& responses); - - /** - * Train the recurrent neural network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * * @param optimizer Instantiated optimizer used to train the model. */ template< template class OptimizerType = mlpack::optimization::SGD > - void Train(OptimizerType& optimizer); + void Train(const arma::mat& predictors, + const arma::mat& responses, + OptimizerType& optimizer); /** - * Train the recurrent neural network on the given input data using the given - * optimizer. + * Train the recurrent neural network on the given input data. By default, the + * SGD optimization algorithm is used, but others can be specified + * (such as mlpack::optimization::RMSprop). * * This will use the existing model parameters as a starting point for the * optimization. If this is not what you want, then you should access the @@ -160,19 +111,16 @@ class RNN * @tparam OptimizerType Type of optimizer to use to train the model. * @param predictors Input training variables. * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. 
*/ template< template class OptimizerType = mlpack::optimization::SGD > - void Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer); + void Train(const arma::mat& predictors, const arma::mat& responses); /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the - * OutputClass() function. + * output layer function. * * @param predictors Input predictors. * @param responses Matrix to put output predictions of responses into. @@ -186,9 +134,9 @@ class RNN * @param parameters Matrix model parameters. * @param i Index of point to use for objective function evaluation. * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. + * layer act differently in training or testing mode. */ - double Evaluate(const arma::mat& parameters, + double Evaluate(const arma::mat& /* parameters */, const size_t i, const bool deterministic = true); @@ -206,6 +154,29 @@ class RNN const size_t i, arma::mat& gradient); + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + template + void Add(const LayerType& layer) { network.push_back(new LayerType(layer)); } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + //! Return the number of separable functions (the number of predictor points). size_t NumFunctions() const { return numFunctions; } @@ -219,575 +190,117 @@ class RNN void Serialize(Archive& ar, const unsigned int /* version */); private: - /* - * Predict the response of the given input matrix. - */ - template - void SinglePredict(const DataType& input, DataType& output) - { - deterministic = true; - seqLen = input.n_rows / inputSize; - ResetParameter(network); - - // Iterate through the input sequence and perform the feed forward pass. - for (seqNum = 0; seqNum < seqLen; seqNum++) - { - // Perform the forward pass and save the activations. - Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1), - network); - SaveActivations(network); - - // Retrieve output of the subsequence. - if (seqOutput) - { - DataType seqOutput; - OutputPrediction(seqOutput, network); - output = arma::join_cols(output, seqOutput); - } - } - - // Retrieve output of the complete sequence. - if (!seqOutput) - OutputPrediction(output, network); - } - + // Helper functions. /** - * Reset the network by clearing the layer activations and by setting the - * layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) - { - activations.clear(); - } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetSeqLen(std::get(network)); - ResetRecurrent(std::get(network), std::get(network).InputParameter()); - std::get(network).Delta().zeros(); - - ResetParameter(network); - } - - /** - * Reset the layer status by setting the current deterministic parameter - * for all layer that implement the Deterministic function. 
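// Usage sketch, not part of the patch: with the container-style interface added here,
// a recurrent network is assembled by pushing layer modules into the RNN and then
// calling Train() and Predict(). The Add<>(), Train() and Predict() signatures are the
// ones introduced in this change; the concrete layer types, their constructor
// arguments and the extra include below are assumptions made only for illustration.

#include <mlpack/methods/ann/rnn.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>  // assumed aggregate layer header

using namespace mlpack::ann;

void TrainExample(arma::mat& predictors, arma::mat& responses, const size_t rho)
{
  // Defaults added in this change: NegativeLogLikelihood<> output layer and
  // RandomInitialization for the weights.
  RNN<> model(rho);

  const size_t inputSize = 10, hiddenSize = 32, outputSize = 4;  // hypothetical sizes
  model.Add<Linear<> >(inputSize, hiddenSize);
  model.Add<LSTM<> >(hiddenSize, hiddenSize, rho);
  model.Add<Linear<> >(hiddenSize, outputSize);
  model.Add<LogSoftMax<> >();

  model.Train(predictors, responses);  // default OptimizerType is SGD

  arma::mat predictions;
  model.Predict(predictors, predictions);
}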
- */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ } - - /** - * Reset the layer sequence length by setting the current seqLen parameter - * for all layer that implement the SeqLen function. - */ - template - typename std::enable_if< - HasSeqLenCheck::value, void>::type - ResetSeqLen(T& layer) - { - layer.SeqLen() = seqLen; - } - - template - typename std::enable_if< - !HasSeqLenCheck::value, void>::type - ResetSeqLen(T& /* unused */) { /* Nothing to do here */ } - - /** - * Distinguish between recurrent layer and non-recurrent layer when resetting - * the recurrent parameter. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - ResetRecurrent(T& layer, P& /* unused */) - { - layer.RecurrentParameter().zeros(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - ResetRecurrent(T& /* unused */, P& /* unused */) - { - /* Nothing to do here */ - } - - /** - * Initialize the network by setting the input size and output size. - */ - template - typename std::enable_if::type - InitLayer(const InputDataType& /* unused */, - const TargetDataType& target, - std::tuple& /* unused */) - { - seqOutput = outputSize < target.n_elem ? true : false; - } - - template - typename std::enable_if::type - InitLayer(const InputDataType& input, - const TargetDataType& target, - std::tuple& network) - { - Init(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - InitLayer(input, target, - network); - } - - /** - * Retrieve the weight matrix for all layer that implement the Weights - * function to extract the input size and output size. - */ - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Init(T& layer, P& /* unused */, D& /* unused */) - { - // Initialize the input size only once. - if (!inputSize) - inputSize = layer.Weights().n_cols; - - outputSize = layer.Weights().n_rows; - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Init(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } - - /** - * Save the network layer activations. + * The Forward algorithm (part of the Forward-Backward algorithm). Computes + * forward probabilities for each module. + * + * @param input Data sequence to compute probabilities for. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - SaveActivations(std::tuple& /* unused */) - { - Save(I, std::get(network), std::get(network).InputParameter()); - LinkRecurrent(network); - } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - SaveActivations(std::tuple& network) - { - Save(I, std::get(network), std::get(network).InputParameter()); - SaveActivations(network); - } + void Forward(arma::mat&& input); /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. + * The Backward algorithm (part of the Forward-Backward algorithm). Computes + * backward pass for module. 
*/ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Save(const size_t layerNumber, T& layer, P& /* unused */) - { - if (activations.size() == layerNumber) - { - activations.push_back(new arma::mat(layer.RecurrentParameter().n_rows, - seqLen)); - } - - activations[layerNumber].unsafe_col(seqNum) = layer.RecurrentParameter(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Save(const size_t layerNumber, T& layer, P& /* unused */) - { - if (activations.size() == layerNumber) - { - activations.push_back(new arma::mat(layer.OutputParameter().n_rows, - seqLen)); - } - - activations[layerNumber].unsafe_col(seqNum) = layer.OutputParameter(); - } + void Backward(); /** - * Load the network layer activations. + * Iterate through all layer modules and update the the gradient using the + * layer defined optimizer. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename DataType, typename... Tp - > - typename std::enable_if::type - LoadActivations(DataType& input, std::tuple& network) - { - Load(I, std::get(network), std::get(network).InputParameter()); - std::get<0>(network).InputParameter() = input; - } + void Gradient(); - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename DataType, typename... Tp - > - typename std::enable_if::type - LoadActivations(DataType& input, std::tuple& network) - { - Load(I, std::get(network), std::get(network).InputParameter()); - LoadActivations(input, network); - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. + /* + * Predict the response of the given input sequence. + * + * @param predictors Input predictors. + * @param responses Vector to put output prediction of a response into. */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, T& layer, P& /* unused */) - { - layer.RecurrentParameter() = activations[layerNumber].unsafe_col(seqNum); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, T& layer, P& /* unused */) - { - layer.OutputParameter() = activations[layerNumber].unsafe_col(seqNum); - } + void SinglePredict(const arma::mat& predictors, arma::mat& responses); /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. + * Reset the module infomration (weights/parameters). */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } + void ResetParameters(); /** - * Link the calculated activation with the correct layer. + * Reset the module status by setting the current deterministic parameter + * for all modules that implement the Deterministic function. */ - template< - size_t I = 1, - size_t Max = std::tuple_size::value - 1, - typename... 
Tp - > - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - } - - template< - size_t I = 1, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } + void ResetDeterministic(); /** - * Link the calculated activation with the correct recurrent layer. + * Reset the gradient for all modules that implement the Gradient function. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkRecurrent(std::tuple& /* unused */) { /* Nothing to do here */ } + void ResetGradients(arma::mat& gradient); - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkRecurrent(std::tuple& network) - { - UpdateRecurrent(std::get(network), std::get(network).InputParameter(), - std::get(network).OutputParameter()); - LinkRecurrent(network); - } + //! Number of steps to backpropagate through time (BPTT). + size_t rho; - /** - * Distinguish between recurrent layer and non-recurrent layer when updating - * the recurrent activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - UpdateRecurrent(T& layer, P& /* unused */, D& output) - { - layer.RecurrentParameter() = output; - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - UpdateRecurrent(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + //! Instantiated outputlayer used to evaluate the network. + OutputLayerType outputLayer; - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Masures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } + //! Instantiated InitializationRule object for initializing the network + //! parameter. + InitializationRuleType initializeRule; - /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. - */ - template - void Backward(DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, std::tuple& /* unused */) - { - /* Nothing to do here */ - } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - BackwardRecurrent(std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } + //! The input size. 
+ size_t inputSize; - /* - * Update the delta of the recurrent layer. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - BackwardRecurrent(T& layer, P& /* unused */, D& delta) - { - if (!layer.Delta().is_empty()) - delta += layer.Delta(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - BackwardRecurrent(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + //! The output size. + size_t outputSize; - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template::value - 2, - typename... Tp> - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta(), std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - } - - template::value - 2, - typename... Tp> - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta(), std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value && - HasRecurrentParameterCheck::value, void>::type - Update(T1& layer, P1& /* unused */, D1& /* unused */, T2& /* unused */, - P2& /* unused */, D2& delta2) - { - layer.Gradient(layer.InputParameter(), delta2, layer.Gradient()); - } - - template - typename std::enable_if< - (!HasGradientCheck::value && - !HasRecurrentParameterCheck::value) || - (!HasGradientCheck::value && - HasRecurrentParameterCheck::value), void>::type - Update(T1& /* unused */, P1& /* unused */, D1& /* unused */, T2& /* unused */, - P2& /* unused */, D2& /* unused */) - { - /* Nothing to do here */ - } - - template - typename std::enable_if< - HasGradientCheck::value && - !HasRecurrentParameterCheck::value, void>::type - Update(T1& layer, P1& /* unused */, D1& delta1, T2& /* unused */, - P2& /* unused */, D2& /* unused */) - { - layer.Gradient(layer.InputParameter(), delta1, layer.Gradient()); - } + //! The target size. + size_t targetSize; - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } + //! Indicator if we already trained the model. + bool reset; - //! Instantiated recurrent neural network. - LayerTypes network; + //! Only predict the last element of the input sequence. + bool single; - //! The outputlayer used to evaluate the network - OutputLayerType& outputLayer; + //! Locally-stored model modules. + std::vector network; - //! Performance strategy used to claculate the error. - PerformanceFunction performanceFunc; + //! The matrix of data points (predictors). + arma::mat predictors; - //! The current evaluation mode (training or testing). - bool deterministic; + //! The matrix of responses to the input data points. + arma::mat responses; //! Matrix of (trained) parameters. arma::mat parameter; - //! The matrix of data points (predictors). - arma::mat predictors; + //! The number of separable functions (the number of predictor points). + size_t numFunctions; - //! The matrix of responses to the input data points. - arma::mat responses; + //! The current error for the backward pass. 
+ arma::mat error; - //! Locally stored network input size. - size_t inputSize; + //! THe current input of the forward/backward pass. + arma::mat currentInput; - //! Locally stored network output size. - size_t outputSize; + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; - //! The index of the current sequence number. - size_t seqNum; + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; - //! Locally stored number of samples in one input sequence. - size_t seqLen; + //! List of all module parameters for the backward pass (BBTT). + std::vector moduleOutputParameter; - //! Locally stored parameter that indicates if the input is a sequence. - bool seqOutput; + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; - //! The activation storage we are using to perform the feed backward pass. - boost::ptr_vector activations; + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; - //! The number of separable functions (the number of predictor points). - size_t numFunctions; + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; - //! Locally stored backward error. - arma::mat error; + //! The current evaluation mode (training or testing). + bool deterministic; }; // class RNN } // namespace ann diff --git a/src/mlpack/methods/ann/rnn_impl.hpp b/src/mlpack/methods/ann/rnn_impl.hpp index d8d2f07f0ac..cdac030a2ec 100644 --- a/src/mlpack/methods/ann/rnn_impl.hpp +++ b/src/mlpack/methods/ann/rnn_impl.hpp @@ -19,129 +19,85 @@ namespace mlpack { namespace ann /** Artificial Neural Network. */ { -template -template class OptimizerType -> -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols), +template +RNN::RNN( + const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : + rho(rho), + outputLayer(outputLayer), + initializeRule(initializeRule), inputSize(0), - outputSize(0) + outputSize(0), + targetSize(0), + reset(false), + single(single) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); - - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." 
<< std::endl; + /* Nothing to do here */ } -template -template -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), +template +RNN::RNN( + const arma::mat& predictors, + const arma::mat& responses, + const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : + rho(rho), + outputLayer(outputLayer), + initializeRule(initializeRule), inputSize(0), - outputSize(0) + outputSize(0), + targetSize(0), + reset(false), + single(single) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); + numFunctions = responses.n_cols; - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + this->deterministic = true; + ResetDeterministic(); - Train(predictors, responses); + if (!reset) + { + ResetParameters(); + reset = true; + } } -template -template -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - inputSize(0), - outputSize(0) +template +RNN::~RNN() { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } } -template +template template class OptimizerType> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, const arma::mat& responses) +void RNN::Train( + const arma::mat& predictors, + const arma::mat& responses, + OptimizerType& optimizer) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - OptimizerType optimizer(*this); + this->predictors = std::move(predictors); + this->responses = std::move(responses); + + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + reset = true; + } // Train the model. Timer::Start("rnn_optimization"); @@ -152,43 +108,27 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template +template template class OptimizerType> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) +void RNN::Train( + const arma::mat& predictors, const arma::mat& responses) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - // Train the model. 
- Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." << std::endl; -} + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + reset = true; + } + + OptimizerType optimizer(*this); -template -template< - template class OptimizerType -> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ // Train the model. Timer::Start("rnn_optimization"); const double out = optimizer.Optimize(parameter); @@ -198,96 +138,120 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Predict(arma::mat& predictors, arma::mat& responses) +template +void RNN::Predict( + arma::mat& predictors, arma::mat& responses) { - arma::mat responsesTemp; - SinglePredict(arma::mat(predictors.colptr(0), predictors.n_rows, - 1, false, true), responsesTemp); + if (parameter.is_empty()) + { + ResetParameters(); + } - responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); - responses.col(0) = responsesTemp.col(0); + if (!deterministic) + { + deterministic = true; + ResetDeterministic(); + } - for (size_t i = 1; i < predictors.n_cols; i++) + responses = arma::zeros(outputSize * rho, predictors.n_cols); + arma::mat responsesTemp = responses.col(0); + + for (size_t i = 0; i < predictors.n_cols; i++) { - SinglePredict(arma::mat(predictors.colptr(i), predictors.n_rows, - 1, false, true), responsesTemp); - responses.col(i) = responsesTemp.col(0); + SinglePredict( + arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), + responsesTemp); + + responses.col(i) = responsesTemp; } } -template -double RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) +template +void RNN::SinglePredict( + const arma::mat& predictors, arma::mat& responses) { - this->deterministic = deterministic; + for (size_t seqNum = 0; seqNum < rho; ++seqNum) + { + currentInput = predictors.rows(seqNum * inputSize, + (seqNum + 1) * inputSize - 1); + Forward(std::move(currentInput)); + + responses.rows(seqNum * outputSize, (seqNum + 1) * outputSize - 1) = + boost::apply_visitor(outputParameterVisitor, network.back()); + } +} + +template +double RNN::Evaluate( + const arma::mat& /* parameters */, const size_t i, const bool deterministic) +{ + if (parameter.is_empty()) + { + ResetParameters(); + reset = true; + } + + if (deterministic != this->deterministic) + { + this->deterministic = deterministic; + ResetDeterministic(); + } arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true); arma::mat target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - // Initialize the activation storage only once. - if (activations.empty()) - InitLayer(input, target, network); - - double networkError = 0; - seqLen = input.n_rows / inputSize; - ResetParameter(network); + if (!inputSize) + { + inputSize = input.n_elem / rho; + targetSize = target.n_elem / rho; + } - error = arma::mat(outputSize, outputSize < target.n_elem ? 
seqLen : 1); + double performance = 0; - // Iterate through the input sequence and perform the feed forward pass. - for (seqNum = 0; seqNum < seqLen; seqNum++) + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - // Perform the forward pass and save the activations. - Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1), - network); - SaveActivations(network); + currentInput = input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1); + arma::mat currentTarget = target.rows(seqNum * targetSize, + (seqNum + 1) * targetSize - 1); - // Retrieve output error of the subsequence. - if (seqOutput) + Forward(std::move(currentInput)); + + if (!deterministic) { - arma::mat seqError = error.unsafe_col(seqNum); - arma::mat seqTarget = target.submat(seqNum * outputSize, 0, - (seqNum + 1) * outputSize - 1, 0); - networkError += OutputError(seqTarget, seqError, network); + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } } + + performance += outputLayer.Forward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget)); } - // Retrieve output error of the complete sequence. - if (!seqOutput) - return OutputError(target, error, network); + if (!outputSize) + { + outputSize = boost::apply_visitor(outputParameterVisitor, + network.back()).n_elem; + } - return networkError; + return performance; } -template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) +template +void RNN::Gradient( + const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { + if (parameter.is_empty()) + { + ResetParameters(); + reset = true; + } + gradient = arma::zeros(parameter.n_rows, parameter.n_cols); } else @@ -295,59 +259,167 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction gradient.zeros(); } - Evaluate(parameter, i, false); + Evaluate(parameters, i, false); - arma::mat currentGradient = arma::mat(gradient.n_rows, gradient.n_cols); - NetworkGradients(currentGradient, network); + arma::mat currentGradient = arma::zeros(parameter.n_rows, + parameter.n_cols); + ResetGradients(currentGradient); - const arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, + arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, + 1, false, true); + arma::mat target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - // Iterate through the input sequence and perform the feed backward pass. - for (seqNum = seqLen - 1; seqNum >= 0; seqNum--) + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - // Load the network activation for the upcoming backward pass. - LoadActivations(input.rows(seqNum * inputSize, (seqNum + 1) * - inputSize - 1), network); + currentGradient.zeros(); + + arma::mat currentTarget = target.rows((rho - seqNum - 1) * targetSize, + (rho - seqNum) * targetSize - 1); + currentInput = input.rows((rho - seqNum - 1) * inputSize, + (rho - seqNum) * inputSize - 1); + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } - // Perform the backward pass. 
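// Toy sketch, not part of the patch and not mlpack code: the Evaluate()/Gradient()
// pair here follows the usual backpropagation-through-time pattern -- unroll the
// forward pass for rho steps while saving the intermediate outputs, then walk the
// sequence backwards and accumulate one gradient contribution per step. A scalar
// example of that pattern, with the recurrence h_t = w * h_{t-1} + x_t and the loss
// L = 0.5 * (h_T - target)^2:

#include <cstdio>
#include <vector>

int main()
{
  const double w = 0.9, target = 1.0;
  const std::vector<double> x = {0.5, -0.2, 0.8};  // input sequence, T = 3 steps

  // Forward pass: save every hidden state, as the RNN saves each module's output.
  std::vector<double> h(x.size() + 1, 0.0);
  for (size_t t = 0; t < x.size(); ++t)
    h[t + 1] = w * h[t] + x[t];

  // Backward pass: reverse order, one gradient contribution per unrolled step
  // (the analogue of 'gradient += currentGradient' in the code here).
  double delta = h.back() - target;  // dL/dh_T
  double gradient = 0.0;
  for (size_t t = x.size(); t-- > 0; )
  {
    gradient += delta * h[t];  // dh_{t+1}/dw contributes h_t at this step
    delta *= w;                // push the error one step further back in time
  }

  std::printf("dL/dw = %f\n", gradient);
  return 0;
}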
- if (seqOutput) + if (single && seqNum > 0) { - arma::mat seqError = error.unsafe_col(seqNum); - Backward(seqError, network); + error.zeros(); } else { - Backward(error, network); + outputLayer.Backward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget), + std::move(error)); } - // Link the parameters and update the gradients. - LinkParameter(network); - UpdateGradients<>(network); - - // Update the overall gradient. + Backward(); + Gradient(); gradient += currentGradient; + } +} + +template +void RNN::ResetParameters() +{ + size_t weights = 0; + for (LayerTypes& layer : network) + { + weights += boost::apply_visitor(weightSizeVisitor, layer); + } + + parameter.set_size(weights, 1); + initializeRule.Initialize(parameter, parameter.n_elem, 1); + + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), layer); + + boost::apply_visitor(resetVisitor, layer); + } +} - if (seqNum == 0) break; +template +void RNN::ResetDeterministic() +{ + DeterministicSetVisitor deterministicSetVisitor(deterministic); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deterministicSetVisitor)); +} + +template +void RNN::ResetGradients( + arma::mat& gradient) +{ + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(GradientSetVisitor(std::move(gradient), + offset), layer); + } +} + +template +void RNN::Forward(arma::mat&& input) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network[i - 1])), + std::move(boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); } } -template +template +void RNN::Backward() +{ + boost::apply_visitor(BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network.back())), + std::move(error), std::move(boost::apply_visitor(deltaVisitor, + network.back()))), network.back()); + + for (size_t i = 2; i < network.size(); ++i) + { + boost::apply_visitor(BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + network[network.size() - i])), std::move(boost::apply_visitor( + deltaVisitor, network[network.size() - i + 1])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i]))), + network[network.size() - i]); + } +} + +template +void RNN::Gradient() +{ + boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( + boost::apply_visitor(deltaVisitor, network[1]))), network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network[i - 1])), + std::move(boost::apply_visitor(deltaVisitor, network[i + 1]))), + network[i]); + } +} + +template template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Serialize(Archive& ar, const unsigned int /* version */) +void RNN::Serialize( + Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(single, "single"); + ar & data::CreateNVP(inputSize, "inputSize"); + ar & data::CreateNVP(outputSize, "outputSize"); + ar & data::CreateNVP(targetSize, "targetSize"); + ar & 
data::CreateNVP(currentInput, "currentInput"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) { - NetworkWeights(parameter, network); + reset = false; + + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), layer); + + boost::apply_visitor(resetVisitor, layer); + } } } diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt deleted file mode 100644 index ced53a30ea7..00000000000 --- a/src/mlpack/methods/rmva/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - rmva.hpp - rmva_impl.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) - -add_cli_executable(rmva) diff --git a/src/mlpack/methods/rmva/rmva.hpp b/src/mlpack/methods/rmva/rmva.hpp deleted file mode 100644 index 5f4f03112c8..00000000000 --- a/src/mlpack/methods/rmva/rmva.hpp +++ /dev/null @@ -1,963 +0,0 @@ -/** - * @file rmva.hpp - * @author Marcus Edel - * - * Definition of the RecurrentNeuralAttention class, which implements the - * Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef __MLPACK_METHODS_RMVA_RMVA_HPP -#define __MLPACK_METHODS_RMVA_RMVA_HPP - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * This class implements the Recurrent Model for Visual Attention, using a - * variety of possible layer implementations. - * - * For more information, see the following paper. - * - * @code - * @article{MnihHGK14, - * title={Recurrent Models of Visual Attention}, - * author={Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, - * journal={CoRR}, - * volume={abs/1406.6247}, - * year={2014} - * } - * @endcode - * - * @tparam LocatorType Type of locator network. - * @tparam LocationSensorType Type of location sensor network. - * @tparam GlimpseSensorType Type of glimpse sensor network. - * @tparam GlimpseType Type of glimpse network. - * @tparam StartType Type of start network. - * @tparam FeedbackType Type of feedback network. - * @tparam TransferType Type of transfer network. - * @tparam ClassifierType Type of classifier network. - * @tparam RewardPredictorType Type of reward predictor network. - * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam MatType Matrix type (arma::mat or arma::sp_mat). - */ -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType = RandomInitialization, - typename MatType = arma::mat -> -class RecurrentNeuralAttention -{ - public: - //! 
Convenience typedef for the internal model construction. - using NetworkType = RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType>; - - /** - * Construct the RecurrentNeuralAttention object, which will construct the - * recurrent model for visual attentionh using the specified networks. - * - * @param locator The locator network. - * @param locationSensor The location sensor network. - * @param glimpseSensor The glimpse sensor network. - * @param glimpse The glimpse network. - * @param start The start network. - * @param feedback The feedback network. - * @param transfer The transfer network. - * @param classifier The classifier network. - * @param rewardPredictor The reward predictor network. - * @param nStep Number of steps for the back-propagate through time. - * @param initializeRule Rule used to initialize the weight matrix. - */ - template - RecurrentNeuralAttention(TypeLocator&& locator, - TypeLocationSensor&& locationSensor, - TypeGlimpseSensor&& glimpseSensor, - TypeGlimpse&& glimpse, - TypeStart&& start, - TypeFeedback&& feedback, - TypeTransfer&& transfer, - TypeClassifier&& classifier, - TypeRewardPredictor&& rewardPredictor, - const size_t nStep, - InitializationRuleType initializeRule = - InitializationRuleType()); - /** - * Train the network on the given input data using the given optimizer. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer); - - /** - * Predict the responses to a given set of predictors. The responses will - * reflect the output of the given output layer as returned by the - * OutputClass() function. - * - * @param predictors Input predictors. - * @param responses Matrix to put output predictions of responses into. - */ - void Predict(arma::mat& predictors, arma::mat& responses); - - /** - * Evaluate the network with the given parameters. This function is usually - * called by the optimizer to train the model. - * - * @param parameters Matrix model parameters. - * @param i Index of point to use for objective function evaluation. - * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. - */ - double Evaluate(const arma::mat& parameters, - const size_t i, - const bool deterministic = true); - - /** - * Evaluate the gradient of the network with the given parameters, and with - * respect to only one point in the dataset. This is useful for - * optimizers such as SGD, which require a separable objective function. - * - * @param parameters Matrix of the model parameters to be optimized. - * @param i Index of points to use for objective function gradient evaluation. - * @param gradient Matrix to output gradient into. 
- */ - void Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient); - - //! Return the number of separable functions (the number of predictor points). - size_t NumFunctions() const { return numFunctions; } - - //! Return the initial point for the optimization. - const arma::mat& Parameters() const { return parameter; } - //! Modify the initial point for the optimization. - arma::mat& Parameters() { return parameter; } - - //! Return the number of steps to back-propagate through time. - const size_t& Rho() const { return nStep; } - //! Modify the number of steps to back-propagate through time. - size_t& Rho() { return nStep; } - - //! Return the current location. - const arma::mat& Location(); - - //! Serialize the model. - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /* - * Predict the response of the given input matrix. - */ - template - void SinglePredict(const InputType& input, OutputType& output) - { - // Get the locator input size. - if (!inputSize) - { - inputSize = NetworkInputSize(locator); - } - - // Reset networks. - ResetParameter(locator); - ResetParameter(locationSensor); - ResetParameter(glimpseSensor); - ResetParameter(glimpse); - ResetParameter(feedback); - ResetParameter(transfer); - ResetParameter(classifier); - ResetParameter(rewardPredictor); - ResetParameter(start); - - // Sample an initial starting actions by forwarding zeros through the - // locator. - locatorInput.push_back(new arma::cube(arma::zeros(inputSize, 1, - input.n_slices))); - - // Forward pass throught the recurrent network. - for (step = 0; step < nStep; step++) - { - // Locator forward pass. - Forward(locatorInput.back(), locator); - - // Location sensor forward pass. - Forward(std::get::value - 1>( - locator).OutputParameter(), locationSensor); - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(std::get::value - 1>( - locator).OutputParameter(), glimpseSensor); - - // Glimpse sensor forward pass. - Forward(input, glimpseSensor); - - // Concat the parameter activation from the location sensor and - // glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Glimpse forward pass. - Forward(concatLayerOutput, glimpse); - - if (step == 0) - { - // Start forward pass. - Forward(std::get::value - 1>( - glimpse).OutputParameter(), start); - - // Transfer forward pass. - Forward(std::get::value - 1>( - start).OutputParameter(), transfer); - } - else - { - // Feedback forward pass. - Forward(std::get::value - 1>( - transfer).OutputParameter(), feedback); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Transfer forward pass. - Forward(feedbackLayerOutput, transfer); - } - - // Update the input for the next run - locatorInput.push_back(new arma::cube( - std::get::value - 1>( - transfer).OutputParameter().memptr(), locatorInput.back().n_rows, - locatorInput.back().n_cols, locatorInput.back().n_slices)); - } - - // Classifier forward pass. - Forward(locatorInput.back().slice(0), classifier); - - output = std::get::value - 1>( - classifier).OutputParameter(); - } - - /** - * Update the layer reward for all layer that implement the Rewards function. 
- */ - template - typename std::enable_if::type - ResetReward(const double reward, std::tuple& network) - { - SetReward(reward, std::get(network)); - ResetReward(reward, network); - } - - template - typename std::enable_if::type - ResetReward(const double /* reward */, std::tuple& /* network */) - { - } - - template - typename std::enable_if< - HasRewardCheck::value, void>::type - SetReward(const double reward, T& layer) - { - layer.Reward() = reward; - } - - template - typename std::enable_if< - !HasRewardCheck::value, void>::type - SetReward(const double /* reward */, T& /* layer */) - { - /* Nothing to do here */ - } - - /** - * Reset the network by clearing the delta and by setting the layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* network */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - std::get(network).Delta().zeros(); - - ResetParameter(network); - } - - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* layer */) { /* Nothing to do here */ } - - /** - * Reset the location by updating the location for all layer that implement - * the Location function. - */ - template - typename std::enable_if::type - ResetLocation(const arma::mat& /* location */, - std::tuple& /* network */) - { - // Nothing to do here. - } - - template - typename std::enable_if::type - ResetLocation(const arma::mat& location, std::tuple& network) - { - SetLocation(std::get(network), location); - ResetLocation(location, network); - } - - template - typename std::enable_if< - HasLocationCheck::value, void>::type - SetLocation(T& layer, const arma::mat& location) - { - layer.Location(location); - } - - template - typename std::enable_if< - !HasLocationCheck::value, void>::type - SetLocation(T& /* layer */, const arma::mat& /* location */) - { - // Nothing to do here. - } - - /** - * Save the network layer activations. - */ - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& activations, - std::tuple& network, - size_t& activationCounter) - { - Save(I, activations, std::get(network), - std::get(network).InputParameter()); - - activationCounter++; - SaveActivations(activations, network, activationCounter); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& /* activations */, - std::tuple& /* network */, - size_t& /* activationCounter */) - { - // Nothing to do here. - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. 
- */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Save(const size_t /* layerNumber */, - boost::ptr_vector& activations, - T& layer, - P& /* unused */) - { - activations.push_back(new MatType(layer.RecurrentParameter())); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Save(const size_t /* layerNumber */, - boost::ptr_vector& activations, - T& layer, - P& /* unused */) - { - activations.push_back(new MatType(layer.OutputParameter())); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& activationsA, - boost::ptr_vector& activationsB, - size_t& dataTypeACounter, - size_t& dataTypeBCounter, - std::tuple& network) - { - Save(activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - std::get(network), std::get(network).OutputParameter()); - - SaveActivations( - activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - network); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& /* activationsB */, - size_t& /* dataTypeACounter */, - size_t& /* dataTypeBCounter */, - std::tuple& /* network */) - { - // Nothing to do here. - } - - template - void Save(boost::ptr_vector& activationsA, - boost::ptr_vector& /* activationsB */, - size_t& dataTypeACounter, - size_t& /* dataTypeBCounter */, - T& layer, - DataTypeA& /* unused */) - { - activationsA.push_back(new DataTypeA(layer.OutputParameter())); - dataTypeACounter++; - } - - template - void Save(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& activationsB, - size_t& /* dataTypeACounter */, - size_t& dataTypeBCounter, - T& layer, - DataTypeB& /* unused */) - { - activationsB.push_back(new DataTypeB(layer.OutputParameter())); - dataTypeBCounter++; - } - - /** - * Load the network layer activations. - */ - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& /* activations */, - size_t& /* activationCounter */, - std::tuple& network) - { - std::get<0>(network).InputParameter() = input; - LinkParameter(network); - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& activations, - size_t& activationCounter, - std::tuple& network) - { - Load(--activationCounter, activations, - std::get(network), - std::get(network).InputParameter()); - - LoadActivations(input, activations, - activationCounter, network); - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. 
- */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, - boost::ptr_vector& activations, - T& layer, - P& /* output */) - { - layer.RecurrentParameter() = activations[layerNumber]; - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, - boost::ptr_vector& activations, - T& layer, - P& /* output */) - { - layer.OutputParameter() = activations[layerNumber]; - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& activationsA, - boost::ptr_vector& activationsB, - size_t& dataTypeACounter, - size_t& dataTypeBCounter, - std::tuple& network) - { - Load(activationsA, - activationsB, - dataTypeACounter, - dataTypeBCounter, - std::get(network), - std::get(network).OutputParameter()); - - LoadActivations( - input, activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - network); - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& /* activationsA */, - boost::ptr_vector& /* activationsB */, - size_t& /* dataTypeACounter */, - size_t& /* dataTypeBCounter */, - std::tuple& network) - { - std::get<0>(network).InputParameter() = input; - LinkParameter(network); - } - - template - void Load(boost::ptr_vector& activationsA, - boost::ptr_vector& /* activationsB */, - size_t& dataTypeACounter, - size_t& /* dataTypeBCounter */, - T& layer, - DataTypeA& /* output */) - { - layer.OutputParameter() = activationsA[--dataTypeACounter]; - } - - template - void Load(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& activationsB, - size_t& /* dataTypeACounter */, - size_t& dataTypeBCounter, - T& layer, - DataTypeB& /* output */) - { - layer.OutputParameter() = activationsB[--dataTypeBCounter]; - } - - /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. - */ - template - void Forward(const DataType& input, std::tuple& t) - { - std::get(t).InputParameter() = input; - std::get(t).Forward(std::get(t).InputParameter(), - std::get(t).OutputParameter()); - - ForwardTail(t); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& t) - { - std::get(t).Forward(std::get(t).OutputParameter(), - std::get(t).OutputParameter()); - - ForwardTail(t); - } - - /** - * Run a single iteration of the backward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. 
- */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), error, - std::get(t).Delta()); - } - - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), error, - std::get(t).Delta()); - - BackwardTail(error, t); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* error */, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), - std::get(t).Delta(), - std::get(t).Delta()); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), - std::get(t).Delta(), - std::get(t).Delta()); - - BackwardTail(error, t); - } - - /** - * Link the calculated activation with the correct layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* network */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template - void UpdateGradients(const InputType& input, - const ErrorType& error, - std::tuple& network) - { - Update(std::get<0>(network), - input, - std::get<1>(network).Delta(), - std::get<1>(network).OutputParameter()); - - UpdateGradients<1, ErrorType, Tp...>(error, network); - } - - template - typename std::enable_if::type - UpdateGradients(const ErrorType& error, std::tuple& network) - { - Update(std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta(), - std::get(network).OutputParameter()); - - UpdateGradients(error, network); - } - - template - typename std::enable_if::type - UpdateGradients(const ErrorType& error, std::tuple& network) - { - Update(std::get(network), - std::get(network).InputParameter(), - error, - std::get(network).OutputParameter()); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(LayerType& layer, - const InputType& input, - const ErrorType& error, - GradientType& /* gradient */) - { - layer.Gradient(input, error, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(LayerType& /* layer */, - const InputType& /* input */, - const ErrorType& /* error */, - GradientType& /* gradient */) - { - // Nothing to do here - } - - //! The locator network. - LocatorType locator; - - //! The location sensor network. - LocationSensorType locationSensor; - - //! The glimpse sensor network. - GlimpseSensorType glimpseSensor; - - //! The glimpse network. - GlimpseType glimpse; - - //! The start network. - StartType start; - - //! The feedback network. - FeedbackType feedback; - - //! The transfer network. - TransferType transfer; - - //! The classifier network. - ClassifierType classifier; - - //! The reward predictor network. - RewardPredictorType rewardPredictor; - - //! The number of steps for the back-propagate through time. - size_t nStep; - - //! Locally stored network input size. - size_t inputSize; - - //! The current evaluation mode (training or testing). - bool deterministic; - - //! The index of the current step. - size_t step; - - //! 
The activation storage we are using to perform the feed backward pass for - //! the glimpse network. - boost::ptr_vector glimpseActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the locator network. - boost::ptr_vector locatorActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the feedback network. - boost::ptr_vector feedbackActivations; - - //! The activation storage we are using to save the feedback network input. - boost::ptr_vector feedbackActivationsInput; - - //! The activation storage we are using to perform the feed backward pass for - //! the transfer network. - boost::ptr_vector transferActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the location sensor network. - boost::ptr_vector locationSensorActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the glimpse sensor network. - boost::ptr_vector glimpseSensorMatActivations; - boost::ptr_vector glimpseSensorCubeActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the locator input. - boost::ptr_vector locatorInput; - - //! The storage we are using to save the location. - boost::ptr_vector location; - - //! The current number of activations in the glimpse sensor network. - size_t glimpseSensorMatCounter; - size_t glimpseSensorCubeCounter; - - //! The current number of activations in the glimpse network. - size_t glimpseActivationsCounter; - - //! The current number of activations in the glimpse start network. - size_t startActivationsCounter; - - //! The current number of activations in the feedback network. - size_t feedbackActivationsCounter; - - //! The current number of activations in the transfer network. - size_t transferActivationsCounter; - - //! The current number of activations in the locator network. - size_t locatorActivationsCounter; - - //! The current number of activations in the location sensor network. - size_t locationSensorActivationsCounter; - - //! The current number of activations in the glimpse sensor network. - size_t glimpseSensorMatActivationsCounter; - size_t glimpseSensorCubeActivationsCounter; - - //! The current number of location for the location storage. - size_t locationCounter; - - //! Matrix of (trained) parameters. - arma::mat parameter; - - //! The matrix of data points (predictors). - arma::mat predictors; - - //! The matrix of responses to the input data points. - arma::mat responses; - - //! The number of separable functions (the number of predictor points). - size_t numFunctions; - - //! Storage the merge the reward input. - arma::field rewardInput; - - //! The current input. - arma::cube input; - - //! The current target. - arma::mat target; - - //! Locally stored performance functions. - NegativeLogLikelihoodLayer<> negativeLogLikelihoodFunction; - VRClassRewardLayer<> vRClassRewardFunction; - - //! Locally stored size of the locator network. - size_t locatorSize; - - //! Locally stored size of the location sensor network. - size_t locationSensorSize; - - //! Locally stored size of the glimpse sensor network. - size_t glimpseSensorSize; - - //! Locally stored size of the glimpse network. - size_t glimpseSize; - - //! Locally stored size of the start network. - size_t startSize; - - //! Locally stored size of the feedback network. - size_t feedbackSize; - - //! Locally stored size of the transfer network. - size_t transferSize; - - //! 
Locally stored size of the classifier network. - size_t classifierSize; - - //! Locally stored size of the reward predictor network. - size_t rewardPredictorSize; - - //! Locally stored recurrent gradient. - arma::mat recurrentGradient; - - //! Locally stored action error. - arma::mat actionError; - - //! Locally stored current location. - arma::mat evaluationLocation; -}; // class RecurrentNeuralAttention - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "rmva_impl.hpp" - -#endif diff --git a/src/mlpack/methods/rmva/rmva_impl.hpp b/src/mlpack/methods/rmva/rmva_impl.hpp deleted file mode 100644 index cfb310b3993..00000000000 --- a/src/mlpack/methods/rmva/rmva_impl.hpp +++ /dev/null @@ -1,740 +0,0 @@ -/** - * @file rmva_impl.hpp - * @author Marcus Edel - * - * Implementation of the Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP -#define __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP - -// In case it hasn't been included yet. -#include "rmva.hpp" - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template< - typename TypeLocator, - typename TypeLocationSensor, - typename TypeGlimpseSensor, - typename TypeGlimpse, - typename TypeStart, - typename TypeFeedback, - typename TypeTransfer, - typename TypeClassifier, - typename TypeRewardPredictor -> -RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::RecurrentNeuralAttention(TypeLocator&& locator, - TypeLocationSensor&& locationSensor, - TypeGlimpseSensor&& glimpseSensor, - TypeGlimpse&& glimpse, - TypeStart&& start, - TypeFeedback&& feedback, - TypeTransfer&& transfer, - TypeClassifier&& classifier, - TypeRewardPredictor&& rewardPredictor, - const size_t nStep, - InitializationRuleType initializeRule) : - locator(std::forward(locator)), - locationSensor(std::forward(locationSensor)), - glimpseSensor(std::forward(glimpseSensor)), - glimpse(std::forward(glimpse)), - start(std::forward(start)), - feedback(std::forward(feedback)), - transfer(std::forward(transfer)), - classifier(std::forward(classifier)), - rewardPredictor(std::forward(rewardPredictor)), - nStep(nStep), - inputSize(0) -{ - // Set the network size. 
- locatorSize = NetworkSize(this->locator); - locationSensorSize = NetworkSize(this->locationSensor); - glimpseSensorSize = NetworkSize(this->glimpseSensor); - glimpseSize = NetworkSize(this->glimpse); - feedbackSize = NetworkSize(this->feedback); - transferSize = NetworkSize(this->transfer); - classifierSize = NetworkSize(this->classifier); - rewardPredictorSize = NetworkSize(this->rewardPredictor); - startSize = NetworkSize(this->start); - - initializeRule.Initialize(parameter, locatorSize + locationSensorSize + glimpseSensorSize + - glimpseSize + feedbackSize + transferSize + classifierSize + rewardPredictorSize + startSize, 1); - - // Set the network weights. - NetworkWeights(initializeRule, parameter, this->locator); - NetworkWeights(initializeRule, parameter, this->locationSensor, locatorSize); - NetworkWeights(initializeRule, parameter, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkWeights(initializeRule, parameter, this->glimpse, locatorSize + - locationSensorSize + glimpseSensorSize); - NetworkWeights(initializeRule, parameter, this->feedback, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize); - NetworkWeights(initializeRule, parameter, this->transfer, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize); - NetworkWeights(initializeRule, parameter, this->classifier, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize); - NetworkWeights(initializeRule, parameter, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkWeights(initializeRule, parameter, this->start, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize + rewardPredictorSize); - - rewardInput = arma::field(2, 1); -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template class OptimizerType> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) -{ - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Predict(arma::mat& predictors, arma::mat& responses) -{ - deterministic = true; - - arma::mat responsesTemp; - SinglePredict(arma::cube(predictors.colptr(0), 28, 28, 1), responsesTemp); - - responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); - responses.col(0) = responsesTemp.col(0); - - for (size_t i = 1; i < predictors.n_cols; i++) - { - SinglePredict(arma::cube(predictors.colptr(i), 28, 28, 1), responsesTemp); - responses.col(i) = responsesTemp.col(0); - } -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -double RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) -{ - this->deterministic = deterministic; - - input = arma::cube(predictors.colptr(i), 28, 28, 1); - target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - - // Get the locator input size. - if (!inputSize) - { - inputSize = NetworkInputSize(locator); - } - - glimpseSensorMatCounter = 0; - glimpseSensorCubeCounter = 0; - glimpseActivationsCounter = 0; - locatorActivationsCounter = 0; - locationSensorActivationsCounter = 0; - glimpseSensorMatActivationsCounter = 0; - glimpseSensorCubeActivationsCounter = 0; - locationCounter = 0; - feedbackActivationsCounter = 0; - transferActivationsCounter = 0; - - // Reset networks. - ResetParameter(locator); - ResetParameter(locationSensor); - ResetParameter(glimpseSensor); - ResetParameter(glimpse); - ResetParameter(feedback); - ResetParameter(transfer); - ResetParameter(classifier); - ResetParameter(rewardPredictor); - ResetParameter(start); - - // Reset activation storage. - glimpseActivations.clear(); - locatorActivations.clear(); - locationSensorActivations.clear(); - glimpseSensorMatActivations.clear(); - glimpseSensorCubeActivations.clear(); - feedbackActivations.clear(); - transferActivations.clear(); - locatorInput.clear(); - location.clear(); - feedbackActivationsInput.clear(); - - // Sample an initial starting actions by forwarding zeros through the locator. - locatorInput.push_back(new arma::cube(arma::zeros(inputSize, 1, - input.n_slices))); - - // Forward pass throught the recurrent network. - for (step = 0; step < nStep; step++) - { - // Locator forward pass. - Forward(locatorInput.back(), locator); - SaveActivations(locatorActivations, locator, locatorActivationsCounter); - - // Location sensor forward pass. 
- Forward(std::get::value - 1>( - locator).OutputParameter(), locationSensor); - SaveActivations(locationSensorActivations, locationSensor, - locationSensorActivationsCounter); - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(std::get::value - 1>( - locator).OutputParameter(), glimpseSensor); - - // Save the location for the backward path. - location.push_back(new arma::mat(std::get::value - 1>(locator).OutputParameter())); - - // Glimpse sensor forward pass. - Forward(input, glimpseSensor); - SaveActivations(glimpseSensorMatActivations, glimpseSensorCubeActivations, - glimpseSensorMatCounter, glimpseSensorCubeCounter, glimpseSensor); - - // Concat the parameter activation from the location sensor and - // glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Glimpse forward pass. - Forward(concatLayerOutput, glimpse); - SaveActivations(glimpseActivations, glimpse, glimpseActivationsCounter); - - if (step == 0) - { - // Start forward pass. - Forward(std::get::value - 1>( - glimpse).OutputParameter(), start); - - // Transfer forward pass. - Forward(std::get::value - 1>( - start).OutputParameter(), transfer); - SaveActivations(transferActivations, transfer, - transferActivationsCounter); - } - else - { - // Feedback forward pass. - Forward(std::get::value - 1>( - transfer).OutputParameter(), feedback); - SaveActivations(feedbackActivations, feedback, - feedbackActivationsCounter); - - feedbackActivationsInput.push_back(new arma::mat( - std::get::value - 1>( - transfer).OutputParameter().memptr(), - std::get::value - 1>( - transfer).OutputParameter().n_rows, - std::get::value - 1>( - transfer).OutputParameter().n_cols)); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Transfer forward pass. - Forward(feedbackLayerOutput, transfer); - SaveActivations(transferActivations, transfer, - transferActivationsCounter); - } - - // Update the input for the next run - locatorInput.push_back(new arma::cube( - std::get::value - 1>( - transfer).OutputParameter().memptr(), locatorInput.back().n_rows, - locatorInput.back().n_cols, locatorInput.back().n_slices)); - } - - // Classifier forward pass. - Forward(locatorInput.back().slice(0), classifier); - - // Reward predictor forward pass. - Forward(std::get::value - 1>( - classifier).OutputParameter(), rewardPredictor); - - double performanceError = negativeLogLikelihoodFunction.Forward( - std::get::value - 1>( - classifier).OutputParameter(), target); - - // Create the input for the vRClassRewardFunction function. - // For which we use the output from the classifier and the rewardPredictor. 
- rewardInput(0, 0) = std::get::value - 1>( - classifier).OutputParameter(); - rewardInput(1, 0) = std::get::value - 1>( - rewardPredictor).OutputParameter(); - - performanceError += vRClassRewardFunction.Forward(rewardInput, target); - - return performanceError; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) -{ - Evaluate(parameter, i, false); - - // Reset the gradient. - if (gradient.is_empty()) - { - gradient = arma::zeros(parameter.n_rows, parameter.n_cols); - } - else - { - gradient.zeros(); - } - - // Reset the recurrent gradient. - if (recurrentGradient.is_empty()) - { - recurrentGradient = arma::zeros(parameter.n_rows, - parameter.n_cols); - - actionError = arma::zeros( - std::get::value - 1>( - locator).OutputParameter().n_rows, - std::get::value - 1>( - locator).OutputParameter().n_cols); - } - else - { - recurrentGradient.zeros(); - } - - // Set the recurrent gradient. - NetworkGradients(recurrentGradient, this->locator); - NetworkGradients(recurrentGradient, this->locationSensor, locatorSize); - NetworkGradients(recurrentGradient, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkGradients(recurrentGradient, this->glimpse, locatorSize + - locationSensorSize + glimpseSensorSize); - NetworkGradients(recurrentGradient, this->feedback, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize); - NetworkGradients(recurrentGradient, this->transfer, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize); - - // Set the gradient. - NetworkGradients(gradient, this->classifier, locatorSize + locationSensorSize - + glimpseSensorSize + glimpseSize + feedbackSize + transferSize); - NetworkGradients(gradient, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkGradients(gradient, this->start, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize + transferSize + - classifierSize + rewardPredictorSize); - - // Negative log likelihood backward pass. - negativeLogLikelihoodFunction.Backward(std::get::value - 1>(classifier).OutputParameter(), target, - negativeLogLikelihoodFunction.OutputParameter()); - - const double reward = vRClassRewardFunction.Backward(rewardInput, target, - vRClassRewardFunction.OutputParameter()); - - // Propogate reward through all modules. - ResetReward(reward, locator); - ResetReward(reward, locationSensor); - ResetReward(reward, glimpseSensor); - ResetReward(reward, glimpse); - ResetReward(reward, classifier); - - // RewardPredictor backward pass. - Backward(vRClassRewardFunction.OutputParameter()(1, 0), rewardPredictor); - - arma::mat classifierError = - negativeLogLikelihoodFunction.OutputParameter() + - vRClassRewardFunction.OutputParameter()(0, 0) + - std::get<0>(rewardPredictor).Delta(); - - // Classifier backward pass. 
- Backward(classifierError, classifier); - - // Set the initial recurrent error for the first backward step. - arma::mat recurrentError = std::get<0>(classifier).Delta(); - - for (step = nStep - 1; nStep >= 0; step--) - { - // Load the locator activations. - LoadActivations(locatorInput[step], locatorActivations, - locatorActivationsCounter, locator); - - // Load the location sensor activations. - LoadActivations(std::get::value - 1>( - locator).OutputParameter(), locationSensorActivations, - locationSensorActivationsCounter, locationSensor); - - // Load the glimpse sensor activations. - LoadActivations(input, glimpseSensorMatActivations, - glimpseSensorCubeActivations, glimpseSensorMatCounter, - glimpseSensorCubeCounter, glimpseSensor); - - // Concat the parameter activation from the location and glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Load the glimpse activations. - LoadActivations(concatLayerOutput, glimpseActivations, - glimpseActivationsCounter, glimpse); - - - if (step == 0) - { - // Load the transfer activations. - LoadActivations(std::get::value - 1>( - start).OutputParameter(), transferActivations, - transferActivationsCounter, transfer); - } - else - { - // Load the feedback activations. - LoadActivations(std::get::value - 1>( - transfer).OutputParameter(), feedbackActivations, - feedbackActivationsCounter, feedback); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Load the transfer activations. - LoadActivations(feedbackLayerOutput, transferActivations, - transferActivationsCounter, transfer); - } - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(location[step], glimpseSensor); - - // Locator backward pass. - Backward(actionError, locator); - - // Transfer backward pass. - Backward(recurrentError, transfer); - - // glimpse network - Backward(std::get<0>(transfer).Delta(), glimpse); - - // Split up the error of the concat layer. - arma::mat locationSensorError = std::get<0>(glimpse).Delta().submat( - 0, 0, std::get<0>(glimpse).Delta().n_elem / 2 - 1, 0); - arma::mat glimpseSensorError = std::get<0>(glimpse).Delta().submat( - std::get<0>(glimpse).Delta().n_elem / 2, 0, - std::get<0>(glimpse).Delta().n_elem - 1, 0); - - // Location sensor backward pass. - Backward(locationSensorError, locationSensor); - - // Glimpse sensor backward pass. - Backward(glimpseSensorError, glimpseSensor); - - if (step != 0) - { - // Feedback backward pass. - Backward(std::get<0>(transfer).Delta(), feedback); - } - - // Update the recurrent network gradients. - UpdateGradients(std::get<0>(locationSensor).Delta(), locator); - UpdateGradients(std::get<0>(transfer).Delta(), glimpse); - UpdateGradients(std::get<0>(transfer).Delta(), locationSensor); - UpdateGradients(std::get<0>(transfer).Delta(), glimpseSensor); - - // Feedback module. - if (step != 0) - { - UpdateGradients(feedbackActivationsInput[step - 1], - std::get<0>(transfer).Delta(), feedback); - } - else - { - // Set the feedback gradient to zero. 
- recurrentGradient.submat(locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize, 0, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize - 1, 0).zeros(); - - UpdateGradients(std::get<0>(transfer).Delta(), start); - } - - // Update the overall recurrent gradient. - gradient += recurrentGradient; - - if (step != 0) - { - // Update the recurrent error for the next backward step. - recurrentError = std::get<0>(locator).Delta() + - std::get<0>(feedback).Delta(); - } - else - { - break; - } - } - - // Reward predictor gradient update. - UpdateGradients(vRClassRewardFunction.OutputParameter()(1, 0), - rewardPredictor); - - // Classifier gradient update. - UpdateGradients(std::get<1>(classifier).Delta(), classifier); -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -const arma::mat& RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Location() -{ - if (!location.empty()) - { - evaluationLocation = arma::mat(location[0].n_elem, location.size()); - - for (size_t i = 0; i < location.size(); i++) - { - evaluationLocation.col(i) = arma::vectorise(location[i]); - } - } - - return evaluationLocation; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Serialize(Archive& ar, const unsigned int /* version */) -{ - ar & data::CreateNVP(parameter, "parameter"); - ar & data::CreateNVP(inputSize, "inputSize"); - ar & data::CreateNVP(nStep, "nStep"); - - // If we are loading, we need to initialize the weights. - if (Archive::is_loading::value) - { - // Set the netork size. - locatorSize = NetworkSize(this->locator); - locationSensorSize = NetworkSize(this->locationSensor); - glimpseSensorSize = NetworkSize(this->glimpseSensor); - glimpseSize = NetworkSize(this->glimpse); - feedbackSize = NetworkSize(this->feedback); - transferSize = NetworkSize(this->transfer); - classifierSize = NetworkSize(this->classifier); - rewardPredictorSize = NetworkSize(this->rewardPredictor); - startSize = NetworkSize(this->start); - - // Set the network weights. 
- NetworkWeights(parameter, this->locator); - NetworkWeights(parameter, this->locationSensor, locatorSize); - NetworkWeights(parameter, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkWeights(parameter, this->glimpse, locatorSize + locationSensorSize + - glimpseSensorSize); - NetworkWeights(parameter, this->feedback, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize); - NetworkWeights(parameter, this->transfer, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize); - NetworkWeights(parameter, this->classifier, locatorSize + locationSensorSize - + glimpseSensorSize + glimpseSize + feedbackSize + transferSize); - NetworkWeights(parameter, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkWeights(parameter, this->start, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize + transferSize + - classifierSize + rewardPredictorSize); - } -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/rmva/rmva_main.cpp b/src/mlpack/methods/rmva/rmva_main.cpp deleted file mode 100644 index a3483d63de6..00000000000 --- a/src/mlpack/methods/rmva/rmva_main.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/** - * @file rmva_main.cpp - * @author Marcus Edel - * - * Main executable for the Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include "rmva.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -using namespace mlpack; -using namespace mlpack::ann; -using namespace mlpack::optimization; -using namespace std; - -PROGRAM_INFO("Recurrent Model for Visual Attention", - "This program trains the Recurrent Model for Visual Attention on the given " - "labeled training set, or loads a model from the given model file, and then" - " may use that trained model to classify the points in a given test set." - "\n\n" - "Labels are expected to be passed in separately as their own file " - "(--labels_file). If training is not desired, a pre-existing model can be " - "loaded with the --input_model_file (-m) option." - "\n\n" - "If classifying a test set is desired, the test set should be in the file " - "specified with the --test_file (-T) option, and the classifications will " - "be saved to the file specified with the --output_file (-o) option. If " - "saving a trained model is desired, the --output_model_file (-M) option " - "should be given."); - -// Model loading/saving. -PARAM_STRING_IN("input_model_file", "File containing the Recurrent Model for " - "Visual Attention.", "m", ""); -PARAM_STRING_OUT("output_model_file", "File to save trained Recurrent Model for" - " Visual Attention to.", "M"); - -// Training parameters. 
-PARAM_MATRIX_IN("training", "Matrix containing the training set.", "t"); -PARAM_MATRIX_IN("labels", "Matrix containing labels for the training set.", - "l"); - -PARAM_STRING_IN("optimizer", "Optimizer to use; 'sgd', 'minibatch-sgd', or " - "'lbfgs'.", "O", "minibatch-sgd"); - -PARAM_INT_IN("max_iterations", "Maximum number of iterations for SGD or RMSProp" - " (0 indicates no limit).", "n", 500000); -PARAM_DOUBLE_IN("tolerance", "Maximum tolerance for termination of SGD or " - "RMSProp.", "e", 1e-7); - -PARAM_DOUBLE_IN("step_size", "Step size for stochastic gradient descent " - "(alpha),", "a", 0.01); -PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are " - "visited for SGD or mini-batch SGD.", "L"); -PARAM_INT_IN("batch_size", "Batch size for mini-batch SGD.", "b", 20); - -PARAM_INT_IN("rho", "Number of steps for the back-propagate through time.", "r", - 7); - -PARAM_INT_IN("classes", "The number of classes.", "c", 10); - -PARAM_INT_IN("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0); - -// Test parameters. -PARAM_MATRIX_IN("test", "Matrix containing the test set.", "T"); -PARAM_MATRIX_OUT("output", "The matrix in which the predicted labels for the " - "test set will be written.", "o"); - -int main(int argc, char** argv) -{ - CLI::ParseCommandLine(argc, argv); - - // Check input parameters. - if (CLI::HasParam("training") && CLI::HasParam("input_model_file")) - Log::Fatal << "Cannot specify both --training_file (-t) and " - << "--input_model_file (-m)!" << endl; - - if (!CLI::HasParam("training") && !CLI::HasParam("input_model_file")) - Log::Fatal << "Neither --training_file (-t) nor --input_model_file (-m) are" - << " specified!" << endl; - - if (!CLI::HasParam("training") && CLI::HasParam("labels")) - Log::Warn << "--labels_file (-l) ignored because --training_file (-t) is " - << "not specified." << endl; - - if (!CLI::HasParam("output") && !CLI::HasParam("output_model_file")) - Log::Warn << "Neither --output_file (-o) nor --output_model_file (-M) " - << "specified; no output will be saved!" << endl; - - if (CLI::HasParam("output") && !CLI::HasParam("test")) - Log::Warn << "--output_file (-o) ignored because no test file specified " - << "with --test_file (-T)." << endl; - - if (!CLI::HasParam("output") && CLI::HasParam("test")) - Log::Warn << "--test_file (-T) specified, but classification results will " - << "not be saved because --output_file (-o) is not specified." << endl; - - const string optimizerType = CLI::GetParam("optimizer"); - - if ((optimizerType != "sgd") && (optimizerType != "lbfgs") && - (optimizerType != "minibatch-sgd")) - { - Log::Fatal << "Optimizer type '" << optimizerType << "' unknown; must be " - << "'sgd', 'minibatch-sgd', or 'lbfgs'!" 
<< endl; - } - - const double stepSize = CLI::GetParam("step_size"); - const size_t maxIterations = (size_t) CLI::GetParam("max_iterations"); - const double tolerance = CLI::GetParam("tolerance"); - const bool shuffle = !CLI::HasParam("linear_scan"); - const size_t batchSize = (size_t) CLI::GetParam("batch_size"); - const size_t rho = (size_t) CLI::GetParam("rho"); - const size_t numClasses = (size_t) CLI::GetParam("classes"); - - const size_t hiddenSize = 256; - const double unitPixels = 13; - const double locatorStd = 0.11; - const size_t imageSize = 28; - const size_t locatorHiddenSize = 128; - const size_t glimpsePatchSize = 8; - const size_t glimpseDepth = 1; - const size_t glimpseScale = 2; - const size_t glimpseHiddenSize = 128; - const size_t imageHiddenSize = 256; - - - // Locator network. - LinearMappingLayer<> linearLayer0(hiddenSize, 2); - BiasLayer<> biasLayer0(2, 1); - HardTanHLayer<> hardTanhLayer0; - ReinforceNormalLayer<> reinforceNormalLayer0(2 * locatorStd); - HardTanHLayer<> hardTanhLayer1; - MultiplyConstantLayer<> multiplyConstantLayer0(2 * unitPixels / imageSize); - auto locator = std::tie(linearLayer0, biasLayer0, hardTanhLayer0, - reinforceNormalLayer0, hardTanhLayer1, multiplyConstantLayer0); - - // Location sensor network. - LinearLayer<> linearLayer1(2, locatorHiddenSize); - BiasLayer<> biasLayer1(locatorHiddenSize, 1); - ReLULayer<> rectifierLayer0; - auto locationSensor = std::tie(linearLayer1, biasLayer1, rectifierLayer0); - - // Glimpse sensor network. - GlimpseLayer<> glimpseLayer0(1, glimpsePatchSize, glimpseDepth, glimpseScale); - LinearMappingLayer<> linearLayer2(64, glimpseHiddenSize); - BiasLayer<> biasLayer2(glimpseHiddenSize, 1); - ReLULayer<> rectifierLayer1; - auto glimpseSensor = std::tie(glimpseLayer0, linearLayer2, biasLayer2, - rectifierLayer1); - - // Glimpse network. - LinearLayer<> linearLayer3(glimpseHiddenSize + locatorHiddenSize, - imageHiddenSize); - BiasLayer<> biasLayer3(imageHiddenSize, 1); - ReLULayer<> rectifierLayer2; - LinearLayer<> linearLayer4(imageHiddenSize, hiddenSize); - BiasLayer<> biasLayer4(hiddenSize, 1); - auto glimpse = std::tie(linearLayer3, biasLayer3, rectifierLayer2, - linearLayer4, biasLayer4); - - // Feedback network. - LinearLayer<> recurrentLayer0(imageHiddenSize, hiddenSize); - BiasLayer<> recurrentLayerBias0(hiddenSize, 1); - auto feedback = std::tie(recurrentLayer0, recurrentLayerBias0); - - // Start network. - AdditionLayer<> startLayer0(hiddenSize, 1); - auto start = std::tie(startLayer0); - - // Transfer network. - ReLULayer<> rectifierLayer3; - auto transfer = std::tie(rectifierLayer3); - - // Classifier network. - LinearLayer<> linearLayer5(hiddenSize, numClasses); - BiasLayer<> biasLayer6(numClasses, 1); - LogSoftmaxLayer<> logSoftmaxLayer0; - auto classifier = std::tie(linearLayer5, biasLayer6, logSoftmaxLayer0); - - // Reward predictor network. - ConstantLayer<> constantLayer0(1, 1); - AdditionLayer<> additionLayer0(1, 1); - auto rewardPredictor = std::tie(constantLayer0, additionLayer0); - - // Recurrent Model for Visual Attention. - RecurrentNeuralAttention - net(locator, locationSensor, glimpseSensor, glimpse, start, feedback, - transfer, classifier, rewardPredictor, rho); - - // Either we have to train a model, or load a model. - if (CLI::HasParam("training")) - { - arma::mat trainingData = std::move(CLI::GetParam("training")); - - arma::mat labels; - - // Did the user pass in labels? - if (CLI::HasParam("labels")) - { - // Load labels. 
- labels = std::move(CLI::GetParam("labels")); - - // Do the labels need to be transposed? - if (labels.n_cols == 1) - labels = labels.t(); - } - - // Now run the optimization. - if (optimizerType == "sgd") - { - SGD opt(net); - opt.StepSize() = stepSize; - opt.MaxIterations() = maxIterations; - opt.Tolerance() = tolerance; - opt.Shuffle() = shuffle; - - Timer::Start("rmva_training"); - net.Train(trainingData, labels, opt); - Timer::Stop("rmva_training"); - } - else if (optimizerType == "minibatch-sgd") - { - MiniBatchSGD opt(net); - opt.StepSize() = stepSize; - opt.MaxIterations() = maxIterations; - opt.Tolerance() = tolerance; - opt.Shuffle() = shuffle; - opt.BatchSize() = batchSize; - - Timer::Start("rmva_training"); - net.Train(trainingData, labels, opt); - Timer::Stop("rmva_training"); - } - } - else - { - // Load the model from file. - data::Load(CLI::GetParam("input_model_file"), "rmva_model", net); - } - - // Do we need to do testing? - if (CLI::HasParam("test")) - { - arma::mat testingData = std::move(CLI::GetParam("test")); - - // Time the running of the Naive Bayes Classifier. - arma::mat results; - Timer::Start("rmva_testing"); - net.Predict(testingData, results); - Timer::Stop("rmva_testing"); - - if (CLI::HasParam("output")) - CLI::GetParam("output") = std::move(results); - } - - // Save the model, if requested. - if (CLI::HasParam("output_model_file")) - data::Save(CLI::GetParam("output_model_file"), "rmva_model", net); -} diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp index eb172ab15b5..178341ead5a 100644 --- a/src/mlpack/prereqs.hpp +++ b/src/mlpack/prereqs.hpp @@ -62,6 +62,12 @@ using enable_if_t = typename enable_if::type; #endif #endif +// Increase the number of template arguments for the boost list class. +#undef BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#undef BOOST_MPL_LIMIT_LIST_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#define BOOST_MPL_LIMIT_LIST_SIZE 40 + // We'll need the necessary boost::serialization features, as well as what we // use with mlpack. In Boost 1.59 and newer, the BOOST_PFTO code is no longer // defined, but we still need to define it (as nothing) so that the mlpack @@ -95,4 +101,4 @@ using enable_if_t = typename enable_if::type; #define ARMA_USE_CXX11 #endif -#endif +#endif \ No newline at end of file diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index 3b3ab0d30b1..b1dc438eaa2 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -6,6 +6,7 @@ add_executable(mlpack_test ada_delta_test.cpp akfn_test.cpp aknn_test.cpp + ann_layer_test.cpp arma_extend_test.cpp armadillo_svd_test.cpp aug_lagrangian_test.cpp @@ -56,7 +57,6 @@ add_executable(mlpack_test mlpack_test.cpp nbc_test.cpp nca_test.cpp - network_util_test.cpp nmf_test.cpp nystroem_method_test.cpp octree_test.cpp diff --git a/src/mlpack/tests/activation_functions_test.cpp b/src/mlpack/tests/activation_functions_test.cpp index bebca0de09b..94df3b59380 100644 --- a/src/mlpack/tests/activation_functions_test.cpp +++ b/src/mlpack/tests/activation_functions_test.cpp @@ -12,23 +12,13 @@ */ #include +#include #include #include #include #include #include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - #include #include "test_tools.hpp" @@ -125,7 +115,7 @@ void CheckInverseCorrect(const arma::colvec input) /* * Implementation of the HardTanH activation function test. 
The function is - * implemented as a HardTanH Layer in hard_tanh_layer.hpp + * implemented as a HardTanH Layer in hard_tanh.hpp * * @param input Input data used for evaluating the HardTanH activation function. * @param target Target data used to evaluate the HardTanH activation. @@ -133,11 +123,11 @@ void CheckInverseCorrect(const arma::colvec input) void CheckHardTanHActivationCorrect(const arma::colvec input, const arma::colvec target) { - HardTanHLayer<> htf; + HardTanH<> htf; // Test the activation function using the entire vector as input. arma::colvec activations; - htf.Forward(input, activations); + htf.Forward(std::move(input), std::move(activations)); for (size_t i = 0; i < activations.n_elem; i++) { BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3); @@ -146,7 +136,7 @@ void CheckHardTanHActivationCorrect(const arma::colvec input, /* * Implementation of the HardTanH activation function derivative test. The - * derivative is implemented as HardTanH Layer in hard_tanh_layer.hpp + * derivative is implemented as HardTanH Layer in hard_tanh.hpp * * @param input Input data used for evaluating the HardTanH activation function. * @param target Target data used to evaluate the HardTanH activation. @@ -154,14 +144,15 @@ void CheckHardTanHActivationCorrect(const arma::colvec input, void CheckHardTanHDerivativeCorrect(const arma::colvec input, const arma::colvec target) { - HardTanHLayer<> htf; + HardTanH<> htf; // Test the calculation of the derivatives using the entire vector as input. arma::colvec derivatives; // This error vector will be set to 1 to get the derivatives. - arma::colvec error(input.n_elem); - htf.Backward(input, (arma::colvec)error.ones(), derivatives); + arma::colvec error = arma::ones(input.n_elem); + htf.Backward(std::move(input), std::move(error), std::move(derivatives)); + for (size_t i = 0; i < derivatives.n_elem; i++) { BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3); @@ -170,7 +161,7 @@ void CheckHardTanHDerivativeCorrect(const arma::colvec input, /* * Implementation of the LeakyReLU activation function test. The function is - * implemented as LeakyReLU layer in the file leaky_relu_layer.hpp + * implemented as LeakyReLU layer in the file leaky_relu.hpp * * @param input Input data used for evaluating the LeakyReLU activation function. * @param target Target data used to evaluate the LeakyReLU activation. @@ -178,11 +169,11 @@ void CheckHardTanHDerivativeCorrect(const arma::colvec input, void CheckLeakyReLUActivationCorrect(const arma::colvec input, const arma::colvec target) { - LeakyReLULayer<> lrf; + LeakyReLU<> lrf; // Test the activation function using the entire vector as input. arma::colvec activations; - lrf.Forward(input, activations); + lrf.Forward(std::move(input), std::move(activations)); for (size_t i = 0; i < activations.n_elem; i++) { BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3); @@ -197,18 +188,17 @@ void CheckLeakyReLUActivationCorrect(const arma::colvec input, * @param input Input data used for evaluating the LeakyReLU activation function. * @param target Target data used to evaluate the LeakyReLU activation. */ - void CheckLeakyReLUDerivativeCorrect(const arma::colvec input, const arma::colvec target) { - LeakyReLULayer<> lrf; + LeakyReLU<> lrf; // Test the calculation of the derivatives using the entire vector as input. arma::colvec derivatives; // This error vector will be set to 1 to get the derivatives. 
- arma::colvec error(input.n_elem); - lrf.Backward(input, (arma::colvec)error.ones(), derivatives); + arma::colvec error = arma::ones(input.n_elem); + lrf.Backward(std::move(input), std::move(error), std::move(derivatives)); for (size_t i = 0; i < derivatives.n_elem; i++) { BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3); } diff --git a/src/mlpack/tests/ann_layer_test.cpp b/src/mlpack/tests/ann_layer_test.cpp new file mode 100644 index 00000000000..101a0170751 --- /dev/null +++ b/src/mlpack/tests/ann_layer_test.cpp @@ -0,0 +1,524 @@ +/** + * @file ann_layer_test.cpp + * @author Marcus Edel + * + * Tests the ann layer modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#include + +#include +#include +#include +#include + +#include +#include "test_tools.hpp" + +using namespace mlpack; +using namespace mlpack::ann; + +BOOST_AUTO_TEST_SUITE(ANNLayerTest); + +// Helper function which calls the Reset function of the given module. +template +void ResetFunction( + T& layer, + typename std::enable_if::value>::type* = 0) +{ + layer.Reset(); +} + +template +void ResetFunction( + T& /* layer */, + typename std::enable_if::value>::type* = 0) +{ + /* Nothing to do here */ +} + +// Approximate Jacobian and supposedly-true Jacobian, then compare them +// similarly to before. +template +double JacobianTest(ModuleType& module, + arma::mat& input, + const double minValue = -2, + const double maxValue = -1, + const double perturbation = 1e-6) +{ + arma::mat output, outputA, outputB, jacobianA, jacobianB; + + // Initialize the input matrix. + RandomInitialization init(minValue, maxValue); + init.Initialize(input, input.n_rows, input.n_cols); + + // Initialize the module parameters. + ResetFunction(module); + + // Initialize the jacobian matrix. + module.Forward(std::move(input), std::move(output)); + jacobianA = arma::zeros(input.n_elem, output.n_elem); + + // Share the input parameter matrix. + arma::mat sin = arma::mat(input.memptr(), input.n_rows, input.n_cols, + false, false); + + for (size_t i = 0; i < input.n_elem; ++i) + { + double original = sin(i); + sin(i) = original - perturbation; + module.Forward(std::move(input), std::move(outputA)); + sin(i) = original + perturbation; + module.Forward(std::move(input), std::move(outputB)); + sin(i) = original; + + outputB -= outputA; + outputB /= 2 * perturbation; + jacobianA.row(i) = outputB.t(); + } + + // Initialize the derivative parameter. + arma::mat deriv = arma::zeros(output.n_rows, output.n_cols); + + // Share the derivative parameter. + arma::mat derivTemp = arma::mat(deriv.memptr(), deriv.n_rows, deriv.n_cols, + false, false); + + // Initialize the jacobian matrix. + jacobianB = arma::zeros(input.n_elem, output.n_elem); + + for (size_t i = 0; i < derivTemp.n_elem; ++i) + { + deriv.zeros(); + derivTemp(i) = 1; + + arma::mat delta; + module.Backward(std::move(input), std::move(deriv), std::move(delta)); + + jacobianB.col(i) = delta; + } + + return arma::max(arma::max(arma::abs(jacobianA - jacobianB))); + } + +// Approximate Jacobian and supposedly-true Jacobian, then compare them +// similarly to before.
+template +double JacobianPerformanceTest(ModuleType& module, + arma::mat& input, + arma::mat& target, + const double eps = 1e-6) +{ + module.Forward(std::move(input), std::move(target)); + + arma::mat delta; + module.Backward(std::move(input), std::move(target), std::move(delta)); + + arma::mat centralDifference = arma::zeros(delta.n_rows, delta.n_cols); + arma::mat inputTemp = arma::mat(input.memptr(), input.n_rows, input.n_cols, + false, false); + + arma::mat centralDifferenceTemp = arma::mat(centralDifference.memptr(), + centralDifference.n_rows, centralDifference.n_cols, false, false); + + for (size_t i = 0; i < input.n_elem; ++i) + { + inputTemp(i) = inputTemp(i) + eps; + double outputA = module.Forward(std::move(input), std::move(target)); + inputTemp(i) = inputTemp(i) - (2 * eps); + double outputB = module.Forward(std::move(input), std::move(target)); + + centralDifferenceTemp(i) = (outputA - outputB) / ( 2 * eps); + inputTemp(i) = inputTemp(i) + eps; + } + + return arma::max(arma::max(arma::abs(centralDifference - delta))); +} + +/** + * Simple add module test. + */ +BOOST_AUTO_TEST_CASE(SimpleAddLayerTest) +{ + arma::mat output, input, delta; + Add<> module(10); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(module.Parameters()), arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(delta)); + + // Test the forward function. + input = arma::ones(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(10 + arma::accu(module.Parameters()), + arma::accu(output), 1e-3); + + // Test the backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_CLOSE(arma::accu(output), arma::accu(delta), 1e-3); +} + +/** + * Jacobian add module test. + */ +BOOST_AUTO_TEST_CASE(JacobianAddLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t elements = math::RandInt(2, 1000); + arma::mat input; + input.set_size(elements, 1); + + Add<> module(elements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple constant module test. + */ +BOOST_AUTO_TEST_CASE(SimpleConstantLayerTest) +{ + arma::mat output, input, delta; + Constant<> module(10, 3.0); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(output), 30.0); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); + + // Test the forward function. + input = arma::ones(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(output), 30.0); + + // Test the backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian constant module test. + */ +BOOST_AUTO_TEST_CASE(JacobianConstantLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t elements = math::RandInt(2, 1000); + arma::mat input; + input.set_size(elements, 1); + + Constant<> module(elements, 1.0); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple dropout module test. 
+ */ +BOOST_AUTO_TEST_CASE(SimpleDropoutLayerTest) +{ + // Initialize the probability of setting a value to zero and the scale + // parameter. + const double p = 0.2; + const double scale = 1.0 / (1.0 - p); + + // Initialize the input parameter. + arma::mat input(1000, 1); + input.fill(1 - p); + + Dropout<> module(p); + module.Deterministic() = false; + + // Test the Forward function. + arma::mat output; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_LE( + arma::as_scalar(arma::abs(arma::mean(output) - (1 - p))), 0.05); + + // Test the Backward function. + arma::mat delta; + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_LE( + arma::as_scalar(arma::abs(arma::mean(delta) - (1 - p))), 0.05); + + // Test the Forward function. + module.Deterministic() = true; + module.Rescale() = false; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(input), arma::accu(output)); + + // Test the Forward function. + module.Rescale() = true; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(arma::accu(input) * scale, arma::accu(output), 1e-3); +} + +/** + * Simple linear module test. + */ +BOOST_AUTO_TEST_CASE(SimpleLinearLayerTest) +{ + arma::mat output, input, delta; + Linear<> module(10, 10); + module.Parameters().randu(); + module.Reset(); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(arma::accu( + module.Parameters().submat(100, 0, module.Parameters().n_elem - 1, 0)), + arma::accu(output), 1e-3); + + // Test the Backward function. + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian linear module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLinearLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + const size_t outputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + Linear<> module(inputElements, outputElements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple linear no bias module test. + */ +BOOST_AUTO_TEST_CASE(SimpleLinearNoBiasLayerTest) +{ + arma::mat output, input, delta; + LinearNoBias<> module(10, 10); + module.Parameters().randu(); + module.Reset(); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(0, arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian linear no bias module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLinearNoBiasLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + const size_t outputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + LinearNoBias<> module(inputElements, outputElements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian negative log likelihood module test. 
+ */ +BOOST_AUTO_TEST_CASE(JacobianNegativeLogLikelihoodLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + NegativeLogLikelihood<> module; + const size_t inputElements = math::RandInt(5, 100); + arma::mat input; + RandomInitialization init(0, 1); + init.Initialize(input, inputElements, 1); + + arma::mat target(1, 1); + target(0) = math::RandInt(1, inputElements - 1); + + double error = JacobianPerformanceTest(module, input, target); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian LeakyReLU module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLeakyReLULayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + LeakyReLU<> module; + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian MultiplyConstant module test. + */ +BOOST_AUTO_TEST_CASE(JacobianMultiplyConstantLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + MultiplyConstant<> module(3.0); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian HardTanH module test. + */ +BOOST_AUTO_TEST_CASE(JacobianHardTanHLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + HardTanH<> module; + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple select module test. + */ +BOOST_AUTO_TEST_CASE(SimpleSelectLayerTest) +{ + arma::mat outputA, outputB, input, delta; + + input = arma::ones(10, 5); + for (size_t i = 0; i < input.n_cols; ++i) + { + input.col(i) *= i; + } + + // Test the Forward function. + Select<> moduleA(3); + moduleA.Forward(std::move(input), std::move(outputA)); + BOOST_REQUIRE_EQUAL(30, arma::accu(outputA)); + + // Test the Forward function. + Select<> moduleB(3, 5); + moduleB.Forward(std::move(input), std::move(outputB)); + BOOST_REQUIRE_EQUAL(15, arma::accu(outputB)); + + // Test the Backward function. + moduleA.Backward(std::move(input), std::move(outputA), std::move(delta)); + BOOST_REQUIRE_EQUAL(30, arma::accu(delta)); + + // Test the Backward function. + moduleB.Backward(std::move(input), std::move(outputA), std::move(delta)); + BOOST_REQUIRE_EQUAL(15, arma::accu(delta)); +} + +/** + * Simple join module test. + */ +BOOST_AUTO_TEST_CASE(SimpleJoinLayerTest) +{ + arma::mat output, input, delta; + input = arma::ones(10, 5); + + // Test the Forward function. + Join<> module; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(50, arma::accu(output)); + + bool b = output.n_rows == 1 || output.n_cols == 1; + BOOST_REQUIRE_EQUAL(b, true); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(50, arma::accu(delta)); + + b = delta.n_rows == input.n_rows && input.n_cols; + BOOST_REQUIRE_EQUAL(b, true); +} + +/** + * Simple add merge module test. 
+ */ +BOOST_AUTO_TEST_CASE(SimpleAddMergeLayerTest) +{ + arma::mat output, input, delta; + input = arma::ones(10, 1); + + for (size_t i = 0; i < 5; ++i) + { + AddMerge<> module; + const size_t numMergeModules = math::RandInt(2, 10); + for (size_t m = 0; m < numMergeModules; ++m) + { + IdentityLayer<> identityLayer; + identityLayer.Forward(std::move(input), + std::move(identityLayer.OutputParameter())); + + module.Add(identityLayer); + } + + // Test the Forward function. + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(10 * numMergeModules, arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(delta)); + } +} + +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index 52e1a6c6394..0c99722ca1c 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -11,21 +11,9 @@ */ #include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include #include - -#include -#include +#include +#include #include #include "test_tools.hpp" @@ -38,12 +26,9 @@ using namespace mlpack::optimization; BOOST_AUTO_TEST_SUITE(ConvolutionalNetworkTest); /** - * Train and evaluate a vanilla network with the specified structure. + * Train the vanilla network on a larger dataset. */ -template< - typename PerformanceFunction -> -void BuildVanillaNetwork() +BOOST_AUTO_TEST_CASE(VanillaNetworkTest) { arma::mat X; X.load("mnist_first250_training_4s_and_9s.arm"); @@ -56,23 +41,19 @@ void BuildVanillaNetwork() } // Build the target matrix. 
- arma::mat Y = arma::zeros(10, nPoints); + arma::mat Y = arma::zeros(1, nPoints); for (size_t i = 0; i < nPoints; i++) { if (i < nPoints / 2) { - Y.col(i)(5) = 1; + Y(i) = 4; } else { - Y.col(i)(8) = 1; + Y(i) = 9; } } - arma::cube input = arma::cube(28, 28, nPoints); - for (size_t i = 0; i < nPoints; i++) - input.slice(i) = arma::mat(X.colptr(i), 28, 28); - /* * Construct a convolutional neural network with a 28x28x1 input layer, * 24x24x8 convolution layer, 12x12x8 pooling layer, 8x8x12 convolution layer @@ -90,57 +71,46 @@ void BuildVanillaNetwork() * | | +-+ | +-+ | +-+ | +-+ | | | * +---+ +---+ +---+ +---+ +---+ +---+ */ - - ConvLayer<> convLayer0(1, 8, 5, 5); - BiasLayer2D<> biasLayer0(8); - BaseLayer2D<> baseLayer0; - PoolingLayer<> poolingLayer0(2); - - ConvLayer<> convLayer1(8, 12, 5, 5); - BiasLayer2D<> biasLayer1(12); - BaseLayer2D<> baseLayer1; - PoolingLayer<> poolingLayer1(2); - - LinearMappingLayer<> linearLayer0(4608, 10); - BiasLayer<> biasLayer2(10); - SoftmaxLayer<> softmaxLayer0; - - OneHotLayer outputLayer; - - auto modules = std::tie(convLayer0, baseLayer0, linearLayer0, softmaxLayer0); - - CNN net(modules, outputLayer); - biasLayer0.Weights().zeros(); - biasLayer1.Weights().zeros(); - - RMSprop opt(net, 0.01, 0.88, 1e-8, 10 * input.n_slices, 0); - - net.Train(input, Y, opt); - - arma::mat prediction; - net.Predict(input, prediction); + FFN > model; + + model.Add >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28); + model.Add >(); + model.Add >(8, 8, 2, 2); + model.Add >(8, 12, 2, 2); + model.Add >(); + model.Add >(2, 2, 2, 2); + model.Add >(192, 20); + model.Add >(); + model.Add >(20, 30); + model.Add >(); + model.Add >(30, 10); + model.Add >(); + + RMSprop opt(model, 0.01, 0.88, 1e-8, 5000, -1); + + model.Train(std::move(X), std::move(Y), opt); + + arma::mat predictionTemp; + model.Predict(X, predictionTemp); + arma::mat prediction = arma::zeros(1, predictionTemp.n_cols); + + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; - for (size_t i = 0; i < nPoints; i++) + for (size_t i = 0; i < X.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - Y.col(i)))) == 0) + if (prediction(i) == Y(i)) { error++; } } - double classificationError = 1 - double(error) / nPoints; - BOOST_REQUIRE_LE(classificationError, 0.6); -} - -/** - * Train the vanilla network on a larger dataset. - */ -BOOST_AUTO_TEST_CASE(VanillaNetworkTest) -{ - BuildVanillaNetwork(); + double classificationError = 1 - double(error) / X.n_cols; + BOOST_REQUIRE_LE(classificationError, 0.2); } BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp index 4477bf22568..883fe9e6b00 100644 --- a/src/mlpack/tests/feedforward_network_test.cpp +++ b/src/mlpack/tests/feedforward_network_test.cpp @@ -12,21 +12,9 @@ */ #include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include #include +#include +#include #include #include "test_tools.hpp" @@ -40,16 +28,12 @@ BOOST_AUTO_TEST_SUITE(FeedForwardNetworkTest); /** * Train and evaluate a vanilla network with the specified structure. 
*/ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildVanillaNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -76,35 +60,32 @@ void BuildVanillaNetwork(MatType& trainData, * +-----+ +-----+ */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> inputBiasLayer(hiddenLayerSize); - BaseLayer inputBaseLayer; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - BiasLayer<> hiddenBiasLayer1(trainLabels.n_rows); - BaseLayer outputLayer; + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - OutputLayerType classOutputLayer; + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - auto modules = std::tie(inputLayer, inputBiasLayer, inputBaseLayer, - hiddenLayer1, hiddenBiasLayer1, outputLayer); + model.Train(std::move(trainData), std::move(trainLabels), opt); - FFN net(modules, classOutputLayer); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); - - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) { error++; } @@ -125,23 +106,36 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. 
- BuildVanillaNetwork - (trainData, trainLabels, testData, testLabels, 8, 200, 0.1); + BuildVanillaNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -151,33 +145,22 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildVanillaNetwork - (dataset, labels, dataset, labels, 30, 30, 0.4); - - // Vanilla neural net with tanh activation function. - BuildVanillaNetwork - (dataset, labels, dataset, labels, 10, 30, 0.4); + BuildVanillaNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } /** * Train and evaluate a Dropout network with the specified structure. */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildDropoutNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -204,35 +187,33 @@ void BuildDropoutNetwork(MatType& trainData, * +-----+ */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - DropoutLayer<> dropoutLayer0; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, dropoutLayer0, - hiddenLayer1, outputLayer); + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - FFN net(modules, classOutputLayer); + model.Train(std::move(trainData), std::move(trainLabels), opt); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) { error++; } @@ -253,23 +234,36 @@ BOOST_AUTO_TEST_CASE(DropoutNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + 
+ arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. - BuildDropoutNetwork - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); + BuildDropoutNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -279,34 +273,23 @@ BOOST_AUTO_TEST_CASE(DropoutNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildDropoutNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); - - // Vanilla neural net with tanh activation function. - BuildDropoutNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); + BuildDropoutNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } /** * Train and evaluate a DropConnect network(with a baselayer) with the * specified structure. */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildDropConnectNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -334,122 +317,42 @@ void BuildDropConnectNetwork(MatType& trainData, * * */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - DropConnectLayer dropConnectLayer0(hiddenLayer1); - - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, - dropConnectLayer0, outputLayer); + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - FFN net(modules, classOutputLayer); + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); + model.Train(std::move(trainData), std::move(trainLabels), opt); - net.Train(trainData, trainLabels, opt); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - MatType prediction; - net.Predict(testData, prediction); - - size_t error = 0; - for (size_t i = 0; i < testData.n_cols; i++) + for (size_t i = 0; i < predictionTemp.n_cols; ++i) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) - { - error++; - } + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; } - double classificationError = 1 - double(error) / testData.n_cols; - BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold); -} - -/** - * Train and evaluate a DropConnect network(with a linearlayer) with the - * specified structure. 
- */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> -void BuildDropConnectNetworkLinear(MatType& trainData, - MatType& trainLabels, - MatType& testData, - MatType& testLabels, - const size_t hiddenLayerSize, - const size_t maxEpochs, - const double classificationErrorThreshold) -{ - /* - * Construct a feed forward network with trainData.n_rows input nodes, - * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The - * network struct that looks like: - * - * Input Hidden DropConnect Output - * Layer Layer Layer Layer - * +-----+ +-----+ +-----+ +-----+ - * | | | | | | | | - * | +------>| +------>| +------>| | - * | | +>| | | | | | - * +-----+ | +--+--+ +-----+ +-----+ - * | - * Bias | - * Layer | - * +-----+ | - * | | | - * | +-----+ - * | | - * +-----+ - * - * - */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - - DropConnectLayer<> dropConnectLayer0(hiddenLayerSize, trainLabels.n_rows); - - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, - dropConnectLayer0, outputLayer); - - FFN net(modules, classOutputLayer); - - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); - - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); - size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) - { - error++; - } + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) + { + error++; + } } double classificationError = 1 - double(error) / testData.n_cols; BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold); } + /** * Train the dropconnect network on a larger dataset. */ @@ -461,28 +364,36 @@ BOOST_AUTO_TEST_CASE(DropConnectNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. 
- BuildDropConnectNetwork - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); - - BuildDropConnectNetworkLinear - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); + BuildDropConnectNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -492,18 +403,11 @@ BOOST_AUTO_TEST_CASE(DropConnectNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildDropConnectNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); - - - BuildDropConnectNetworkLinear - (dataset, labels, dataset, labels, 8, 30, 0.4); + BuildDropConnectNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } -BOOST_AUTO_TEST_SUITE_END(); +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file diff --git a/src/mlpack/tests/network_util_test.cpp b/src/mlpack/tests/network_util_test.cpp deleted file mode 100644 index 4f0fcf105e0..00000000000 --- a/src/mlpack/tests/network_util_test.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/** - * @file network_util_test.cpp - * @author Marcus Edel - * - * Simple tests for things in the network_util file. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(NetworkUtilTest); - -/** - * Test the network size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(NetworkSizeTest) -{ - // Create a two layer network without weights. - BaseLayer<> baseLayer1; - BaseLayer<> baseLayer2; - auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkSize(noneWeightNetwork), 0); - - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - LinearLayer<> linearLayer2(10, 100); - - // Reuse the layer form the first network. - auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2, - baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkSize(weightNetwork), 1100); -} - -/** - * Test the layer size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(LayerSizeTest) -{ - // Create layer without weights. - BaseLayer<> baseLayer; - BOOST_REQUIRE_EQUAL(LayerSize(baseLayer, baseLayer.OutputParameter()), 0); - - // Create layer with weights. - LinearLayer<> linearLayer(10, 10); - BOOST_REQUIRE_EQUAL(LayerSize(linearLayer, - linearLayer.OutputParameter()), 100); -} - -/** - * Test the network input size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(NetworkInputSizeTest) -{ - // Create a two layer network without weights. - BaseLayer<> baseLayer1; - BaseLayer<> baseLayer2; - auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkInputSize(noneWeightNetwork), 0); - - // Create a two layer network. - LinearLayer<> linearLayer1(5, 10); - LinearLayer<> linearLayer2(10, 100); - - // Reuse the layer form the first network. - auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2, - baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkInputSize(weightNetwork), 5); -} - -/** - * Test the layer input size auxiliary function. 
- */ -BOOST_AUTO_TEST_CASE(LayerInputSizeTest) -{ - // Create layer without weights. - BaseLayer<> baseLayer; - BOOST_REQUIRE_EQUAL(LayerInputSize(baseLayer, - baseLayer.OutputParameter()), 0); - - // Create layer with weights. - LinearLayer<> linearLayer(5, 10); - BOOST_REQUIRE_EQUAL(LayerInputSize(linearLayer, - linearLayer.OutputParameter()), 5); -} - -/** - * Test the network weight auxiliary function using the given initialization - * rule. - */ -BOOST_AUTO_TEST_CASE(NetworkWeightsInitTest) -{ - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - LinearLayer<> linearLayer2(10, 100); - - arma::mat parameter = arma::zeros(1100, 1); - - // Create the network. - auto network = std::tie(linearLayer1, linearLayer2); - - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0); - - RandomInitialization constantInit(1, 1); - NetworkWeights(constantInit, parameter, network); - - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100); - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer2.Weights()), 1000); - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 1100); -} - -/** - * Test the layer weight auxiliary function using the given initialization rule. - */ -BOOST_AUTO_TEST_CASE(LayerWeightsInitTest) -{ - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - - arma::mat parameter = arma::zeros(100, 1); - - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0); - - RandomInitialization constantInit(1, 1); - arma::mat output; - LayerWeights(constantInit, linearLayer1, parameter, 0, output); - - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100); - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 100); -} - -BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/performance_functions_test.cpp b/src/mlpack/tests/performance_functions_test.cpp deleted file mode 100644 index 35902911422..00000000000 --- a/src/mlpack/tests/performance_functions_test.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @file performance_functions_test.cpp - * @author Marcus Edel - * - * Tests for the various performance functions. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(PerformanceFunctionsTest); - -// Test the mean squared error performance function. -BOOST_AUTO_TEST_CASE(MeanSquaredErrorTest) -{ - arma::colvec input("1.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0"); - arma::colvec target = arma::zeros(8); - - BOOST_REQUIRE_EQUAL(MeanSquaredErrorFunction::Error(input, target), 0.5); -} - -// Test the cross entropy performance function. -BOOST_AUTO_TEST_CASE(CrossEntropyErrorTest) -{ - arma::colvec input; - input << std::exp(-2.0) << std::exp(-1.0); - arma::colvec target = arma::ones(2); - - BOOST_REQUIRE_EQUAL(CrossEntropyErrorFunction<>::Error(input, target), 3); -} - -// Test the sum squared error performance function. 
-BOOST_AUTO_TEST_CASE(SumSquaredErrorTest) -{ - arma::colvec input("1.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0"); - arma::colvec target = arma::zeros(8); - - BOOST_REQUIRE_EQUAL(SumSquaredErrorFunction::Error(input, target), 4); -} - -BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/pooling_rules_test.cpp b/src/mlpack/tests/pooling_rules_test.cpp deleted file mode 100644 index 0dd2c9dcb24..00000000000 --- a/src/mlpack/tests/pooling_rules_test.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/** - * @file convolution_test.cpp - * @author Marcus Edel - * - * Tests for various convolution strategies. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(PoolingTest); - -/** - * Test the max pooling rule. - */ -BOOST_AUTO_TEST_CASE(MaxPoolingTest) -{ - // The data was generated by magic(6) in MATLAB. - arma::mat input, output; - input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr - << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr - << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr - << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr - << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr - << 4 << 36 << 29 << 13 << 18 << 11; - - // Expected output of the generated 6 x 6 matrix. - const double poolingOutput = 36; - - MaxPooling poolingRule; - - // Test the pooling function. - BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput); - - // Test the unpooling function. - poolingRule.Unpooling(input, input.max(), output); - BOOST_REQUIRE_EQUAL(arma::accu(output), input.max()); -} - -/** - * Test the mean pooling rule. - */ -BOOST_AUTO_TEST_CASE(MeanPoolingTest) -{ - // The data was generated by magic(6) in MATLAB. - arma::mat input, output; - input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr - << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr - << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr - << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr - << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr - << 4 << 36 << 29 << 13 << 18 << 11; - - // Expected output of the generated 6 x 6 matrix. - const double poolingOutput = 18.5; - - MeanPooling poolingRule; - - // Test the pooling function. - BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput); - - // Test the unpooling function. - poolingRule.Unpooling(input, input.max(), output); - bool b = arma::all(arma::vectorise(output) == (input.max() / input.n_elem)); - BOOST_REQUIRE_EQUAL(b, true); -} - -BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index c49ae423ae2..f7546e83b50 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -11,18 +11,10 @@ */ #include -#include -#include -#include -#include -#include - -#include -#include #include -#include -#include - #include +#include +#include +#include #include #include "test_tools.hpp" @@ -78,19 +70,29 @@ BOOST_AUTO_TEST_CASE(SequenceClassificationTest) // times, I'm fine with that. All I want to know is that the network is able // to escape from local minima and to solve the task. 
size_t successes = 0; + const size_t rho = 10; for (size_t trial = 0; trial < 5; ++trial) { - // Generate 12 (2 * 6) noisy sines. A single sine contains 10 points/features. - arma::mat input, labels; - GenerateNoisySines(input, labels, 10, 6); + // Generate 12 (2 * 6) noisy sines. A single sine contains rho points/features. + arma::mat input, labelsTemp; + GenerateNoisySines(input, labelsTemp, rho, 6); + + arma::mat labels = arma::zeros(rho, labelsTemp.n_cols); + for (size_t i = 0; i < labelsTemp.n_cols; ++i) + { + const int value = arma::as_scalar(arma::find( + arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1; + labels.col(i).fill(value); + } /* - * Construct a network with 1 input unit, 4 hidden units and 2 output units. - * The hidden layer is connected to itself. The network structure looks like: + * Construct a network with 1 input unit, 4 hidden units and 10 output + * units. The hidden layer is connected to itself. The network structure + * looks like: * * Input Hidden Output - * Layer(1) Layer(4) Layer(2) + * Layer(1) Layer(4) Layer(10) * +-----+ +-----+ +-----+ * | | | | | | * | +------>| +------>| | @@ -100,38 +102,45 @@ BOOST_AUTO_TEST_CASE(SequenceClassificationTest) * . . * ....... */ - LinearLayer<> linearLayer0(1, 4); - RecurrentLayer<> recurrentLayer0(4); - BaseLayer inputBaseLayer; + Add<> add(4); + Linear<> lookup(1, 4); + SigmoidLayer<> sigmoidLayer; + Linear<> linear(4, 4); + Recurrent<> recurrent(add, lookup, linear, sigmoidLayer, rho); - LinearLayer<> hiddenLayer(4, 2); - BaseLayer hiddenBaseLayer; + RNN<> model(rho); + model.Add >(); + model.Add(recurrent); + model.Add >(4, 10); + model.Add >(); - BinaryClassificationLayer classOutputLayer; - - auto modules = std::tie(linearLayer0, recurrentLayer0, inputBaseLayer, - hiddenLayer, hiddenBaseLayer); - - RNN net(modules, classOutputLayer); - - SGD opt(net, 0.5, 500 * input.n_cols, -100); - - net.Train(input, labels, opt); + SGD opt(model, 0.1, 500 * input.n_cols, -100); + model.Train(input, labels, opt); arma::mat prediction; - net.Predict(input, prediction); + model.Predict(input, prediction); size_t error = 0; - for (size_t i = 0; i < labels.n_cols; i++) + for (size_t i = 0; i < prediction.n_cols; ++i) { - if (arma::sum(arma::sum(arma::abs(prediction.col(i) - labels.col(i)))) == 0) + arma::mat singlePrediction = prediction.submat((rho - 1) * rho, i, + rho * rho - 1, i); + + const int predictionValue = arma::as_scalar(arma::find( + arma::max(singlePrediction.col(0)) == + singlePrediction.col(0), 1) + 1); + + const int targetValue = arma::as_scalar(arma::find( + arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1; + + if (predictionValue == targetValue) { error++; } } - double classificationError = 1 - double(error) / labels.n_cols; + double classificationError = 1 - double(error) / prediction.n_cols; + if (classificationError <= 0.2) { ++successes; @@ -279,9 +288,7 @@ void GenerateNextEmbeddedReber(const arma::Mat& transitions, /** * Train the specified network and the construct a Reber grammar dataset. */ -template -void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, - bool embedded = false) +void ReberGrammarTestNetwork(bool embedded = false) { // Reber state transition matrix. (The last two columns are the indices to the // next path). @@ -346,36 +353,34 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, * | | | | | | * | +------>| +------>| | * | | ..>| | | | - * +-----+ . +--+--+ +-----+ + * +-----+ . +--+--+ +-- ---+ * . . * . . * ....... 
*/ - const size_t lstmSize = 4 * 10; - LinearLayer<> linearLayer0(7, lstmSize); - RecurrentLayer<> recurrentLayer0(10, lstmSize); + const size_t outputSize = 7; + const size_t inputSize = 7; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - LinearLayer<>hiddenLayer(10, 7); - BaseLayer hiddenBaseLayer; + RNN > model(rho); - BinaryClassificationLayer classOutputLayer; + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0, - hiddenLayer, hiddenBaseLayer); - - RNN net(modules, classOutputLayer); - - SGD opt(net, 0.5, 2, -200); + SGD opt(model, 0.1, 2, -50000); arma::mat inputTemp, labelsTemp; - for (size_t i = 0; i < 15; i++) + for (size_t i = 0; i < 40; i++) { for (size_t j = 0; j < trainReberGrammarCount; j++) { inputTemp = trainInput.at(0, j); labelsTemp = trainLabels.at(0, j); - net.Train(inputTemp, labelsTemp, opt); + + model.Train(inputTemp, labelsTemp, opt); } } @@ -384,10 +389,11 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, // Ask the network to predict the next Reber grammar in the given sequence. for (size_t i = 0; i < testReberGrammarCount; i++) { - arma::mat output; + arma::mat output, prediction; arma::mat input = testInput.at(0, i); - net.Predict(input, output); + model.Predict(input, prediction); + data::Binarize(prediction, output, 0.5); const size_t reberGrammerSize = 7; std::string inputReber = ""; @@ -429,8 +435,7 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, */ BOOST_AUTO_TEST_CASE(ReberGrammarTest) { - LSTMLayer<> hiddenLayerLSTM(10); - ReberGrammarTestNetwork(hiddenLayerLSTM); + ReberGrammarTestNetwork(false); } /** @@ -438,8 +443,7 @@ BOOST_AUTO_TEST_CASE(ReberGrammarTest) */ BOOST_AUTO_TEST_CASE(EmbeddedReberGrammarTest) { - LSTMLayer<> hiddenLayerLSTM(10); - ReberGrammarTestNetwork(hiddenLayerLSTM, true); + ReberGrammarTestNetwork(true); } /* @@ -490,7 +494,6 @@ void GenerateDistractedSequence(arma::mat& input, arma::mat& output) for (size_t i = 2; i < 8; i++) input(2 + rand() % 6, index(i)) = 1; - // Set the prompts which direct the network to give an answer. input(8, 8) = 1; input(9, 9) = 1; @@ -503,8 +506,7 @@ void GenerateDistractedSequence(arma::mat& input, arma::mat& output) * Train the specified network and the construct distracted sequence recall * dataset. */ -template -void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) +void DistractedSequenceRecallTestNetwork() { const size_t trainDistractedSequenceCount = 1000; const size_t testDistractedSequenceCount = 1000; @@ -538,22 +540,18 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) * . . * ....... 
*/ - const size_t lstmSize = 4 * 10; - LinearLayer<> linearLayer0(10, lstmSize); - RecurrentLayer<> recurrentLayer0(10, lstmSize); - - LinearLayer<> hiddenLayer(10, 3); - TanHLayer<> hiddenBaseLayer; - - BinaryClassificationLayer classOutputLayer; - - auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0, - hiddenLayer, hiddenBaseLayer); + const size_t outputSize = 3; + const size_t inputSize = 10; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - RNN net(modules, classOutputLayer); + RNN > model(rho); + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - SGD opt(net, 0.04, 2, -200); + SGD opt(model, 0.1, 2, -50000); arma::mat inputTemp, labelsTemp; for (size_t i = 0; i < 40; i++) @@ -563,7 +561,7 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) inputTemp = trainInput.at(0, j); labelsTemp = trainLabels.at(0, j); - net.Train(inputTemp, labelsTemp, opt); + model.Train(inputTemp, labelsTemp, opt); } } @@ -576,7 +574,8 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) arma::mat output; arma::mat input = testInput.at(0, i); - net.Predict(input, output); + model.Predict(input, output); + data::Binarize(output, output, 0.5); if (arma::accu(arma::abs(testLabels.at(0, i) - output)) != 0) error += 1; @@ -597,8 +596,7 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) */ BOOST_AUTO_TEST_CASE(DistractedSequenceRecallTest) { - LSTMLayer<> hiddenLayerLSTMPeephole(10, true); - DistractedSequenceRecallTestNetwork(hiddenLayerLSTMPeephole); + DistractedSequenceRecallTestNetwork(); } BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/rmsprop_test.cpp b/src/mlpack/tests/rmsprop_test.cpp index 481741a4cc1..831df74302d 100644 --- a/src/mlpack/tests/rmsprop_test.cpp +++ b/src/mlpack/tests/rmsprop_test.cpp @@ -16,14 +16,6 @@ #include -#include -#include -#include -#include -#include -#include -#include - #include #include "test_tools.hpp" @@ -35,8 +27,6 @@ using namespace mlpack::optimization::test; using namespace mlpack::distribution; using namespace mlpack::regression; -using namespace mlpack::ann; - BOOST_AUTO_TEST_SUITE(RMSpropTest); /** @@ -116,47 +106,4 @@ BOOST_AUTO_TEST_CASE(LogisticRegressionTest) BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance. } -/** - * Run RMSprop on a feedforward neural network and make sure the results are - * acceptable. - */ -BOOST_AUTO_TEST_CASE(FeedforwardTest) -{ - // Test on a non-linearly separable dataset (XOR). - arma::mat input, labels; - input << 0 << 1 << 1 << 0 << arma::endr - << 1 << 0 << 1 << 0 << arma::endr; - labels << 1 << 1 << 0 << 0; - - // Instantiate the first layer. - LinearLayer<> inputLayer(input.n_rows, 8); - BiasLayer<> biasLayer(8); - TanHLayer<> hiddenLayer0; - - // Instantiate the second layer. - LinearLayer<> hiddenLayer1(8, labels.n_rows); - TanHLayer<> outputLayer; - - // Instantiate the output layer. - BinaryClassificationLayer classOutputLayer; - - // Instantiate the feedforward network. 
- auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, hiddenLayer1, - outputLayer); - FFN net(modules, classOutputLayer); - - RMSprop opt(net, 0.03, 0.99, 1e-8, 300 * input.n_cols, -10); - - net.Train(input, labels, opt); - - arma::mat prediction; - net.Predict(input, prediction); - - BOOST_REQUIRE_EQUAL(prediction(0), 1); - BOOST_REQUIRE_EQUAL(prediction(1), 1); - BOOST_REQUIRE_EQUAL(prediction(2), 0); - BOOST_REQUIRE_EQUAL(prediction(3), 0); -} - BOOST_AUTO_TEST_SUITE_END();
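For orientation, every rewritten test above builds its network through the new layer-module front end (model.Add, model.Train, model.Predict) instead of the removed std::tie module lists. Below is a minimal stand-alone sketch of that pattern, not part of the patch itself: it assumes the mlpack 2.x-era headers and fills in template arguments (FFN<NegativeLogLikelihood<> >, Linear<>, SigmoidLayer<>, LogSoftMax<>, RMSprop<decltype(model)>) consistent with the identifiers visible in the hunks above, so exact names and signatures may differ from the actual tree.

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>

using namespace mlpack;
using namespace mlpack::ann;
using namespace mlpack::optimization;

int main()
{
  // Tiny two-class toy problem; labels are 1-based class indices, the same
  // convention the rewritten tests enforce with "labels += 1".
  arma::mat trainData = arma::randu<arma::mat>(4, 200);
  arma::mat trainLabels(1, 200);
  for (size_t i = 0; i < trainData.n_cols; ++i)
    trainLabels(i) = (arma::accu(trainData.col(i)) > 2.0) ? 2 : 1;

  // Keep a copy for prediction, since Train() takes the data by move.
  arma::mat testData = trainData;

  // Assemble the network by stacking layer modules (assumed layer names).
  FFN<NegativeLogLikelihood<> > model;
  model.Add<Linear<> >(trainData.n_rows, 8);
  model.Add<SigmoidLayer<> >();
  model.Add<Linear<> >(8, 2);
  model.Add<LogSoftMax<> >();

  // Same RMSprop settings used throughout the updated tests.
  RMSprop<decltype(model)> opt(model, 0.01, 0.88, 1e-8,
      50 * trainData.n_cols, -1);
  model.Train(std::move(trainData), std::move(trainLabels), opt);

  // Predict() yields one column of class scores per test point; the
  // predicted label is the row of the maximum, shifted to 1-based indexing,
  // mirroring the find(max(...) == ...) idiom used in the tests above.
  arma::mat scores;
  model.Predict(testData, scores);
  arma::urowvec predictions(scores.n_cols);
  for (size_t i = 0; i < scores.n_cols; ++i)
  {
    predictions(i) = arma::as_scalar(arma::find(
        arma::max(scores.col(i)) == scores.col(i), 1)) + 1;
  }

  return 0;
}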