From 4536910b8b606a9ba2a06ae481f719f5ae2147cf Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Mon, 16 May 2016 14:57:23 -0400
Subject: [PATCH 01/87] Force positive-definiteness when training a Gaussian.

---
 .../core/dists/gaussian_distribution.cpp      | 33 ++++---------------
 1 file changed, 7 insertions(+), 26 deletions(-)
diff --git a/src/mlpack/core/dists/gaussian_distribution.cpp b/src/mlpack/core/dists/gaussian_distribution.cpp
index cd9559fd58f..b8d1b042b63 100644
--- a/src/mlpack/core/dists/gaussian_distribution.cpp
+++ b/src/mlpack/core/dists/gaussian_distribution.cpp
@@ -6,6 +6,7 @@
  * Implementation of Gaussian distribution class.
  */
 #include "gaussian_distribution.hpp"
+#include <mlpack/methods/gmm/positive_definite_constraint.hpp>
 
 using namespace mlpack;
 using namespace mlpack::distribution;
@@ -116,18 +117,7 @@ void GaussianDistribution::Train(const arma::mat& observations)
   covariance /= (observations.n_cols - 1);
 
   // Ensure that the covariance is positive definite.
-  if (det(covariance) <= 1e-50)
-  {
-    Log::Debug << "GaussianDistribution::Train(): Covariance matrix is not "
-        << "positive definite. Adding perturbation." << std::endl;
-
-    double perturbation = 1e-30;
-    while (det(covariance) <= 1e-50)
-    {
-      covariance.diag() += perturbation;
-      perturbation *= 10; // Slow, but we don't want to add too much.
-    }
-  }
+  gmm::PositiveDefiniteConstraint::ApplyConstraint(covariance);
 
   FactorCovariance();
 }
@@ -173,7 +163,8 @@ void GaussianDistribution::Train(const arma::mat& observations,
   }
 
   // Normalize.
-  mean /= sumProb;
+  if (sumProb > 0)
+    mean /= sumProb;
 
   // Now find the covariance.
   for (size_t i = 0; i < observations.n_cols; i++)
@@ -183,21 +174,11 @@ void GaussianDistribution::Train(const arma::mat& observations,
   }
 
   // This is probably biased, but I don't know how to unbias it.
-  covariance /= sumProb;
+  if (sumProb > 0)
+    covariance /= sumProb;
 
   // Ensure that the covariance is positive definite.
-  if (det(covariance) <= 1e-50)
-  {
-    Log::Debug << "GaussianDistribution::Train(): Covariance matrix is not "
-        << "positive definite. Adding perturbation." << std::endl;
-
-    double perturbation = 1e-30;
-    while (det(covariance) <= 1e-50)
-    {
-      covariance.diag() += perturbation;
-      perturbation *= 10; // Slow, but we don't want to add too much.
-    }
-  }
+  gmm::PositiveDefiniteConstraint::ApplyConstraint(covariance);
 
   FactorCovariance();
 }

From 274462840d9a62193ed796de75aa0d1a64550236 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Mon, 16 May 2016 14:58:05 -0400
Subject: [PATCH 02/87] Better handling of NaNs.

---
 src/mlpack/methods/hmm/hmm_impl.hpp | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/methods/hmm/hmm_impl.hpp b/src/mlpack/methods/hmm/hmm_impl.hpp
index b567f0fcf1f..2bfc4cb8fe9 100644
--- a/src/mlpack/methods/hmm/hmm_impl.hpp
+++ b/src/mlpack/methods/hmm/hmm_impl.hpp
@@ -173,7 +173,13 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
 
     // Now we normalize the transition matrix.
     for (size_t i = 0; i < transition.n_cols; i++)
-      transition.col(i) /= accu(transition.col(i));
+    {
+      const double sum = accu(transition.col(i));
+      if (sum > 0.0)
+        transition.col(i) /= sum;
+      else
+        transition.col(i).fill(1.0 / (double) transition.n_rows);
+    }
 
     // Now estimate emission probabilities.
     for (size_t state = 0; state < transition.n_cols; state++)
@@ -513,7 +519,8 @@ void HMM<Distribution>::Forward(const arma::mat& dataSeq,
 
   // Then normalize the column.
   scales[0] = accu(forwardProb.col(0));
-  forwardProb.col(0) /= scales[0];
+  if (scales[0] > 0.0)
+    forwardProb.col(0) /= scales[0];
 
   // Now compute the probabilities for each successive observation.
   for (size_t t = 1; t < dataSeq.n_cols; t++)
@@ -530,7 +537,8 @@ void HMM<Distribution>::Forward(const arma::mat& dataSeq,
 
     // Normalize probability.
     scales[t] = accu(forwardProb.col(t));
-    forwardProb.col(t) /= scales[t];
+    if (scales[t] > 0.0)
+      forwardProb.col(t) /= scales[t];
   }
 }
 
@@ -560,7 +568,8 @@ void HMM<Distribution>::Backward(const arma::mat& dataSeq,
             * emission[state].Probability(dataSeq.unsafe_col(t + 1));
 
       // Normalize by the weights from the forward algorithm.
-      backwardProb(j, t) /= scales[t + 1];
+      if (scales[t + 1] > 0.0)
+        backwardProb(j, t) /= scales[t + 1];
     }
   }
 }

From 3e4f3cade8a59f1a49c07e08c39fe6af33e2da06 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Mon, 16 May 2016 15:11:56 -0400
Subject: [PATCH 03/87] Add --random_initialization for mlpack_hmm_train.

---
 HISTORY.md                                |  3 ++
 src/mlpack/methods/hmm/hmm_train_main.cpp | 64 ++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/HISTORY.md b/HISTORY.md
index ab7a041408a..cddb28a3e3c 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -22,6 +22,9 @@
     mlpack_allknn and mlpack_allkfn programs will remain as copies until mlpack
     3.0.0.
 
+  * Add --random_initialization option to mlpack_hmm_train, for use when no
+    labels are provided.
+
 ### mlpack 2.0.1
 ###### 2016-02-04
   * Fix CMake to properly detect when MKL is being used with Armadillo.
diff --git a/src/mlpack/methods/hmm/hmm_train_main.cpp b/src/mlpack/methods/hmm/hmm_train_main.cpp
index e7820a6438d..546fb3c9197 100644
--- a/src/mlpack/methods/hmm/hmm_train_main.cpp
+++ b/src/mlpack/methods/hmm/hmm_train_main.cpp
@@ -24,7 +24,9 @@ PROGRAM_INFO("Hidden Markov Model (HMM) Training", "This program allows a "
     "\n\n"
     "The HMM is trained with the Baum-Welch algorithm if no labels are "
     "provided.  The tolerance of the Baum-Welch algorithm can be set with the "
-    "--tolerance option."
+    "--tolerance option.  In general it is a good idea to use random "
+    "initialization in this case, which can be specified with the "
+    "--random_initialization (-r) option."
     "\n\n"
     "Optionally, a pre-created HMM model can be used as a guess for the "
     "transition matrix and emission probabilities; this is specifiable with "
@@ -47,6 +49,8 @@ PARAM_STRING("output_model_file", "File to save trained HMM to.", "o",
     "output_hmm.xml");
 PARAM_INT("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
 PARAM_DOUBLE("tolerance", "Tolerance of the Baum-Welch algorithm.", "T", 1e-5);
+PARAM_FLAG("random_initialization", "Initialize emissions and transition "
+    "matrices with a uniform random distribution.", "r");
 
 using namespace mlpack;
 using namespace mlpack::hmm;
@@ -296,6 +300,21 @@ int main(int argc, char** argv)
       HMM<DiscreteDistribution> hmm(size_t(states),
           DiscreteDistribution(maxEmission), tolerance);
 
+      // Initialize with random starting point.
+      if (CLI::HasParam("random_initialization"))
+      {
+        hmm.Transition().randu();
+        for (size_t c = 0; c < hmm.Transition().n_cols; ++c)
+          hmm.Transition().col(c) /= arma::accu(hmm.Transition().col(c));
+
+        for (size_t e = 0; e < hmm.Emission().size(); ++e)
+        {
+          hmm.Emission()[e].Probabilities().randu();
+          hmm.Emission()[e].Probabilities() /=
+              arma::accu(hmm.Emission()[e].Probabilities());
+        }
+      }
+
       // Now train it.  Pass the already-loaded training data.
       Train::Apply(hmm, &trainSeq);
     }
@@ -314,6 +333,22 @@ int main(int argc, char** argv)
       HMM<GaussianDistribution> hmm(size_t(states),
           GaussianDistribution(dimensionality), tolerance);
 
+      // Initialize with random starting point.
+      if (CLI::HasParam("random_initialization"))
+      {
+        hmm.Transition().randu();
+        for (size_t c = 0; c < hmm.Transition().n_cols; ++c)
+          hmm.Transition().col(c) /= arma::accu(hmm.Transition().col(c));
+
+        for (size_t e = 0; e < hmm.Emission().size(); ++e)
+        {
+          hmm.Emission()[e].Mean().randu();
+          // Generate random covariance.
+          arma::mat r = arma::randu<arma::mat>(dimensionality, dimensionality);
+          hmm.Emission()[e].Covariance(r * r.t());
+        }
+      }
+
       // Now train it.
       Train::Apply(hmm, &trainSeq);
     }
@@ -336,6 +371,33 @@ int main(int argc, char** argv)
       HMM<GMM> hmm(size_t(states), GMM(size_t(gaussians), dimensionality),
           tolerance);
 
+      // Initialize with random starting point.
+      if (CLI::HasParam("random_initialization"))
+      {
+        hmm.Transition().randu();
+        for (size_t c = 0; c < hmm.Transition().n_cols; ++c)
+          hmm.Transition().col(c) /= arma::accu(hmm.Transition().col(c));
+
+        for (size_t e = 0; e < hmm.Emission().size(); ++e)
+        {
+          // Random weights.
+          hmm.Emission()[e].Weights().randu();
+          hmm.Emission()[e].Weights() /=
+              arma::accu(hmm.Emission()[e].Weights());
+
+          // Random means and covariances.
+          for (int g = 0; g < gaussians; ++g)
+          {
+            hmm.Emission()[e].Component(g).Mean().randu();
+
+            // Generate random covariance.
+            arma::mat r = arma::randu<arma::mat>(dimensionality,
+                dimensionality);
+            hmm.Emission()[e].Component(g).Covariance(r * r.t());
+          }
+        }
+      }
+
       // Issue a warning if the user didn't give labels.
       if (!CLI::HasParam("labels_file"))
         Log::Warn << "Unlabeled training of GMM HMMs is almost certainly not "

From 986620375ce84cdc75fdfd99f63f17b5c8ee507a Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Mon, 16 May 2016 15:12:19 -0400
Subject: [PATCH 04/87] Don't forget the period in the output.

---
 src/mlpack/methods/hmm/hmm_impl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mlpack/methods/hmm/hmm_impl.hpp b/src/mlpack/methods/hmm/hmm_impl.hpp
index 2bfc4cb8fe9..05b7a292e31 100644
--- a/src/mlpack/methods/hmm/hmm_impl.hpp
+++ b/src/mlpack/methods/hmm/hmm_impl.hpp
@@ -186,7 +186,7 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
       emission[state].Train(emissionList, emissionProb[state]);
 
     Log::Debug << "Iteration " << iter << ": log-likelihood " << loglik
-        << std::endl;
+        << "." << std::endl;
 
     if (std::abs(oldLoglik - loglik) < tolerance)
     {

From 2114358dbe9c25ef71ceaf09449ff0a68b81ffc1 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Fri, 15 Apr 2016 15:58:05 +0200
Subject: [PATCH 05/87] Add MultiplyConstantLayer which multiplies the input by
 a non-learnable constant.

---
 .../ann/layer/multiply_constant_layer.hpp     | 108 ++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 src/mlpack/methods/ann/layer/multiply_constant_layer.hpp

diff --git a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
new file mode 100644
index 00000000000..78c42e9e03f
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
@@ -0,0 +1,108 @@
+/**
+ * @file multiply_constant_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the MultiplyConstantLayer class, which multiplies the input by
+ * a (non-learnable) constant.
+ */
+#ifndef __MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
+#define __MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the multiply constant layer. The multiply constant layer
+ * multiplies the input by a (non-learnable) constant.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class MultiplyConstantLayer
+{
+ public:
+  /**
+   * Create the MultiplyConstantLayer object with the given constant factor.
+   */
+  MultiplyConstantLayer(const double scalar) : scalar(scalar)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network. Multiply the input with the
+   * specified constant scalar value.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename InputType, typename OutputType>
+  void Forward(const InputType& input, OutputType& output)
+  {
+    output = input * scalar;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network. The backward pass
+   * multiplies the error with the specified constant scalar value.
+   *
+   * @param input The propagated input activation (unused).
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType>
+  void Backward(const DataType& /* input */, const DataType& gy, DataType& g)
+  {
+    g = gy * scalar;
+  }
+
+  //! Get the input parameter.
+  const InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  const OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  const OutputDataType& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  /**
+   * Serialize the layer (only the scalar factor is archived).
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(scalar, "scalar");
+  }
+
+ private:
+  //! Locally-stored scalar factor; non-const so that Serialize() can load it.
+  double scalar;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class MultiplyConstantLayer
+
+}; // namespace ann
+}; // namespace mlpack
+
+#endif

From cc7bac36a7784a8a640efc06834f534eb74559ac Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Mon, 18 Apr 2016 22:50:13 +0200
Subject: [PATCH 06/87] Add NegativeLogLikelihoodLayer class which is useful to
 train a classification problem with n classes.

---
 .../layer/negative_log_likelihood_layer.hpp   | 122 ++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp

diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
new file mode 100644
index 00000000000..27a337e81ff
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
@@ -0,0 +1,122 @@
+/**
+ * @file negative_log_likelihood_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the NegativeLogLikelihoodLayer class.
+ */
+#ifndef __MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP
+#define __MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the negative log likelihood layer. The negative log
+ * likelihood layer expects that the input contains log-probabilities for each
+ * class. The layer also expects a class index, in the range between 1 and the
+ * number of classes, as target when calling the Forward function.
+ * Forward() returns the negated sum of the target-class log-probabilities.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class NegativeLogLikelihoodLayer
+{
+ public:
+  /**
+   * Create the NegativeLogLikelihoodLayer object.
+   */
+  NegativeLogLikelihoodLayer() { /* Nothing to do here. */ }
+
+  /**
+   * Ordinary feed forward pass of a neural network. Column i of the input
+   * holds the log-probabilities that are paired with target(i), and target(i)
+   * is a 1-based class index that selects one row of that column.
+   *
+   * @param input Input data that contains the log-probabilities for each class.
+   * @param target The target vector, that contains the class index in the range
+   *        between 1 and the number of classes.
+   * @return Negated sum of the log-probabilities of the target classes.
+   */
+  template<typename eT>
+  double Forward(const arma::Mat<eT>& input, const arma::Mat<eT>& target)
+  {
+    double output = 0;
+
+    for (size_t i = 0; i < input.n_cols; ++i)
+    {
+      // Check the raw 1-based index: once converted, a size_t is never < 0.
+      Log::Assert(target(i) >= 1 && target(i) <= input.n_rows,
+          "Target class out of range.");
+      const size_t currentTarget = static_cast<size_t>(target(i)) - 1;
+      output -= input(currentTarget, i);
+    }
+
+    return output;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network. The output is resized to
+   * the shape of the input and zero-filled; then, for every column i, the
+   * entry in the row selected by the 1-based class index target(i) is set to
+   * -1.
+   *
+   * @param input The propagated input activation.
+   * @param target The target vector, that contains the class index in the range
+   *        between 1 and the number of classes.
+   * @param output The calculated error.
+   */
+  template<typename eT>
+  void Backward(const arma::Mat<eT>& input,
+                const arma::Mat<eT>& target,
+                arma::Mat<eT>& output)
+  {
+    output = arma::zeros<arma::Mat<eT> >(input.n_rows, input.n_cols);
+    for (size_t i = 0; i < input.n_cols; ++i)
+    {
+      // Check the raw 1-based index: once converted, a size_t is never < 0.
+      Log::Assert(target(i) >= 1 && target(i) <= input.n_rows,
+          "Target class out of range.");
+      const size_t currentTarget = static_cast<size_t>(target(i)) - 1;
+      output(currentTarget, i) = -1;
+    }
+  }
+
+  //! Get the input parameter.
+  const InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  const OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  const OutputDataType& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+ private:
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class NegativeLogLikelihoodLayer
+
+}; // namespace ann
+}; // namespace mlpack
+
+#endif

From b69c6dce9d62f819433d1bdc5ed233b2dd941422 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Tue, 19 Apr 2016 00:00:32 +0200
Subject: [PATCH 07/87] Add the VRClassRewardLayer class which implements the
 REINFORCE algorithm for classification models. To be precise, this is a
 variance-reduced classification reinforcement learning rule.

---
 .../ann/layer/vr_class_reward_layer.hpp       | 167 ++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100644 src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp

diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
new file mode 100644
index 00000000000..9b1451c0b4a
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
@@ -0,0 +1,167 @@
+/**
+ * @file vr_class_reward_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the VRClassRewardLayer class, which implements the variance
+ * reduced classification reinforcement layer.
+ */
+#ifndef __MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
+#define __MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the variance reduced classification reinforcement layer.
+ * This layer is meant to be used in combination with the reinforce normal layer
+ * (ReinforceNormalLayer), which expects a reward (1 for success, 0 otherwise).
+ * Forward() awards scale (1 by default) when the prediction is correct.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::field<arma::mat>,
+    typename OutputDataType = arma::field<arma::mat>
+>
+class VRClassRewardLayer
+{
+ public:
+  /**
+   * Create the VRClassRewardLayer object.
+   *
+   * @param scale Parameter used to scale the reward.
+   * @param sizeAverage If true, divide the results by the number of columns.
+   */
+  VRClassRewardLayer(const double scale = 1, const bool sizeAverage = true) :
+      scale(scale), sizeAverage(sizeAverage),
+      reward(0), deterministic(false)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network. This overload passes the
+   * scores stored in input(0, 0) on to the matrix overload of Forward().
+   *
+   * @param input Field whose element (0, 0) holds the per-class scores.
+   * @param target Vector of 1-based class indices, one entry per column.
+   * @return The value returned by the matrix overload.
+   */
+  template<typename eT>
+  double Forward(const arma::field<arma::Mat<eT> >& input,
+                 const arma::Mat<eT>& target)
+  {
+    return Forward(input(0, 0), target);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network. The class with the largest
+   * score in a column is the prediction; a correct prediction earns scale.
+   *
+   * @param input Input data that contains the log-probabilities for each class.
+   * @param target Vector of 1-based class indices, one entry per column.
+   * @return The negated reward, divided by the column count if sizeAverage.
+   */
+  template<typename eT>
+  double Forward(const arma::Mat<eT>& input, const arma::Mat<eT>& target)
+  {
+    // NOTE(review): reward is overwritten per column, so the last column wins.
+    reward = 0;
+    arma::uword index;
+
+    for (size_t i = 0; i < input.n_cols; i++)
+    {
+      input.unsafe_col(i).max(index);
+      reward = ((index + 1) == target(i)) * scale;
+    }
+
+    if (sizeAverage)
+    {
+      return -reward / input.n_cols;
+    }
+
+    return -reward;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, using the reward stored
+   * by the last Forward() call and the scalar held in input(1, 0).
+   *
+   * @param input Field with the class scores at (0, 0), a scalar at (1, 0).
+   * @param gy The backpropagated error (unused).
+   * @param g The calculated gradient, a field with two rows and one column.
+   * @return The reward minus input(1, 0), averaged if sizeAverage is set.
+   */
+  template<typename eT>
+  double Backward(const arma::field<arma::Mat<eT> >& input,
+                  const arma::Mat<eT>& /* gy */,
+                  arma::field<arma::Mat<eT> >& g)
+  {
+    g = arma::field<arma::Mat<eT> >(2, 1);
+    g(0, 0).zeros(input(0, 0).n_rows, input(0, 0).n_cols);
+
+    double vrReward = reward - arma::as_scalar(input(1, 0));
+    if (sizeAverage)
+    {
+      vrReward /= input(0, 0).n_cols;
+    }
+
+    // Average over the columns of the data, not of the enclosing field.
+    const double norm = sizeAverage ? 2.0 / input(0, 0).n_cols : 2.0;
+    g(1, 0) = norm * (input(1, 0) - reward);
+
+    return vrReward;
+  }
+
+  //! Get the input parameter.
+  const InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  const OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  const OutputDataType& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the value of the deterministic parameter.
+  bool Deterministic() const { return deterministic; }
+  //! Modify the value of the deterministic parameter.
+  bool& Deterministic() { return deterministic; }
+
+ private:
+  //! Locally-stored value to scale the reward.
+  const double scale;
+
+  //! If true, results are divided by the number of input columns.
+  const bool sizeAverage;
+
+  //! Locally stored reward parameter.
+  double reward;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Locally-stored deterministic flag; this layer's math never reads it.
+  bool deterministic;
+}; // class VRClassRewardLayer
+
+}; // namespace ann
+}; // namespace mlpack
+
+#endif

From be43684ec0507b0b74e4f8a551c80d28e50b3168 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Wed, 20 Apr 2016 18:26:56 +0200
Subject: [PATCH 08/87] Add GlimpseLayer class which takes an input image and a
 location to extract a retina-like representation of the input image.

---
 .../methods/ann/layer/glimpse_layer.hpp       | 480 ++++++++++++++++++
 1 file changed, 480 insertions(+)
 create mode 100644 src/mlpack/methods/ann/layer/glimpse_layer.hpp

diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
new file mode 100644
index 00000000000..f41a615a1f6
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
@@ -0,0 +1,480 @@
+/**
+ * @file glimpse_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the GlimpseLayer class, which takes an input image and a
+ * location to extract a retina-like representation of the input image at
+ * different increasing scales.
+ *
+ * For more information, see the following.
+ *
+ * @code
+ * @article{CoRR2014,
+ *   author  = {Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu},
+ *   title   = {Recurrent Models of Visual Attention},
+ *   journal = {CoRR},
+ *   volume  = {abs/1406.6247},
+ *   year    = {2014},
+ * }
+ * @endcode
+ */
+#ifndef __MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
+#define __MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/ann/pooling_rules/mean_pooling.hpp>
+#include <algorithm>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The glimpse layer returns a retina-like representation
+ * (down-scaled cropped images) of increasing scale around a given location in a
+ * given image.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::cube,
+    typename OutputDataType = arma::cube
+>
+class GlimpseLayer
+{
+ public:
+
+  /**
+   * Create the GlimpseLayer object using the specified ratio and rescale
+   * parameter.
+   *
+   * @param inSize The size of the input units.
+   * @param size The used glimpse size (height = width).
+   * @param depth The number of patches to crop per glimpse.
+   * @param scale The scaling factor used to create the increasing retina-like
+   *        representation.
+   */
+  GlimpseLayer(const size_t inSize,
+               const size_t size,
+               const size_t depth = 3,
+               const size_t scale = 2) :
+      inSize(inSize),
+      size(size),
+      depth(depth),
+      scale(scale)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of the glimpse layer.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
+  {
+    output = arma::Cube<eT>(size, size, depth * input.n_slices);
+
+    inputDepth = input.n_slices / inSize;
+
+    for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++)
+    {
+      for (size_t depthIdx = 0, glimpseSize = size;
+          depthIdx < depth; depthIdx++, glimpseSize *= scale)
+      {
+        size_t padSize = std::floor((glimpseSize - 1) / 2);
+
+        arma::Cube<eT> inputPadded = arma::zeros<arma::Cube<eT> >(
+            input.n_rows + padSize * 2, input.n_cols + padSize * 2,
+            input.n_slices / inSize);
+
+        inputPadded.tube(padSize, padSize, padSize + input.n_rows - 1,
+            padSize + input.n_cols - 1) = input.subcube(0, 0,
+            inputIdx * inputDepth, input.n_rows - 1, input.n_cols - 1,
+            (inputIdx + 1) * inputDepth - 1);
+
+        size_t h = inputPadded.n_rows - glimpseSize;
+        size_t w = inputPadded.n_cols - glimpseSize;
+
+        size_t x = std::min(h, (size_t) std::max(0.0,
+            (location(0, inputIdx) + 1) / 2.0 * h));
+        size_t y = std::min(w, (size_t) std::max(0.0,
+            (location(1, inputIdx) + 1) / 2.0 * w));
+
+        if (depthIdx == 0)
+        {
+          for (size_t j = (inputIdx + depthIdx), paddedSlice = 0;
+              j < output.n_slices; j += (inSize * depth), paddedSlice++)
+          {
+            output.slice(j) = inputPadded.subcube(x, y,
+                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                paddedSlice);
+          }
+        }
+        else
+        {
+          for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0;
+              j < output.n_slices; j += (inSize * depth), paddedSlice++)
+          {
+            arma::Mat<eT> poolingInput = inputPadded.subcube(x, y,
+                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                paddedSlice);
+
+            if (scale == 2)
+            {
+              Pooling(glimpseSize / size, poolingInput, output.slice(j));
+            }
+            else
+            {
+              ReSampling(poolingInput, output.slice(j));
+            }
+          }
+        }        
+      }
+    }
+  }
+
+  /**
+   * Ordinary feed backward pass of the glimpse layer.
+   *
+   * The backpropagated error is unpacked into a cube with the dimensions of
+   * the given input. For every input and every depth level the matching error
+   * slices are then written (depth 0) or unpooled/resampled (deeper levels)
+   * into a zero-padded cube at the stored glimpse location, and the unpadded
+   * region of that cube is accumulated into g.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename ErrorType, typename eT>
+  void Backward(const InputType& input,
+                const ErrorType& gy,
+                arma::Cube<eT>& g)
+  {
+    // Generate a cube using the backpropagated error matrix. The cube uses the
+    // element type of g, so the function is not tied to double precision.
+    arma::Cube<eT> mappedError = arma::zeros<arma::Cube<eT> >(input.n_rows,
+        input.n_cols, input.n_slices);
+
+    // Each column of gy is cut into chunks of input.n_rows * input.n_cols
+    // elements; chunk j of column i becomes slice (j * gy.n_cols + i).
+    for (size_t s = 0, j = 0; s < mappedError.n_slices; s += gy.n_cols, j++)
+    {
+      for (size_t i = 0; i < gy.n_cols; i++)
+      {
+        arma::Col<eT> temp = gy.col(i).subvec(
+            j * input.n_rows * input.n_cols,
+            (j + 1) * input.n_rows * input.n_cols - 1);
+
+        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
+            input.n_rows, input.n_cols);
+      }
+    }
+
+    g = arma::zeros<arma::Cube<eT> >(inputParameter.n_rows,
+        inputParameter.n_cols, inputParameter.n_slices);
+
+    for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++)
+    {
+      for (size_t depthIdx = 0, glimpseSize = size;
+          depthIdx < depth; depthIdx++, glimpseSize *= scale)
+      {
+        // Integer division already rounds down, no floor needed.
+        const size_t padSize = (glimpseSize - 1) / 2;
+
+        arma::Cube<eT> inputPadded = arma::zeros<arma::Cube<eT> >(
+            inputParameter.n_rows + padSize * 2, inputParameter.n_cols +
+            padSize * 2, inputParameter.n_slices / inSize);
+
+        size_t h = inputPadded.n_rows - glimpseSize;
+        size_t w = inputPadded.n_cols - glimpseSize;
+
+        // Map the location from [-1, 1] to the top-left corner of the patch
+        // inside the padded cube, clamped to the valid range.
+        size_t x = std::min(h, (size_t) std::max(0.0,
+            (location(0, inputIdx) + 1) / 2.0 * h));
+        size_t y = std::min(w, (size_t) std::max(0.0,
+            (location(1, inputIdx) + 1) / 2.0 * w));
+
+        if (depthIdx == 0)
+        {
+          // The first depth level was cropped without rescaling, so the error
+          // is copied straight into the patch.
+          for (size_t j = (inputIdx + depthIdx), paddedSlice = 0;
+              j < mappedError.n_slices; j += (inSize * depth), paddedSlice++)
+          {
+            inputPadded.subcube(x, y,
+                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                paddedSlice) = mappedError.slice(j);
+          }
+        }
+        else
+        {
+          for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0;
+              j < mappedError.n_slices; j += (inSize * depth), paddedSlice++)
+          {
+            arma::Mat<eT> poolingOutput = inputPadded.subcube(x, y,
+                 paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                 paddedSlice);
+
+            if (scale == 2)
+            {
+              Unpooling(inputParameter.slice(paddedSlice), mappedError.slice(j),
+                  poolingOutput);
+            }
+            else
+            {
+              DownwardReSampling(inputParameter.slice(paddedSlice),
+                  mappedError.slice(j), poolingOutput);
+            }
+
+            inputPadded.subcube(x, y,
+                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                paddedSlice) = poolingOutput;
+          }
+        }
+
+        // Strip the padding and accumulate the result.
+        g += inputPadded.tube(padSize, padSize, padSize +
+            inputParameter.n_rows - 1, padSize + inputParameter.n_cols - 1);
+      }
+    }
+
+    Transform(g);
+  }
+
+  //! Get the input parameter. The const overload hands out a const reference,
+  //! since a const member cannot be bound to a mutable one.
+  const InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter. The const overload hands out a const reference,
+  //! since a const member cannot be bound to a mutable one.
+  const OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta. The const overload hands out a const reference, since a
+  //! const member cannot be bound to a mutable one.
+  const OutputDataType& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Set the location: the x and y coordinate of the center of the output
+  //! glimpse.
+  void Location(const arma::mat& location)
+  {
+    // Log::Debug << "location: " << location.t() << std::endl;
+    this->location = location;
+  }
+
+ private:
+  /*
+   * Rearrange the given matrix in place: the elements are read in linear
+   * element order (column-major in Armadillo) and written back row by row.
+   * For a square matrix this is the transpose.
+   *
+   * @tparam eT Element type of the matrix.
+   * @param w The matrix to transform in place.
+   */
+  template<typename eT>
+  void Transform(arma::Mat<eT>& w)
+  {
+    // Keep an untouched copy, since w is overwritten while it is read.
+    const arma::Mat<eT> t = w;
+
+    for (size_t i = 0, k = 0; i < w.n_elem; k++)
+    {
+      for (size_t j = 0; j < w.n_cols; j++, i++)
+      {
+        w(k, j) = t(i);
+      }
+    }
+  }
+
+  /*
+   * Apply the matrix transformation above to every slice of the given cube.
+   *
+   * @tparam eT Element type of the cube.
+   * @param w The cube whose slices are transformed in place.
+   */
+  template<typename eT>
+  void Transform(arma::Cube<eT>& w)
+  {
+    for (size_t i = 0; i < w.n_slices; i++)
+    {
+      // Work on a copy of the slice and write the result back afterwards.
+      arma::Mat<eT> t = w.slice(i);
+      Transform(t);
+      w.slice(i) = t;
+    }
+  }
+
+  /**
+   * Pool the input with a square kernel that is moved in steps of its own
+   * size, and add every pooled value to the matching output element.
+   *
+   * @param kSize The kernel size (also used as step size).
+   * @param input The matrix the pooling rule is applied to.
+   * @param output The matrix the pooled values are added to.
+   */
+  template<typename eT>
+  void Pooling(const size_t kSize,
+               const arma::Mat<eT>& input,
+               arma::Mat<eT>& output)
+  {
+    for (size_t colBegin = 0; colBegin < input.n_cols; colBegin += kSize)
+    {
+      const size_t colEnd = colBegin + kSize - 1;
+
+      for (size_t rowBegin = 0; rowBegin < input.n_rows; rowBegin += kSize)
+      {
+        const size_t rowEnd = rowBegin + kSize - 1;
+
+        // One kernel window maps to exactly one output element.
+        output(rowBegin / kSize, colBegin / kSize) += pooling.Pooling(
+            input(arma::span(rowBegin, rowEnd), arma::span(colBegin, colEnd)));
+      }
+    }
+  }
+
+  /**
+   * Distribute every error element over the input area it was pooled from and
+   * add the result to the matching area of the output.
+   *
+   * @param input The input the unpooling rule is applied to.
+   * @param error The error used to perform the unpooling operation.
+   * @param output The matrix the unpooled error is added to.
+   */
+  template<typename eT>
+  void Unpooling(const arma::Mat<eT>& input,
+                 const arma::Mat<eT>& error,
+                 arma::Mat<eT>& output)
+  {
+    // Size of the input area that belongs to a single error element.
+    const size_t rowStride = input.n_rows / error.n_rows;
+    const size_t colStride = input.n_cols / error.n_cols;
+
+    arma::Mat<eT> areaError;
+    for (size_t c = 0; c < input.n_cols; c += colStride)
+    {
+      const arma::span cols(c, c + colStride - 1);
+
+      for (size_t r = 0; r < input.n_rows; r += rowStride)
+      {
+        const arma::span rows(r, r + rowStride - 1);
+
+        const arma::Mat<eT>& inputArea = input(rows, cols);
+
+        pooling.Unpooling(inputArea, error(r / rowStride, c / colStride),
+            areaError);
+
+        output(rows, cols) += areaError;
+      }
+    }
+  }
+
+  /**
+   * Resample the input to a size x size output using bilinear interpolation.
+   *
+   * Rows are addressed by y and columns by x, so the input does not have to be
+   * square. An empty input leaves the output untouched, and a glimpse size of
+   * one samples the top-left input element.
+   *
+   * @param input The input the ReSampling rule is applied to.
+   * @param output The resampled result; must hold at least size x size
+   *     elements.
+   */
+  template<typename eT>
+  void ReSampling(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    // Nothing to sample from.
+    if (input.n_rows == 0 || input.n_cols == 0)
+      return;
+
+    // Largest valid column (x) and row (y) index of the input.
+    const double iWidth = input.n_cols - 1;
+    const double iHeight = input.n_rows - 1;
+
+    // Distance between two sample points in input coordinates. With a single
+    // output element there is no distance to cover; avoid dividing by zero.
+    const double wRatio = (size > 1) ? iWidth / (size - 1) : 0.0;
+    const double hRatio = (size > 1) ? iHeight / (size - 1) : 0.0;
+
+    for (size_t y = 0; y < size; y++)
+    {
+      for (size_t x = 0; x < size; x++)
+      {
+        const double ix = wRatio * x;
+        const double iy = hRatio * y;
+
+        // Get the 4 nearest neighbors.
+        const double ixNw = std::floor(ix);
+        const double iyNw = std::floor(iy);
+        const double ixNe = ixNw + 1;
+        const double iySw = iyNw + 1;
+
+        // Get surfaces to each neighbor.
+        const double se = (ix - ixNw) * (iy - iyNw);
+        const double sw = (ixNe - ix) * (iy - iyNw);
+        const double ne = (ix - ixNw) * (iySw - iy);
+        const double nw = (ixNe - ix) * (iySw - iy);
+
+        // Clamp the south/east neighbors to the input boundary.
+        const size_t rowN = static_cast<size_t>(iyNw);
+        const size_t rowS = static_cast<size_t>(std::min(iySw, iHeight));
+        const size_t colW = static_cast<size_t>(ixNw);
+        const size_t colE = static_cast<size_t>(std::min(ixNe, iWidth));
+
+        // Calculate the weighted sum.
+        output(y, x) = input(rowN, colW) * nw +
+            input(rowN, colE) * ne +
+            input(rowS, colW) * sw +
+            input(rowS, colE) * se;
+      }
+    }
+  }
+
+  /**
+   * Backward counterpart of ReSampling: every element of the size x size error
+   * is scattered onto its four neighboring positions in the output, weighted
+   * with the same bilinear surfaces ReSampling uses.
+   *
+   * NOTE(review): the sampling ratios and the clamping bounds are taken from
+   * the dimensions of input, but they are used to index output. This looks
+   * like it expects input and output to have the same (square) shape; confirm
+   * against the caller.
+   *
+   * @param input The input to be apply the DownwardReSampling rule; only its
+   *     dimensions are read.
+   * @param error The error used to perform the DownwardReSampling operation.
+   * @param output The DownwardReSampled result; the weighted errors are added
+   *     to the existing values.
+   */
+  template<typename eT>
+  void DownwardReSampling(const arma::Mat<eT>& input,
+                          const arma::Mat<eT>& error,
+                          arma::Mat<eT>& output)
+  {
+    // NOTE(review): iWidth is derived from n_rows but bounds the second
+    // (column) index below, and iHeight from n_cols bounds the first (row)
+    // index. That is only equivalent for square matrices; confirm.
+    double iWidth = input.n_rows - 1;
+    double iHeight = input.n_cols - 1;
+
+    // Distance between two sample points. NOTE(review): size == 1 divides by
+    // zero here.
+    double wRatio = iWidth / (size - 1);
+    double hRatio = iHeight / (size - 1);    
+
+    for (size_t y = 0; y < size; y++)
+    {
+      for (size_t x = 0; x < size; x++)
+      {
+        double ix = wRatio * x;
+        double iy = hRatio * y;
+
+        // Get the 4 nearest neighbors.
+        double ixNw = std::floor(ix);
+        double iyNw = std::floor(iy);
+        double ixNe = ixNw + 1;
+        double iySw = iyNw + 1;
+
+        // Get surfaces to each neighbor.
+        double se = (ix - ixNw) * (iy - iyNw);
+        double sw = (ixNe - ix) * (iy - iyNw);
+        double ne = (ix - ixNw) * (iySw - iy);
+        double nw = (ixNe - ix) * (iySw - iy);
+
+        double ograd = error(y, x);
+
+        // Scatter-add the weighted error. The clamped indices of different
+        // neighbors can coincide at the boundary, so the four updates are
+        // applied one after another.
+        output(iyNw, ixNw) = output(iyNw, ixNw) + nw * ograd;
+        output(iyNw, std::min(ixNe, iWidth)) = output(iyNw,
+            std::min(ixNe, iWidth)) + ne * ograd;
+        output(std::min(iySw, iHeight), ixNw) = output(std::min(iySw, iHeight),
+            ixNw) + sw * ograd;
+        output(std::min(iySw, iHeight), std::min(ixNe, iWidth)) = output(
+            std::min(iySw, iHeight), std::min(ixNe, iWidth)) + se * ograd;
+      }
+    }
+  }
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Locally-stored depth of the input.
+  size_t inputDepth;
+
+  //! The size of the input units.
+  size_t inSize;
+
+  //! The used glimpse size (height = width).
+  size_t size;
+
+  //! The number of patches to crop per glimpse.
+  size_t depth;
+
+  //! The scale fraction.
+  size_t scale;
+
+  //! The x and y coordinate of the center of the output glimpse.
+  arma::mat location;
+
+  //! Locally-stored object to perform the mean pooling operation.
+  MeanPooling pooling;
+}; // class GlimpseLayer
+
+}; // namespace ann
+}; // namespace mlpack
+
+#endif

From fe69d336ad95e55129c5fd4587cabde507916f1d Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Sat, 23 Apr 2016 12:33:37 +0200
Subject: [PATCH 09/87] Remove debug message.

---
 src/mlpack/methods/ann/layer/glimpse_layer.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
index f41a615a1f6..8e7ab8b49d1 100644
--- a/src/mlpack/methods/ann/layer/glimpse_layer.hpp
+++ b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
@@ -252,7 +252,6 @@ class GlimpseLayer
   //! glimpse.
   void Location(const arma::mat& location)
   {
-    // Log::Debug << "location: " << location.t() << std::endl;
     this->location = location;
   }
 

From 4d8347a1d5f84344c00411592a1352b9ab14b522 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Mon, 2 May 2016 15:38:51 +0200
Subject: [PATCH 10/87] Add Recurrent Model of Visual Attention (RMVA)
 implementation.

---
 src/mlpack/methods/rmva/rmva.hpp      | 914 ++++++++++++++++++++++++++
 src/mlpack/methods/rmva/rmva_impl.hpp | 736 +++++++++++++++++++++
 src/mlpack/methods/rmva/rmva_main.cpp | 289 ++++++++
 3 files changed, 1939 insertions(+)
 create mode 100644 src/mlpack/methods/rmva/rmva.hpp
 create mode 100644 src/mlpack/methods/rmva/rmva_impl.hpp
 create mode 100644 src/mlpack/methods/rmva/rmva_main.cpp

diff --git a/src/mlpack/methods/rmva/rmva.hpp b/src/mlpack/methods/rmva/rmva.hpp
new file mode 100644
index 00000000000..6756465c852
--- /dev/null
+++ b/src/mlpack/methods/rmva/rmva.hpp
@@ -0,0 +1,914 @@
+/**
+ * @file rmva.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the RecurrentNeuralAttention class, which implements the
+ * Recurrent Model of Visual Attention (RMVA).
+ */
+#ifndef __MLPACK_METHODS_RMVA_RMVA_HPP
+#define __MLPACK_METHODS_RMVA_RMVA_HPP
+
+#include <mlpack/core.hpp>
+
+#include <mlpack/methods/ann/network_util.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
+#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
+#include <mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp>
+#include <mlpack/methods/ann/layer/vr_class_reward_layer.hpp>
+
+#include <boost/ptr_container/ptr_vector.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
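+/**
+ * An implementation of the Recurrent Model of Visual Attention (RMVA).
+ *
+ * @tparam LocatorType Type of the locator network, whose output is used as
+ *     the glimpse location.
+ * @tparam LocationSensorType Type of the network applied to the locator
+ *     output.
+ * @tparam GlimpseSensorType Type of the network applied to the input at the
+ *     current location.
+ * @tparam GlimpseType Type of the network applied to the joined location
+ *     sensor and glimpse sensor outputs.
+ * @tparam StartType Type of the network applied to the glimpse output in the
+ *     first step.
+ * @tparam FeedbackType Type of the network applied to the previous transfer
+ *     output in every later step.
+ * @tparam TransferType Type of the network that produces the recurrent state.
+ * @tparam ClassifierType Type of the network that produces the prediction.
+ * @tparam RewardPredictorType Type of the reward predictor network.
+ * @tparam InitializationRuleType Type of the initialization rule passed to
+ *     the constructor.
+ * @tparam MatType Matrix type used to store the layer activations.
+ */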
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType = RandomInitialization,
+  typename MatType = arma::mat
+>
+class RecurrentNeuralAttention
+{
+ public:
+  //! Convenience typedef for the internal model construction.
+  using NetworkType = RecurrentNeuralAttention<
+      LocatorType,
+      LocationSensorType,
+      GlimpseSensorType,
+      GlimpseType,
+      StartType,
+      FeedbackType,
+      TransferType,
+      ClassifierType,
+      RewardPredictorType,
+      InitializationRuleType,
+      MatType>;
+
+  /**
+   * Construct the RecurrentNeuralAttention object from the given networks.
+   *
+   * @param locator The locator network.
+   * @param locationSensor The location sensor network.
+   * @param glimpseSensor The glimpse sensor network.
+   * @param glimpse The glimpse network.
+   * @param start The start network.
+   * @param feedback The feedback network.
+   * @param transfer The transfer network.
+   * @param classifier The classifier network.
+   * @param rewardPredictor The reward predictor network.
+   * @param nStep Number of steps the recurrent part of the model is run for.
+   * @param initializeRule Initialization rule; presumably used to initialize
+   *     the model parameters (the definition is not part of this header).
+   */
+  template<typename TypeLocator,
+           typename TypeLocationSensor,
+           typename TypeGlimpseSensor,
+           typename TypeGlimpse,
+           typename TypeStart,
+           typename TypeFeedback,
+           typename TypeTransfer,
+           typename TypeClassifier,
+           typename TypeRewardPredictor>
+  RecurrentNeuralAttention(TypeLocator&& locator,
+                           TypeLocationSensor&& locationSensor,
+                           TypeGlimpseSensor&& glimpseSensor,
+                           TypeGlimpse&& glimpse,
+                           TypeStart&& start,
+                           TypeFeedback&& feedback,
+                           TypeTransfer&& transfer,
+                           TypeClassifier&& classifier,
+                           TypeRewardPredictor&& rewardPredictor,
+                           const size_t nStep,
+                           InitializationRuleType initializeRule = InitializationRuleType());
+
+  /**
+   * Train the model on the given predictors and responses using the given
+   * optimizer.
+   *
+   * @tparam OptimizerType Optimizer class template, instantiated with this
+   *     network type; defaults to RMSprop.
+   * @param predictors Input predictors.
+   * @param responses Responses that belong to the predictors.
+   * @param optimizer The optimizer instance to use.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::RMSprop
+  >
+  void Train(const arma::mat& predictors,
+             const arma::mat& responses,
+             OptimizerType<NetworkType>& optimizer);
+
+  /**
+   * Predict the responses to a given set of predictors. The responses will
+   * reflect the output of the given output layer as returned by the
+   * OutputClass() function.
+   *
+   * @param predictors Input predictors.
+   * @param responses Matrix to put output predictions of responses into.
+   */
+  void Predict(arma::mat& predictors, arma::mat& responses);
+
+  /**
+   * Evaluate the feedforward network with the given parameters. This function
+   * is usually called by the optimizer to train the model.
+   *
+   * @param parameters Matrix model parameters.
+   * @param i Index of point to use for objective function evaluation.
+   * @param deterministic Whether or not to train or test the model. Note some
+   * layer act differently in training or testing mode.
+   */
+  double Evaluate(const arma::mat& parameters,
+                  const size_t i,
+                  const bool deterministic = true);
+
+  /**
+   * Evaluate the gradient of the feedforward network with the given parameters,
+   * and with respect to only one point in the dataset. This is useful for
+   * optimizers such as SGD, which require a separable objective function.
+   *
+   * @param parameters Matrix of the model parameters to be optimized.
+   * @param i Index of points to use for objective function gradient evaluation.
+   * @param gradient Matrix to output gradient into.
+   */
+  void Gradient(const arma::mat& parameters,
+                const size_t i,
+                arma::mat& gradient);
+
+  //! Return the number of separable functions (the number of predictor points).
+  size_t NumFunctions() const { return numFunctions; }
+
+  //! Return the parameter matrix (the initial point for the optimization).
+  const arma::mat& Parameters() const { return parameter; }
+  //! Modify the parameter matrix (the initial point for the optimization).
+  arma::mat& Parameters() { return parameter; }
+
+  //! Return the number of steps to back-propagate through time; the same value
+  //! bounds the number of steps of the forward pass.
+  const size_t& Rho() const { return nStep; }
+  //! Modify the number of steps to back-propagate through time.
+  size_t& Rho() { return nStep; }
+
+  //! Return the current location.
+  const arma::mat& Location();
+
+  //! Serialize the model.
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */);
+
+ private:
+  /*
+   * Predict the response of the given input.
+   *
+   * All networks are reset, then the recurrent part is run for nStep steps:
+   * locator -> location sensor and glimpse sensor -> glimpse -> start (first
+   * step) or feedback (later steps) -> transfer. The transfer output of every
+   * step becomes the locator input of the next one. Finally the classifier is
+   * run on the last stored locator input and its output is returned.
+   *
+   * NOTE(review): every call appends nStep + 1 cubes to locatorInput and
+   * nothing in this function removes them; confirm that the caller clears the
+   * container.
+   *
+   * @param input The input the glimpses are taken from; its n_slices member
+   *     is read.
+   * @param output The output of the last classifier layer.
+   */
+  template <typename InputType, typename OutputType>
+  void SinglePredict(const InputType& input, OutputType& output)
+  {
+    // Get the locator input size.
+    if (!inputSize)
+    {
+      inputSize = NetworkInputSize(locator);
+    }
+
+    // Reset networks.
+    ResetParameter(locator);
+    ResetParameter(locationSensor);
+    ResetParameter(glimpseSensor);
+    ResetParameter(glimpse);
+    ResetParameter(feedback);
+    ResetParameter(transfer);
+    ResetParameter(classifier);
+    ResetParameter(rewardPredictor);
+    ResetParameter(start);
+
+    // Sample an initial starting action by forwarding zeros through the
+    // locator.
+    locatorInput.push_back(new arma::cube(arma::zeros<arma::cube>(inputSize, 1,
+        input.n_slices)));
+
+    // Forward pass through the recurrent network.
+    for (step = 0; step < nStep; step++)
+    {
+      // Locator forward pass.
+      Forward(locatorInput.back(), locator);
+
+      // Location sensor forward pass.
+      Forward(std::get<std::tuple_size<LocatorType>::value - 1>(
+          locator).OutputParameter(), locationSensor);
+
+      // Set the location parameter for all layer that implement a Location
+      // function e.g. GlimpseLayer.
+      ResetLocation(std::get<std::tuple_size<LocatorType>::value - 1>(
+          locator).OutputParameter(), glimpseSensor);
+
+      // Glimpse sensor forward pass.
+      Forward(input, glimpseSensor);
+
+      // Concat the parameter activation from the location sensor and
+      // glimpse sensor.
+      arma::mat concatLayerOutput = arma::join_cols(
+          std::get<std::tuple_size<LocationSensorType>::value - 1>(
+          locationSensor).OutputParameter(),
+          std::get<std::tuple_size<GlimpseSensorType>::value - 1>(
+          glimpseSensor).OutputParameter());
+
+      // Glimpse forward pass.
+      Forward(concatLayerOutput, glimpse);
+
+      if (step == 0)
+      {
+        // Start forward pass.
+        Forward(std::get<std::tuple_size<GlimpseType>::value - 1>(
+            glimpse).OutputParameter(), start);
+
+        // Transfer forward pass.
+        Forward(std::get<std::tuple_size<StartType>::value - 1>(
+            start).OutputParameter(), transfer);
+      }
+      else
+      {
+        // Feedback forward pass.
+        Forward(std::get<std::tuple_size<TransferType>::value - 1>(
+            transfer).OutputParameter(), feedback);
+
+        // Combine the current glimpse with the fed back previous state.
+        arma::mat feedbackLayerOutput =
+          std::get<std::tuple_size<GlimpseType>::value - 1>(
+          glimpse).OutputParameter() +
+          std::get<std::tuple_size<FeedbackType>::value - 1>(
+          feedback).OutputParameter();
+
+        // Transfer forward pass.
+        Forward(feedbackLayerOutput, transfer);
+      }
+
+      // Update the input for the next run: the transfer output is stored as a
+      // cube with the dimensions of the previous locator input.
+      locatorInput.push_back(new arma::cube(
+          std::get<std::tuple_size<TransferType>::value - 1>(
+          transfer).OutputParameter().memptr(), locatorInput.back().n_rows,
+          locatorInput.back().n_cols, locatorInput.back().n_slices));
+    }
+
+    // Classifier forward pass.
+    Forward(locatorInput.back().slice(0), classifier);
+
+    output = std::get<std::tuple_size<ClassifierType>::value - 1>(
+        classifier).OutputParameter();
+  }
+
+  /**
+   * Hand the given reward to every layer of the network that provides a
+   * Reward() accessor; all other layers are left untouched.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp)>::type
+  ResetReward(const double /* reward */, std::tuple<Tp...>& /* network */)
+  {
+    // The end of the tuple is reached, there is no layer left to update.
+  }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<(I < sizeof...(Tp))>::type
+  ResetReward(const double reward, std::tuple<Tp...>& network)
+  {
+    auto& layer = std::get<I>(network);
+    SetReward(reward, layer);
+
+    // Continue with the next layer.
+    ResetReward<I + 1, Tp...>(reward, network);
+  }
+
+  //! The layer has no Reward() accessor, so there is nothing to set.
+  template<typename T>
+  typename std::enable_if<
+      !HasRewardCheck<T, double&(T::*)()>::value>::type
+  SetReward(const double /* reward */, T& /* layer */)
+  {
+    /* Nothing to do here */
+  }
+
+  //! The layer has a Reward() accessor; store the reward in the layer.
+  template<typename T>
+  typename std::enable_if<
+      HasRewardCheck<T, double&(T::*)()>::value>::type
+  SetReward(const double reward, T& layer)
+  {
+    layer.Reward() = reward;
+  }
+
+  /**
+   * Reset every layer of the network: the layer status (deterministic flag)
+   * is updated where the layer supports it and the delta is set to zero.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<(I < sizeof...(Tp))>::type
+  ResetParameter(std::tuple<Tp...>& network)
+  {
+    auto& layer = std::get<I>(network);
+
+    ResetDeterministic(layer);
+    layer.Delta().zeros();
+
+    // Continue with the next layer.
+    ResetParameter<I + 1, Tp...>(network);
+  }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp)>::type
+  ResetParameter(std::tuple<Tp...>& /* network */)
+  {
+    /* Nothing to do here */
+  }
+
+  //! The layer has no Deterministic() accessor, so there is nothing to set.
+  template<typename T>
+  typename std::enable_if<
+      !HasDeterministicCheck<T, bool&(T::*)(void)>::value>::type
+  ResetDeterministic(T& /* layer */)
+  {
+    /* Nothing to do here */
+  }
+
+  //! The layer has a Deterministic() accessor; copy the model status into it.
+  template<typename T>
+  typename std::enable_if<
+      HasDeterministicCheck<T, bool&(T::*)(void)>::value>::type
+  ResetDeterministic(T& layer)
+  {
+    layer.Deterministic() = deterministic;
+  }
+
+  /**
+   * Hand the given location to every layer of the network that provides a
+   * Location(const arma::mat&) function; all other layers are left untouched.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<(I < sizeof...(Tp))>::type
+  ResetLocation(const arma::mat& location, std::tuple<Tp...>& network)
+  {
+    auto& layer = std::get<I>(network);
+    SetLocation(layer, location);
+
+    // Continue with the next layer.
+    ResetLocation<I + 1, Tp...>(location, network);
+  }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp)>::type
+  ResetLocation(const arma::mat& /* location */,
+                std::tuple<Tp...>& /* network */)
+  {
+    // Nothing to do here.
+  }
+
+  //! The layer has no Location() function, so there is nothing to set.
+  template<typename T>
+  typename std::enable_if<
+      !HasLocationCheck<T, void(T::*)(const arma::mat&)>::value>::type
+  SetLocation(T& /* layer */, const arma::mat& /* location */)
+  {
+    // Nothing to do here.
+  }
+
+  //! The layer has a Location() function; pass the location on to the layer.
+  template<typename T>
+  typename std::enable_if<
+      HasLocationCheck<T, void(T::*)(const arma::mat&)>::value>::type
+  SetLocation(T& layer, const arma::mat& location)
+  {
+    layer.Location(location);
+  }
+
+  /**
+   * Append the activation of every layer of the network to the given
+   * container and count the stored activations.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp)>::type
+  SaveActivations(boost::ptr_vector<MatType>& /* activations */,
+                  std::tuple<Tp...>& /* network */,
+                  size_t& /* activationCounter */)
+  {
+    // Nothing to do here.
+  }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<(I < sizeof...(Tp))>::type
+  SaveActivations(boost::ptr_vector<MatType>& activations,
+                  std::tuple<Tp...>& network,
+                  size_t& activationCounter)
+  {
+    auto& layer = std::get<I>(network);
+
+    // The input parameter is only passed on to select the Save() overload.
+    Save(I, activations, layer, layer.InputParameter());
+    ++activationCounter;
+
+    SaveActivations<I + 1, Tp...>(activations, network, activationCounter);
+  }
+
+  /**
+   * Distinguish between recurrent layer and non-recurrent layer when storing
+   * the activations: a layer with a RecurrentParameter() accessor stores its
+   * recurrent parameter, every other layer stores its output parameter. A
+   * copy of the selected parameter is appended to the activations.
+   */
+  template<typename T, typename P>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Save(const size_t /* layerNumber */,
+       boost::ptr_vector<MatType>& activations,
+       T& layer,
+       P& /* unused */)
+  {
+    activations.push_back(new MatType(layer.RecurrentParameter()));
+  }
+
+  template<typename T, typename P>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Save(const size_t /* layerNumber */,
+       boost::ptr_vector<MatType>& activations,
+       T& layer,
+       P& /* unused */)
+  {
+    activations.push_back(new MatType(layer.OutputParameter()));
+  }
+
+  /**
+   * Append the output of every layer of the network to one of two containers,
+   * depending on the type of the layer output.
+   */
+  template<size_t I = 0, typename DataTypeA, typename DataTypeB, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp)>::type
+  SaveActivations(boost::ptr_vector<DataTypeA>& /* activationsA */,
+                  boost::ptr_vector<DataTypeB>& /* activationsB */,
+                  size_t& /* dataTypeACounter */,
+                  size_t& /* dataTypeBCounter */,
+                  std::tuple<Tp...>& /* network */)
+  {
+    // Nothing to do here.
+  }
+
+  template<size_t I = 0, typename DataTypeA, typename DataTypeB, typename... Tp>
+  typename std::enable_if<(I < sizeof...(Tp))>::type
+  SaveActivations(boost::ptr_vector<DataTypeA>& activationsA,
+                  boost::ptr_vector<DataTypeB>& activationsB,
+                  size_t& dataTypeACounter,
+                  size_t& dataTypeBCounter,
+                  std::tuple<Tp...>& network)
+  {
+    auto& layer = std::get<I>(network);
+
+    // The type of the output parameter selects the Save() overload.
+    Save(activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
+        layer, layer.OutputParameter());
+
+    SaveActivations<I + 1, DataTypeA, DataTypeB, Tp...>(
+        activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
+        network);
+  }
+
+  //! The layer output is of type DataTypeB: append a copy of it to the second
+  //! container and increase the matching counter.
+  template<typename T, typename DataTypeA, typename DataTypeB>
+  void Save(boost::ptr_vector<DataTypeA>& /* activationsA */,
+            boost::ptr_vector<DataTypeB>& activationsB,
+            size_t& /* dataTypeACounter */,
+            size_t& dataTypeBCounter,
+            T& layer,
+            DataTypeB& /* unused */)
+  {
+    activationsB.push_back(new DataTypeB(layer.OutputParameter()));
+    ++dataTypeBCounter;
+  }
+
+  //! The layer output is of type DataTypeA: append a copy of it to the first
+  //! container and increase the matching counter.
+  template<typename T, typename DataTypeA, typename DataTypeB>
+  void Save(boost::ptr_vector<DataTypeA>& activationsA,
+            boost::ptr_vector<DataTypeB>& /* activationsB */,
+            size_t& dataTypeACounter,
+            size_t& /* dataTypeBCounter */,
+            T& layer,
+            DataTypeA& /* unused */)
+  {
+    activationsA.push_back(new DataTypeA(layer.OutputParameter()));
+    ++dataTypeACounter;
+  }
+
+  /**
+   * Load the network layer activations.
+   *
+   * The layers are visited from the last to the first one and the activation
+   * counter is decreased before every access. Once every layer is restored,
+   * the given input is set as input of the first layer and the layer
+   * parameters are linked again.
+   */
+  template<size_t I = 0, typename DataType, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  LoadActivations(DataType& input,
+                  boost::ptr_vector<MatType>& /* activations */,
+                  size_t& /* activationCounter */,
+                  std::tuple<Tp...>& network)
+  {
+    std::get<0>(network).InputParameter() = input;
+    LinkParameter(network);
+  }
+
+  template<size_t I = 0, typename DataType, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  LoadActivations(DataType& input,
+                  boost::ptr_vector<MatType>& activations,
+                  size_t& activationCounter,
+                  std::tuple<Tp...>& network)
+  {
+    // The input parameter only selects the Load() overload, so it has to come
+    // from the same layer that is restored (as it does when saving).
+    Load(--activationCounter, activations,
+        std::get<sizeof...(Tp) - I - 1>(network),
+        std::get<sizeof...(Tp) - I - 1>(network).InputParameter());
+
+    LoadActivations<I + 1, DataType, Tp...>(input, activations,
+        activationCounter, network);
+  }
+
+  /**
+   * Distinguish between recurrent layer and non-recurrent layer when loading
+   * the activations: a layer without a RecurrentParameter() accessor gets the
+   * stored activation as output parameter, every other layer gets it as
+   * recurrent parameter.
+   */
+  template<typename T, typename P>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value>::type
+  Load(const size_t layerNumber,
+       boost::ptr_vector<MatType>& activations,
+       T& layer,
+       P& /* output */)
+  {
+    layer.OutputParameter() = activations[layerNumber];
+  }
+
+  template<typename T, typename P>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value>::type
+  Load(const size_t layerNumber,
+       boost::ptr_vector<MatType>& activations,
+       T& layer,
+       P& /* output */)
+  {
+    layer.RecurrentParameter() = activations[layerNumber];
+  }
+
+  /**
+   * Load the layer outputs from one of two containers, depending on the type
+   * of the layer output. The layers are visited from the last to the first
+   * one; afterwards the given input is set as input of the first layer and
+   * the layer parameters are linked again.
+   */
+  template<size_t I = 0,
+           typename DataType,
+           typename DataTypeA,
+           typename DataTypeB,
+           typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp)>::type
+  LoadActivations(DataType& input,
+                  boost::ptr_vector<DataTypeA>& /* activationsA */,
+                  boost::ptr_vector<DataTypeB>& /* activationsB */,
+                  size_t& /* dataTypeACounter */,
+                  size_t& /* dataTypeBCounter */,
+                  std::tuple<Tp...>& network)
+  {
+    std::get<0>(network).InputParameter() = input;
+    LinkParameter(network);
+  }
+
+  template<size_t I = 0,
+           typename DataType,
+           typename DataTypeA,
+           typename DataTypeB,
+           typename... Tp>
+  typename std::enable_if<(I < sizeof...(Tp))>::type
+  LoadActivations(DataType& input,
+                  boost::ptr_vector<DataTypeA>& activationsA,
+                  boost::ptr_vector<DataTypeB>& activationsB,
+                  size_t& dataTypeACounter,
+                  size_t& dataTypeBCounter,
+                  std::tuple<Tp...>& network)
+  {
+    auto& layer = std::get<sizeof...(Tp) - I - 1>(network);
+
+    // The type of the output parameter selects the Load() overload.
+    Load(activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
+        layer, layer.OutputParameter());
+
+    LoadActivations<I + 1, DataType, DataTypeA, DataTypeB, Tp...>(
+        input, activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
+        network);
+  }
+
+  //! The layer output is of type DataTypeB: decrease the matching counter and
+  //! restore the output from the second container.
+  template<typename T, typename DataTypeA, typename DataTypeB>
+  void Load(boost::ptr_vector<DataTypeA>& /* activationsA */,
+            boost::ptr_vector<DataTypeB>& activationsB,
+            size_t& /* dataTypeACounter */,
+            size_t& dataTypeBCounter,
+            T& layer,
+            DataTypeB& /* output */)
+  {
+    --dataTypeBCounter;
+    layer.OutputParameter() = activationsB[dataTypeBCounter];
+  }
+
+  //! The layer output is of type DataTypeA: decrease the matching counter and
+  //! restore the output from the first container.
+  template<typename T, typename DataTypeA, typename DataTypeB>
+  void Load(boost::ptr_vector<DataTypeA>& activationsA,
+            boost::ptr_vector<DataTypeB>& /* activationsB */,
+            size_t& dataTypeACounter,
+            size_t& /* dataTypeBCounter */,
+            T& layer,
+            DataTypeA& /* output */)
+  {
+    --dataTypeACounter;
+    layer.OutputParameter() = activationsA[dataTypeACounter];
+  }
+
+  /**
+   * Run a forward pass through the given network: the input is stored as
+   * input parameter of layer I, that layer is evaluated and the remaining
+   * layers are evaluated by ForwardTail(), each one on the output of its
+   * predecessor.
+   */
+  template<size_t I = 0, typename DataType, typename... Tp>
+  void Forward(const DataType& input, std::tuple<Tp...>& t)
+  {
+    auto& first = std::get<I>(t);
+
+    first.InputParameter() = input;
+    first.Forward(first.InputParameter(), first.OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(t);
+  }
+
+  //! Evaluate layer I on the output of layer I - 1, then continue with the
+  //! next layer.
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<(I < sizeof...(Tp))>::type
+  ForwardTail(std::tuple<Tp...>& t)
+  {
+    auto& previous = std::get<I - 1>(t);
+    auto& current = std::get<I>(t);
+
+    current.Forward(previous.OutputParameter(), current.OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(t);
+  }
+
+  //! Every layer is evaluated; link the layer parameters.
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp)>::type
+  ForwardTail(std::tuple<Tp...>& network)
+  {
+    LinkParameter(network);
+  }
+
+  /**
+   * Run the backward pass for a network that consists of a single layer: the
+   * given error goes straight to that layer's Backward() function, together
+   * with the layer's own output parameter and its Delta() object. There are
+   * no further layers to visit in this case.
+   */
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<sizeof...(Tp) == 1, void>::type
+  Backward(const DataType& error, std::tuple<Tp ...>& t)
+  {
+    auto& layer = std::get<sizeof...(Tp) - I>(t);
+    layer.Backward(layer.OutputParameter(), error, layer.Delta());
+  }
+
+  //! Backward pass through the last layer; BackwardTail() handles the rest.
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  Backward(const DataType& error, std::tuple<Tp ...>& t)
+  {
+    auto& layer = std::get<sizeof...(Tp) - I>(t);
+    layer.Backward(layer.OutputParameter(), error, layer.Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, t);
+  }
+
+  //! Last BackwardTail() step (I == number of layers): handles layer 0.
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I == (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& error, std::tuple<Tp...>& t)
+  {
+    auto& layer = std::get<sizeof...(Tp) - I>(t);
+    layer.Backward(layer.OutputParameter(),
+        std::get<sizeof...(Tp) - I + 1>(t).Delta(), layer.Delta());
+  }
+
+  //! Pass the delta of the following layer backwards through this layer.
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& error, std::tuple<Tp...>& t)
+  {
+    auto& layer = std::get<sizeof...(Tp) - I>(t);
+    layer.Backward(layer.OutputParameter(),
+        std::get<sizeof...(Tp) - I + 1>(t).Delta(), layer.Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, t);
+  }
+
+  //! Link the calculated activation with the correct layer. This overload ends
+  //! the recursion: once I equals the number of layers there is nothing left
+  //! to link.
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  LinkParameter(std::tuple<Tp ...>& /* network */) { /* Nothing to do here */ }
+
+  //! Link layer I to layer I - 1 unless it is a bias layer, then move on.
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  LinkParameter(std::tuple<Tp...>& network)
+  {
+    typedef typename std::remove_reference<
+        decltype(std::get<I>(network))>::type LayerType;
+    if (!LayerTraits<LayerType>::IsBiasLayer)
+      std::get<I>(network).InputParameter() =
+          std::get<I - 1>(network).OutputParameter();
+
+    LinkParameter<I + 1, Tp...>(network);
+  }
+
+  /**
+   * Iterate through all layer modules and compute each layer's gradient via
+   * Update(). The first layer uses the given input, not its InputParameter().
+   */
+  template<typename InputType, typename ErrorType, typename... Tp>
+  void UpdateGradients(const InputType& input,
+                       const ErrorType& error,
+                       std::tuple<Tp...>& network)
+  {
+    auto& firstLayer = std::get<0>(network);
+    auto& secondLayer = std::get<1>(network);
+    Update(firstLayer, input, secondLayer.Delta(),
+        secondLayer.OutputParameter());
+
+    UpdateGradients<1, ErrorType, Tp...>(error, network);
+  }
+
+  //! Update layer I with its stored input and the delta of layer I + 1.
+  template<size_t I = 0, typename ErrorType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp) - 1), void>::type
+  UpdateGradients(const ErrorType& error, std::tuple<Tp...>& network)
+  {
+    auto& layer = std::get<I>(network);
+    Update(layer, layer.InputParameter(), std::get<I + 1>(network).Delta(),
+        layer.OutputParameter());
+
+    UpdateGradients<I + 1, ErrorType, Tp...>(error, network);
+  }
+
+  //! Recursion end: the last layer is updated with its stored input and the
+  //! given error.
+  template<size_t I = 0, typename ErrorType, typename... Tp>
+  typename std::enable_if<I == (sizeof...(Tp) - 1), void>::type
+  UpdateGradients(const ErrorType& error, std::tuple<Tp...>& network)
+  {
+    auto& layer = std::get<I>(network);
+    Update(layer, layer.InputParameter(), error, layer.OutputParameter());
+  }
+
+  //! Selected for layers that provide a matching Gradient() function: that
+  //! function is called with the layer's own Gradient() object. The gradient
+  //! argument itself is not used by the body.
+  template<typename LayerType, typename InputType,
+           typename ErrorType, typename GradientType>
+  typename std::enable_if<
+      HasGradientCheck<LayerType,
+          void(LayerType::*)(const InputType&,
+                             const ErrorType&,
+                             GradientType&)>::value, void>::type
+  Update(LayerType& layer, const InputType& input,
+         const ErrorType& error, GradientType& /* gradient */)
+  {
+    auto& layerGradient = layer.Gradient();
+    layer.Gradient(input, error, layerGradient);
+  }
+
+  //! Selected for layers without a matching Gradient() function; such layers
+  //! have nothing to update, so every argument is ignored.
+  template<typename LayerType, typename InputType,
+           typename ErrorType, typename GradientType>
+  typename std::enable_if<
+      !HasGradientCheck<LayerType,
+          void(LayerType::*)(const InputType&,
+                             const ErrorType&,
+                             GradientType&)>::value, void>::type
+  Update(LayerType& /* layer */,
+         const InputType& /* input */,
+         const ErrorType& /* error */,
+         GradientType& /* gradient */)
+  {
+    // Nothing to do here
+  }
+
+  //! The location sensor network.
+  LocationSensorType locationSensor;
+
+  //! The glimpse sensor network.
+  GlimpseSensorType glimpseSensor;
+
+  //! The locator network.
+  LocatorType locator;
+
+  //! The glimpse network.
+  GlimpseType glimpse;
+
+  //! The start network.
+  StartType start;
+
+  //! The feedback network.
+  FeedbackType feedback;
+
+  //! The transfer network.
+  TransferType transfer;
+
+  //! The classifier network.
+  ClassifierType classifier;
+
+  //! The reward predictor network.
+  RewardPredictorType rewardPredictor;
+
+  //! The number of steps for the back-propagate through time.
+  size_t nStep;
+
+  //! Locally stored network input size.
+  size_t inputSize;
+
+  //! The current evaluation mode (training or testing).
+  bool deterministic;
+
+  //! The index of the current step.
+  size_t step;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the glimpse network.
+  boost::ptr_vector<arma::mat> glimpseActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the locator network.
+  boost::ptr_vector<arma::mat> locatorActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the feedback network.
+  boost::ptr_vector<arma::mat> feedbackActivations;
+
+  //! The activation storage we are using to save the feedback network input.
+  boost::ptr_vector<arma::mat> feedbackActivationsInput;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the transfer network.
+  boost::ptr_vector<arma::mat> transferActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the location sensor network.
+  boost::ptr_vector<arma::mat> locationSensorActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the glimpse sensor network.
+  boost::ptr_vector<arma::mat> glimpseSensorMatActivations;
+  boost::ptr_vector<arma::cube> glimpseSensorCubeActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the locator input.
+  boost::ptr_vector<arma::cube> locatorInput;
+
+  //! The storage we are using to save the location.
+  boost::ptr_vector<arma::mat> location;
+
+  //! The current number of activations in the glimpse sensor network.
+  size_t glimpseSensorMatCounter;
+  size_t glimpseSensorCubeCounter;
+
+  //! The current number of activations in the glimpse network.
+  size_t glimpseActivationsCounter;
+
+  //! The current number of activations in the start network.
+  size_t startActivationsCounter;
+
+  //! The current number of activations in the feedback network.
+  size_t feedbackActivationsCounter;
+
+  //! The current number of activations in the transfer network.
+  size_t transferActivationsCounter;
+
+  //! The current number of activations in the locator network.
+  size_t locatorActivationsCounter;
+
+  //! The current number of activations in the location sensor network.
+  size_t locationSensorActivationsCounter;
+
+  //! The current number of activations in the glimpse sensor network.
+  size_t glimpseSensorMatActivationsCounter;
+  size_t glimpseSensorCubeActivationsCounter;
+
+  //! The current number of locations in the location storage.
+  size_t locationCounter;
+
+  //! Matrix of (trained) parameters.
+  arma::mat parameter;
+
+  //! The matrix of data points (predictors).
+  arma::mat predictors;
+
+  //! The matrix of responses to the input data points.
+  arma::mat responses;
+
+  //! The number of separable functions (the number of predictor points).
+  size_t numFunctions;
+
+  //! Storage used to merge the reward input.
+  arma::field<arma::mat> rewardInput;
+
+  //! The current input.
+  arma::cube input;
+
+  //! The current target.
+  arma::mat target;
+
+  //! Locally stored performance functions.
+  NegativeLogLikelihoodLayer<> negativeLogLikelihoodFunction;
+  VRClassRewardLayer<> vRClassRewardFunction;
+
+  //! Locally stored size of the locator network.
+  size_t locatorSize;
+
+  //! Locally stored size of the location sensor network.
+  size_t locationSensorSize;
+
+  //! Locally stored size of the glimpse sensor network.
+  size_t glimpseSensorSize;
+
+  //! Locally stored size of the glimpse network.
+  size_t glimpseSize;
+
+  //! Locally stored size of the start network.
+  size_t startSize;
+
+  //! Locally stored size of the feedback network.
+  size_t feedbackSize;
+
+  //! Locally stored size of the transfer network.
+  size_t transferSize;
+
+  //! Locally stored size of the classifier network.
+  size_t classifierSize;
+
+  //! Locally stored size of the reward predictor network.
+  size_t rewardPredictorSize;
+
+  //! Locally stored recurrent gradient.
+  arma::mat recurrentGradient;
+
+  //! Locally stored action error.
+  arma::mat actionError;
+
+  //! Locally stored current location.
+  arma::mat evaluationLocation;
+}; // class RecurrentNeuralAttention
+
+}; // namespace ann
+}; // namespace mlpack
+
+// Include implementation.
+#include "rmva_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/rmva/rmva_impl.hpp b/src/mlpack/methods/rmva/rmva_impl.hpp
new file mode 100644
index 00000000000..9efa39f199c
--- /dev/null
+++ b/src/mlpack/methods/rmva/rmva_impl.hpp
@@ -0,0 +1,736 @@
+/**
+ * @file rmva_impl.hpp
+ * @author Marcus Edel
+ *
+ * Implementation of the RecurrentNeuralAttention class member functions.
+ */
+#ifndef __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP
+#define __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "rmva.hpp"
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType,
+  typename MatType
+>
+template<
+    typename TypeLocator,
+    typename TypeLocationSensor,
+    typename TypeGlimpseSensor,
+    typename TypeGlimpse,
+    typename TypeStart,
+    typename TypeFeedback,
+    typename TypeTransfer,
+    typename TypeClassifier,
+    typename TypeRewardPredictor
+>
+RecurrentNeuralAttention<
+  LocatorType,
+  LocationSensorType,
+  GlimpseSensorType,
+  GlimpseType,
+  StartType,
+  FeedbackType,
+  TransferType,
+  ClassifierType,
+  RewardPredictorType,
+  InitializationRuleType,
+  MatType
+>::RecurrentNeuralAttention(TypeLocator&& locator,
+                            TypeLocationSensor&& locationSensor,
+                            TypeGlimpseSensor&& glimpseSensor,
+                            TypeGlimpse&& glimpse,
+                            TypeStart&& start,
+                            TypeFeedback&& feedback,
+                            TypeTransfer&& transfer,
+                            TypeClassifier&& classifier,
+                            TypeRewardPredictor&& rewardPredictor,
+                            const size_t nStep,
+                            InitializationRuleType initializeRule) :
+    locationSensor(std::forward<TypeLocationSensor>(locationSensor)),
+    glimpseSensor(std::forward<TypeGlimpseSensor>(glimpseSensor)),
+    locator(std::forward<TypeLocator>(locator)),
+    glimpse(std::forward<TypeGlimpse>(glimpse)),
+    start(std::forward<TypeStart>(start)),
+    feedback(std::forward<TypeFeedback>(feedback)),
+    transfer(std::forward<TypeTransfer>(transfer)),
+    classifier(std::forward<TypeClassifier>(classifier)),
+    rewardPredictor(std::forward<TypeRewardPredictor>(rewardPredictor)),
+    nStep(nStep),
+    inputSize(0) // Initializers follow the member declaration order.
+{
+  // Query the size of every sub-network; the sum is the parameter row count.
+  locatorSize = NetworkSize(this->locator);
+  locationSensorSize = NetworkSize(this->locationSensor);
+  glimpseSensorSize = NetworkSize(this->glimpseSensor);
+  glimpseSize = NetworkSize(this->glimpse);
+  feedbackSize = NetworkSize(this->feedback);
+  transferSize = NetworkSize(this->transfer);
+  classifierSize = NetworkSize(this->classifier);
+  rewardPredictorSize = NetworkSize(this->rewardPredictor);
+  startSize = NetworkSize(this->start);
+
+  const size_t totalSize = locatorSize + locationSensorSize +
+      glimpseSensorSize + glimpseSize + feedbackSize + transferSize +
+      classifierSize + rewardPredictorSize + startSize;
+  initializeRule.Initialize(parameter, totalSize, 1);
+
+  // Set the network weights; every sub-network receives the running offset.
+  NetworkWeights(initializeRule, parameter, this->locator);
+  size_t offset = locatorSize;
+  NetworkWeights(initializeRule, parameter, this->locationSensor, offset);
+  offset += locationSensorSize;
+  NetworkWeights(initializeRule, parameter, this->glimpseSensor, offset);
+  offset += glimpseSensorSize;
+  NetworkWeights(initializeRule, parameter, this->glimpse, offset);
+  offset += glimpseSize;
+  NetworkWeights(initializeRule, parameter, this->feedback, offset);
+  offset += feedbackSize;
+  NetworkWeights(initializeRule, parameter, this->transfer, offset);
+  offset += transferSize;
+  NetworkWeights(initializeRule, parameter, this->classifier, offset);
+  offset += classifierSize;
+  NetworkWeights(initializeRule, parameter, this->rewardPredictor, offset);
+  offset += rewardPredictorSize;
+  NetworkWeights(initializeRule, parameter, this->start, offset);
+
+  rewardInput = arma::field<arma::mat>(2, 1);
+}
+
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType,
+  typename MatType
+>
+template<template<typename> class OptimizerType>
+void RecurrentNeuralAttention<
+  LocatorType,
+  LocationSensorType,
+  GlimpseSensorType,
+  GlimpseType,
+  StartType,
+  FeedbackType,
+  TransferType,
+  ClassifierType,
+  RewardPredictorType,
+  InitializationRuleType,
+  MatType
+>::Train(const arma::mat& predictors,
+         const arma::mat& responses,
+         OptimizerType<NetworkType>& optimizer)
+{
+  // Keep local copies of the data; one separable function per data point.
+  numFunctions = predictors.n_cols;
+  this->predictors = predictors;
+  this->responses = responses;
+
+  // Train the model.
+  Timer::Start("ffn_optimization");
+  const double out = optimizer.Optimize(parameter);
+  Timer::Stop("ffn_optimization");
+  Log::Info << "RecurrentNeuralAttention::Train(): final objective of trained "
+      << "model is " << out << "." << std::endl;
+}
+
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType,
+  typename MatType
+>
+void RecurrentNeuralAttention<
+  LocatorType,
+  LocationSensorType,
+  GlimpseSensorType,
+  GlimpseType,
+  StartType,
+  FeedbackType,
+  TransferType,
+  ClassifierType,
+  RewardPredictorType,
+  InitializationRuleType,
+  MatType
+>::Predict(arma::mat& predictors, arma::mat& responses)
+{
+  // Predict the response for every column of predictors (deterministic mode).
+  deterministic = true;
+
+  // Start empty so that an input without columns yields an empty output.
+  responses.reset();
+  arma::mat responsesTemp;
+  for (size_t i = 0; i < predictors.n_cols; i++)
+  {
+    SinglePredict(arma::cube(predictors.colptr(i), 28, 28, 1), responsesTemp);
+    if (i == 0) // The output size is only known after the first prediction.
+      responses = arma::mat(responsesTemp.n_elem, predictors.n_cols);
+    responses.col(i) = responsesTemp.col(0);
+  }
+}
+
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType,
+  typename MatType
+>
+double RecurrentNeuralAttention<
+  LocatorType,
+  LocationSensorType,
+  GlimpseSensorType,
+  GlimpseType,
+  StartType,
+  FeedbackType,
+  TransferType,
+  ClassifierType,
+  RewardPredictorType,
+  InitializationRuleType,
+  MatType
+>::Evaluate(const arma::mat& /* unused */,
+            const size_t i,
+            const bool deterministic)
+{
+  // Forward pass for data point i; returns the summed performance error.
+  this->deterministic = deterministic;
+  input = arma::cube(predictors.colptr(i), 28, 28, 1); // Fixed 28x28x1 input.
+  target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true);
+
+  // Query the locator input size once; later calls reuse the stored value.
+  if (!inputSize)
+  {
+    inputSize = NetworkInputSize(locator);
+  }
+
+  glimpseSensorMatCounter = 0;
+  glimpseSensorCubeCounter = 0;
+  glimpseActivationsCounter = 0;
+  locatorActivationsCounter = 0;
+  locationSensorActivationsCounter = 0;
+  glimpseSensorMatActivationsCounter = 0;
+  glimpseSensorCubeActivationsCounter = 0;
+  locationCounter = 0;
+  feedbackActivationsCounter = 0;
+  transferActivationsCounter = 0;
+
+  // Reset networks.
+  ResetParameter(locator);
+  ResetParameter(locationSensor);
+  ResetParameter(glimpseSensor);
+  ResetParameter(glimpse);
+  ResetParameter(feedback);
+  ResetParameter(transfer);
+  ResetParameter(classifier);
+  ResetParameter(rewardPredictor);
+  ResetParameter(start);
+
+  // Reset activation storage.
+  glimpseActivations.clear();
+  locatorActivations.clear();
+  locationSensorActivations.clear();
+  glimpseSensorMatActivations.clear();
+  glimpseSensorCubeActivations.clear();
+  feedbackActivations.clear();
+  transferActivations.clear();
+  locatorInput.clear();
+  location.clear();
+  feedbackActivationsInput.clear();
+
+  // Sample an initial starting action by forwarding zeros through the locator.
+  locatorInput.push_back(new arma::cube(arma::zeros<arma::cube>(inputSize, 1,
+      input.n_slices)));
+
+  // Forward pass through the recurrent network.
+  for (step = 0; step < nStep; step++)
+  {
+    // Locator forward pass.
+    Forward(locatorInput.back(), locator);
+    SaveActivations(locatorActivations, locator, locatorActivationsCounter);
+
+    // Location sensor forward pass.
+    Forward(std::get<std::tuple_size<LocatorType>::value - 1>(
+        locator).OutputParameter(), locationSensor);
+    SaveActivations(locationSensorActivations, locationSensor,
+        locationSensorActivationsCounter);
+
+    // Set the location parameter for all layer that implement a Location
+    // function e.g. GlimpseLayer.
+    ResetLocation(std::get<std::tuple_size<LocatorType>::value - 1>(
+        locator).OutputParameter(), glimpseSensor);
+
+    // Save the location for the backward path.
+    location.push_back(new arma::mat(std::get<std::tuple_size<
+        LocatorType>::value - 1>(locator).OutputParameter()));
+
+    // Glimpse sensor forward pass.
+    Forward(input, glimpseSensor);
+    SaveActivations(glimpseSensorMatActivations, glimpseSensorCubeActivations,
+        glimpseSensorMatCounter, glimpseSensorCubeCounter, glimpseSensor);
+
+    // Concat the parameter activation from the location sensor and
+    // glimpse sensor.
+    arma::mat concatLayerOutput = arma::join_cols(
+        std::get<std::tuple_size<LocationSensorType>::value - 1>(
+        locationSensor).OutputParameter(),
+        std::get<std::tuple_size<GlimpseSensorType>::value - 1>(
+        glimpseSensor).OutputParameter());
+
+    // Glimpse forward pass.
+    Forward(concatLayerOutput, glimpse);
+    SaveActivations(glimpseActivations, glimpse, glimpseActivationsCounter);
+
+    if (step == 0)
+    {
+      // Start forward pass.
+      Forward(std::get<std::tuple_size<GlimpseType>::value - 1>(
+          glimpse).OutputParameter(), start);
+
+      // Transfer forward pass.
+      Forward(std::get<std::tuple_size<StartType>::value - 1>(
+          start).OutputParameter(), transfer);
+      SaveActivations(transferActivations, transfer,
+          transferActivationsCounter);
+    }
+    else
+    {
+      // Feedback forward pass.
+      Forward(std::get<std::tuple_size<TransferType>::value - 1>(
+          transfer).OutputParameter(), feedback);
+      SaveActivations(feedbackActivations, feedback,
+          feedbackActivationsCounter);
+
+      feedbackActivationsInput.push_back(new arma::mat(
+          std::get<std::tuple_size<TransferType>::value - 1>(
+          transfer).OutputParameter().memptr(),
+          std::get<std::tuple_size<TransferType>::value - 1>(
+          transfer).OutputParameter().n_rows,
+          std::get<std::tuple_size<TransferType>::value - 1>(
+          transfer).OutputParameter().n_cols));
+
+      arma::mat feedbackLayerOutput =
+        std::get<std::tuple_size<GlimpseType>::value - 1>(
+        glimpse).OutputParameter() +
+        std::get<std::tuple_size<FeedbackType>::value - 1>(
+        feedback).OutputParameter();
+
+      // Transfer forward pass.
+      Forward(feedbackLayerOutput, transfer);
+      SaveActivations(transferActivations, transfer,
+          transferActivationsCounter);
+    }
+
+    // Store the transfer output as the locator input of the next step.
+    locatorInput.push_back(new arma::cube(
+        std::get<std::tuple_size<TransferType>::value - 1>(
+        transfer).OutputParameter().memptr(), locatorInput.back().n_rows,
+        locatorInput.back().n_cols, locatorInput.back().n_slices));
+  }
+
+  // Classifier forward pass.
+  Forward(locatorInput.back().slice(0), classifier);
+
+  // Reward predictor forward pass.
+  Forward(std::get<std::tuple_size<ClassifierType>::value - 1>(
+      classifier).OutputParameter(), rewardPredictor);
+
+  double performanceError = negativeLogLikelihoodFunction.Forward(
+      std::get<std::tuple_size<ClassifierType>::value - 1>(
+      classifier).OutputParameter(), target);
+
+  // Create the input for the vRClassRewardFunction function.
+  // For which we use the output from the classifier and the rewardPredictor.
+  rewardInput(0, 0) = std::get<std::tuple_size<ClassifierType>::value - 1>(
+      classifier).OutputParameter();
+  rewardInput(1, 0) = std::get<std::tuple_size<RewardPredictorType>::value - 1>(
+      rewardPredictor).OutputParameter();
+
+  performanceError += vRClassRewardFunction.Forward(rewardInput, target);
+
+  return performanceError;
+}
+
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType,
+  typename MatType
+>
+void RecurrentNeuralAttention<
+  LocatorType,
+  LocationSensorType,
+  GlimpseSensorType,
+  GlimpseType,
+  StartType,
+  FeedbackType,
+  TransferType,
+  ClassifierType,
+  RewardPredictorType,
+  InitializationRuleType,
+  MatType
+>::Gradient(const arma::mat& /* unused */,
+            const size_t i,
+            arma::mat& gradient)
+{
+  // Run the forward pass for data point i with deterministic set to false;
+  // this refills the activation storages that the backward pass reads below.
+  Evaluate(parameter, i, false);
+
+  // Reset the gradient.
+  if (gradient.is_empty())
+    gradient = arma::zeros<arma::mat>(parameter.n_rows, parameter.n_cols);
+  else
+    gradient.zeros();
+
+  // Reset the recurrent gradient.
+  if (recurrentGradient.is_empty())
+  {
+    recurrentGradient = arma::zeros<arma::mat>(parameter.n_rows,
+        parameter.n_cols);
+
+    actionError = arma::zeros<arma::mat>(
+        std::get<std::tuple_size<LocatorType>::value - 1>(
+        locator).OutputParameter().n_rows,
+        std::get<std::tuple_size<LocatorType>::value - 1>(
+        locator).OutputParameter().n_cols);
+  }
+  else
+  {
+    recurrentGradient.zeros();
+  }
+
+  // Set the recurrent gradient.
+  NetworkGradients(recurrentGradient, this->locator);
+  NetworkGradients(recurrentGradient, this->locationSensor, locatorSize);
+  NetworkGradients(recurrentGradient, this->glimpseSensor, locatorSize +
+      locationSensorSize);
+  NetworkGradients(recurrentGradient, this->glimpse, locatorSize +
+      locationSensorSize + glimpseSensorSize);
+  NetworkGradients(recurrentGradient, this->feedback, locatorSize +
+      locationSensorSize + glimpseSensorSize + glimpseSize);
+  NetworkGradients(recurrentGradient, this->transfer, locatorSize +
+      locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize);
+
+  // Set the gradient.
+  NetworkGradients(gradient, this->classifier, locatorSize + locationSensorSize
+      + glimpseSensorSize + glimpseSize + feedbackSize + transferSize);
+  NetworkGradients(gradient, this->rewardPredictor, locatorSize +
+      locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize +
+      transferSize + classifierSize);
+  NetworkGradients(gradient, this->start, locatorSize + locationSensorSize +
+      glimpseSensorSize + glimpseSize + feedbackSize + transferSize +
+      classifierSize + rewardPredictorSize);
+
+  // Negative log likelihood backward pass.
+  negativeLogLikelihoodFunction.Backward(std::get<std::tuple_size<
+      ClassifierType>::value - 1>(classifier).OutputParameter(), target,
+      negativeLogLikelihoodFunction.OutputParameter());
+
+  const double reward = vRClassRewardFunction.Backward(rewardInput, target,
+      vRClassRewardFunction.OutputParameter());
+
+  // Propagate reward through all modules.
+  ResetReward(reward, locator);
+  ResetReward(reward, locationSensor);
+  ResetReward(reward, glimpseSensor);
+  ResetReward(reward, glimpse);
+  ResetReward(reward, classifier);
+
+  // RewardPredictor backward pass.
+  Backward(vRClassRewardFunction.OutputParameter()(1, 0), rewardPredictor);
+
+  arma::mat classifierError =
+    negativeLogLikelihoodFunction.OutputParameter() +
+    vRClassRewardFunction.OutputParameter()(0, 0) +
+    std::get<0>(rewardPredictor).Delta();
+
+  // Classifier backward pass.
+  Backward(classifierError, classifier);
+
+  // Set the initial recurrent error for the first backward step.
+  arma::mat recurrentError = std::get<0>(classifier).Delta();
+
+  // Walk the recurrent steps from last to first. step is unsigned, so the
+  // condition relies on the wrap-around past zero (and rejects nStep == 0);
+  // the explicit break after step 0 keeps step at 0 once the loop is left.
+  for (step = nStep - 1; step < nStep; step--)
+  {
+    // Load the locator activations.
+    LoadActivations(locatorInput[step], locatorActivations,
+        locatorActivationsCounter, locator);
+
+    // Load the location sensor activations.
+    LoadActivations(std::get<std::tuple_size<LocatorType>::value - 1>(
+        locator).OutputParameter(), locationSensorActivations,
+        locationSensorActivationsCounter, locationSensor);
+
+    // Load the glimpse sensor activations.
+    LoadActivations(input, glimpseSensorMatActivations,
+        glimpseSensorCubeActivations, glimpseSensorMatCounter,
+        glimpseSensorCubeCounter, glimpseSensor);
+
+    // Concat the parameter activation from the location and glimpse sensor.
+    arma::mat concatLayerOutput = arma::join_cols(
+        std::get<std::tuple_size<LocationSensorType>::value - 1>(
+        locationSensor).OutputParameter(),
+        std::get<std::tuple_size<GlimpseSensorType>::value - 1>(
+        glimpseSensor).OutputParameter());
+
+    // Load the glimpse activations.
+    LoadActivations(concatLayerOutput, glimpseActivations,
+        glimpseActivationsCounter, glimpse);
+
+    if (step == 0)
+    {
+      // Load the transfer activations.
+      LoadActivations(std::get<std::tuple_size<StartType>::value - 1>(
+          start).OutputParameter(), transferActivations,
+          transferActivationsCounter, transfer);
+    }
+    else
+    {
+      // Load the feedback activations.
+      LoadActivations(std::get<std::tuple_size<TransferType>::value - 1>(
+          transfer).OutputParameter(), feedbackActivations,
+          feedbackActivationsCounter, feedback);
+
+      arma::mat feedbackLayerOutput =
+        std::get<std::tuple_size<GlimpseType>::value - 1>(
+        glimpse).OutputParameter() +
+        std::get<std::tuple_size<FeedbackType>::value - 1>(
+        feedback).OutputParameter();
+
+      // Load the transfer activations.
+      LoadActivations(feedbackLayerOutput, transferActivations,
+          transferActivationsCounter, transfer);
+    }
+
+    // Set the location parameter for all layer that implement a Location
+    // function e.g. GlimpseLayer.
+    ResetLocation(location[step], glimpseSensor);
+
+    // Locator backward pass.
+    Backward(actionError, locator);
+
+    // Transfer backward pass.
+    Backward(recurrentError, transfer);
+
+    // Glimpse backward pass.
+    Backward(std::get<0>(transfer).Delta(), glimpse);
+
+    // Split up the error of the concat layer.
+    arma::mat locationSensorError = std::get<0>(glimpse).Delta().submat(
+        0, 0, std::get<0>(glimpse).Delta().n_elem / 2 - 1, 0);
+    arma::mat glimpseSensorError = std::get<0>(glimpse).Delta().submat(
+        std::get<0>(glimpse).Delta().n_elem / 2, 0,
+        std::get<0>(glimpse).Delta().n_elem - 1, 0);
+
+    // Location sensor backward pass.
+    Backward(locationSensorError, locationSensor);
+
+    // Glimpse sensor backward pass.
+    Backward(glimpseSensorError, glimpseSensor);
+
+    if (step != 0)
+    {
+      // Feedback backward pass.
+      Backward(std::get<0>(transfer).Delta(), feedback);
+    }
+
+    // Update the recurrent network gradients.
+    UpdateGradients(std::get<0>(locationSensor).Delta(), locator);
+    UpdateGradients(std::get<0>(transfer).Delta(), glimpse);
+    UpdateGradients(std::get<0>(transfer).Delta(), locationSensor);
+    UpdateGradients(std::get<0>(transfer).Delta(), glimpseSensor);
+
+    // Feedback gradient update.
+    if (step != 0)
+    {
+      UpdateGradients(feedbackActivationsInput[step - 1],
+          std::get<0>(transfer).Delta(), feedback);
+    }
+    else
+    {
+      // Set the feedback gradient to zero.
+      recurrentGradient.submat(locatorSize + locationSensorSize +
+          glimpseSensorSize + glimpseSize, 0, locatorSize + locationSensorSize +
+          glimpseSensorSize + glimpseSize + feedbackSize - 1, 0).zeros();
+
+      UpdateGradients(std::get<0>(transfer).Delta(), start);
+    }
+
+    // Update the overall recurrent gradient.
+    gradient += recurrentGradient;
+
+    if (step != 0)
+    {
+      // Update the recurrent error for the next backward step.
+      recurrentError = std::get<0>(locator).Delta() +
+          std::get<0>(feedback).Delta();
+    }
+    else
+    {
+      break;
+    }
+  }
+
+  // Reward predictor gradient update.
+  UpdateGradients(vRClassRewardFunction.OutputParameter()(1, 0),
+      rewardPredictor);
+
+  // Classifier gradient update.
+  UpdateGradients(std::get<1>(classifier).Delta(), classifier);
+}
+
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType,
+  typename MatType
+>
+const arma::mat& RecurrentNeuralAttention<
+  LocatorType,
+  LocationSensorType,
+  GlimpseSensorType,
+  GlimpseType,
+  StartType,
+  FeedbackType,
+  TransferType,
+  ClassifierType,
+  RewardPredictorType,
+  InitializationRuleType,
+  MatType
+>::Location()
+{
+  // Without stored locations, hand back the previously assembled matrix.
+  if (location.empty())
+    return evaluationLocation;
+
+  // One column per stored location, each flattened to a vector.
+  const size_t numLocations = location.size();
+  evaluationLocation = arma::mat(location[0].n_elem, numLocations);
+  for (size_t col = 0; col < numLocations; ++col)
+    evaluationLocation.col(col) = arma::vectorise(location[col]);
+
+  return evaluationLocation;
+}
+
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType,
+  typename MatType
+>
+template<typename Archive>
+void RecurrentNeuralAttention<
+  LocatorType,
+  LocationSensorType,
+  GlimpseSensorType,
+  GlimpseType,
+  StartType,
+  FeedbackType,
+  TransferType,
+  ClassifierType,
+  RewardPredictorType,
+  InitializationRuleType,
+  MatType
+>::Serialize(Archive& ar, const unsigned int /* version */)
+{
+  ar & data::CreateNVP(parameter, "parameter");
+  ar & data::CreateNVP(inputSize, "inputSize");
+  ar & data::CreateNVP(nStep, "nStep");
+
+  // Nothing else to do when saving; only loading rebuilds the derived state.
+  if (!Archive::is_loading::value)
+    return;
+
+  // Set the network size.
+  locatorSize = NetworkSize(this->locator);
+  locationSensorSize = NetworkSize(this->locationSensor);
+  glimpseSensorSize = NetworkSize(this->glimpseSensor);
+  glimpseSize = NetworkSize(this->glimpse);
+  feedbackSize = NetworkSize(this->feedback);
+  transferSize = NetworkSize(this->transfer);
+  classifierSize = NetworkSize(this->classifier);
+  rewardPredictorSize = NetworkSize(this->rewardPredictor);
+  startSize = NetworkSize(this->start);
+
+  // Set the network weights from the loaded parameter matrix; every
+  // sub-network receives the running sum of the preceding sizes as offset.
+  NetworkWeights(parameter, this->locator);
+  size_t offset = locatorSize;
+  NetworkWeights(parameter, this->locationSensor, offset);
+  offset += locationSensorSize;
+  NetworkWeights(parameter, this->glimpseSensor, offset);
+  offset += glimpseSensorSize;
+  NetworkWeights(parameter, this->glimpse, offset);
+  offset += glimpseSize;
+  NetworkWeights(parameter, this->feedback, offset);
+  offset += feedbackSize;
+  NetworkWeights(parameter, this->transfer, offset);
+  offset += transferSize;
+  NetworkWeights(parameter, this->classifier, offset);
+  offset += classifierSize;
+  NetworkWeights(parameter, this->rewardPredictor, offset);
+  offset += rewardPredictorSize;
+  NetworkWeights(parameter, this->start, offset);
+}
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/rmva/rmva_main.cpp b/src/mlpack/methods/rmva/rmva_main.cpp
new file mode 100644
index 00000000000..4ea2b76503e
--- /dev/null
+++ b/src/mlpack/methods/rmva/rmva_main.cpp
@@ -0,0 +1,289 @@
+/**
+ * @file rmva_main.cpp
+ * @author Marcus Edel
+ *
+ * Main executable for the Recurrent Model for Visual Attention.
+ */
+#include <mlpack/core.hpp>
+
+#include "rmva.hpp"
+
+#include <mlpack/methods/ann/layer/glimpse_layer.hpp>
+#include <mlpack/methods/ann/layer/linear_layer.hpp>
+#include <mlpack/methods/ann/layer/bias_layer.hpp>
+#include <mlpack/methods/ann/layer/base_layer.hpp>
+#include <mlpack/methods/ann/layer/reinforce_normal_layer.hpp>
+#include <mlpack/methods/ann/layer/multiply_constant_layer.hpp>
+#include <mlpack/methods/ann/layer/constant_layer.hpp>
+#include <mlpack/methods/ann/layer/log_softmax_layer.hpp>
+#include <mlpack/methods/ann/layer/hard_tanh_layer.hpp>
+
+#include <mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp>
+#include <mlpack/core/optimizers/sgd/sgd.hpp>
+
+using namespace mlpack;
+using namespace mlpack::ann;
+using namespace mlpack::optimization;
+using namespace std;
+
+PROGRAM_INFO("Recurrent Model for Visual Attention",
+    "This program trains the Recurrent Model for Visual Attention on the given "
+    "labeled training set, or loads a model from the given model file, and then"
+    " may use that trained model to classify the points in a given test set."
+    "\n\n"
+    "Labels are expected to be passed in separately as their own file "
+    "(--labels_file).  If training is not desired, a pre-existing model can be "
+    "loaded with the --input_model_file (-m) option."
+    "\n\n"
+    "If classifying a test set is desired, the test set should be in the file "
+    "specified with the --test_file (-T) option, and the classifications will "
+    "be saved to the file specified with the --output_file (-o) option.  If "
+    "saving a trained model is desired, the --output_model_file (-M) option "
+    "should be given.");
+
+// Model loading/saving.
+PARAM_STRING("input_model_file", "File containing the Recurrent Model for "
+    "Visual Attention.", "m", "");
+PARAM_STRING("output_model_file", "File to save trained Recurrent Model for "
+    "Visual Attention to.", "M", "");
+
+// Training parameters.
+PARAM_STRING("training_file", "A file containing the training set.", "t", "");
+PARAM_STRING("labels_file", "A file containing labels for the training set.",
+    "l", "");
+
+PARAM_STRING("optimizer", "Optimizer to use; 'sgd' or 'minibatch-sgd'.", "O",
+    "minibatch-sgd");
+
+PARAM_INT("max_iterations", "Maximum number of iterations for SGD or "
+    "mini-batch SGD (0 indicates no limit).", "n", 500000);
+PARAM_DOUBLE("tolerance", "Maximum tolerance for termination of SGD or "
+    "mini-batch SGD.", "e", 1e-7);
+
+PARAM_DOUBLE("step_size", "Step size for stochastic gradient descent (alpha).",
+    "a", 0.01);
+PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are "
+    "visited for SGD or mini-batch SGD.", "L");
+PARAM_INT("batch_size", "Batch size for mini-batch SGD.", "b", 20);
+
+PARAM_INT("rho", "Number of steps for the back-propagate through time.", "r",
+    7);
+
+PARAM_INT("classes", "The number of classes.", "c", 10);
+
+PARAM_INT("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
+
+// Test parameters.
+PARAM_STRING("test_file", "A file containing the test set.", "T", "");
+PARAM_STRING("output_file", "The file in which the predicted labels for the "
+    "test set will be written.", "o", "");
+
+int main(int argc, char** argv)
+{
+  CLI::ParseCommandLine(argc, argv);
+
+  // Check input parameters.
+  if (CLI::HasParam("training_file") && CLI::HasParam("input_model_file"))
+    Log::Fatal << "Cannot specify both --training_file (-t) and "
+        << "--input_model_file (-m)!" << endl;
+
+  if (!CLI::HasParam("training_file") && !CLI::HasParam("input_model_file"))
+    Log::Fatal << "Neither --training_file (-t) nor --input_model_file (-m) are"
+        << " specified!" << endl;
+
+  if (!CLI::HasParam("training_file") && CLI::HasParam("labels_file"))
+    Log::Warn << "--labels_file (-l) ignored because --training_file (-t) is "
+        << "not specified." << endl;
+
+  if (!CLI::HasParam("output_file") && !CLI::HasParam("output_model_file"))
+    Log::Warn << "Neither --output_file (-o) nor --output_model_file (-M) "
+        << "specified; no output will be saved!" << endl;
+
+  if (CLI::HasParam("output_file") && !CLI::HasParam("test_file"))
+    Log::Warn << "--output_file (-o) ignored because no test file specified "
+        << "with --test_file (-T)." << endl;
+
+  if (!CLI::HasParam("output_file") && CLI::HasParam("test_file"))
+    Log::Warn << "--test_file (-T) specified, but classification results will "
+        << "not be saved because --output_file (-o) is not specified." << endl;
+
+  const string optimizerType = CLI::GetParam<string>("optimizer");
+
+  // 'lbfgs' has no training branch below, so reject it instead of skipping.
+  if ((optimizerType != "sgd") && (optimizerType != "minibatch-sgd"))
+  {
+    Log::Fatal << "Optimizer type '" << optimizerType << "' unknown or "
+        << "unsupported; must be 'sgd' or 'minibatch-sgd'!" << endl;
+  }
+
+  const double stepSize = CLI::GetParam<double>("step_size");
+  const size_t maxIterations = (size_t) CLI::GetParam<int>("max_iterations");
+  const double tolerance = CLI::GetParam<double>("tolerance");
+  const bool shuffle = !CLI::HasParam("linear_scan");
+  const size_t batchSize = (size_t) CLI::GetParam<int>("batch_size");
+  const size_t rho = (size_t) CLI::GetParam<int>("rho");
+  const size_t numClasses = (size_t) CLI::GetParam<int>("classes");
+
+  const size_t hiddenSize = 256;
+  const double unitPixels = 13;
+  const double locatorStd = 0.11;
+  const size_t imageSize = 28;
+  const size_t locatorHiddenSize = 128;
+  const size_t glimpsePatchSize = 8;
+  const size_t glimpseDepth = 1;
+  const size_t glimpseScale = 2;
+  const size_t glimpseHiddenSize = 128;
+  const size_t imageHiddenSize = 256;
+
+
+  // Locator network.
+  LinearMappingLayer<> linearLayer0(hiddenSize, 2);
+  BiasLayer<> biasLayer0(2, 1);
+  HardTanHLayer<> hardTanhLayer0;
+  ReinforceNormalLayer<> reinforceNormalLayer0(2 * locatorStd);
+  HardTanHLayer<> hardTanhLayer1;
+  MultiplyConstantLayer<> multiplyConstantLayer0(2 * unitPixels / imageSize);
+  auto locator = std::tie(linearLayer0, biasLayer0, hardTanhLayer0,
+      reinforceNormalLayer0, hardTanhLayer1, multiplyConstantLayer0);
+
+  // Location sensor network.
+  LinearLayer<> linearLayer1(2, locatorHiddenSize);
+  BiasLayer<> biasLayer1(locatorHiddenSize, 1);
+  ReLULayer<> rectifierLayer0;
+  auto locationSensor = std::tie(linearLayer1, biasLayer1, rectifierLayer0);
+
+  // Glimpse sensor network.
+  GlimpseLayer<> glimpseLayer0(1, glimpsePatchSize, glimpseDepth, glimpseScale);
+  LinearMappingLayer<> linearLayer2(64, glimpseHiddenSize);
+  BiasLayer<> biasLayer2(glimpseHiddenSize, 1);
+  ReLULayer<> rectifierLayer1;
+  auto glimpseSensor = std::tie(glimpseLayer0, linearLayer2, biasLayer2,
+      rectifierLayer1);
+
+  // Glimpse network.
+  LinearLayer<> linearLayer3(glimpseHiddenSize + locatorHiddenSize,
+      imageHiddenSize);
+  BiasLayer<> biasLayer3(imageHiddenSize, 1);
+  ReLULayer<> rectifierLayer2;
+  LinearLayer<> linearLayer4(imageHiddenSize, hiddenSize);
+  BiasLayer<> biasLayer4(hiddenSize, 1);
+  auto glimpse = std::tie(linearLayer3, biasLayer3, rectifierLayer2,
+      linearLayer4, biasLayer4);
+
+  // Feedback network.
+  LinearLayer<> recurrentLayer0(imageHiddenSize, hiddenSize);
+  BiasLayer<> recurrentLayerBias0(hiddenSize, 1);
+  auto feedback = std::tie(recurrentLayer0, recurrentLayerBias0);
+
+  // Start network.
+  AdditionLayer<> startLayer0(hiddenSize, 1);
+  auto start = std::tie(startLayer0);
+
+  // Transfer network.
+  ReLULayer<> rectifierLayer3;
+  auto transfer = std::tie(rectifierLayer3);
+
+  // Classifier network.
+  LinearLayer<> linearLayer5(hiddenSize, numClasses);
+  BiasLayer<> biasLayer6(numClasses, 1);
+  LogSoftmaxLayer<> logSoftmaxLayer0;
+  auto classifier = std::tie(linearLayer5, biasLayer6, logSoftmaxLayer0);
+
+  // Reward predictor network.
+  ConstantLayer<> constantLayer0(1, 1);
+  AdditionLayer<> additionLayer0(1, 1);
+  auto rewardPredictor = std::tie(constantLayer0, additionLayer0);
+
+  // Recurrent Model for Visual Attention.
+  RecurrentNeuralAttention<decltype(locator),
+                           decltype(locationSensor),
+                           decltype(glimpseSensor),
+                           decltype(glimpse),
+                           decltype(start),
+                           decltype(feedback),
+                           decltype(transfer),
+                           decltype(classifier),
+                           decltype(rewardPredictor),
+                           RandomInitialization>
+    net(locator, locationSensor, glimpseSensor, glimpse, start, feedback,
+        transfer, classifier, rewardPredictor, rho);
+
+  // Either we have to train a model, or load a model.
+  if (CLI::HasParam("training_file"))
+  {
+    const string trainingFile = CLI::GetParam<string>("training_file");
+    arma::mat trainingData;
+    data::Load(trainingFile, trainingData, true);
+
+    arma::mat labels;
+
+    // Did the user pass in labels?
+    const string labelsFilename = CLI::GetParam<string>("labels_file");
+    if (labelsFilename != "")
+    {
+      // Load labels.
+      data::Load(labelsFilename, labels, true, false);
+
+      // Do the labels need to be transposed?
+      if (labels.n_cols == 1)
+        labels = labels.t();
+    }
+
+    // Now run the optimization.
+    if (optimizerType == "sgd")
+    {
+      SGD<decltype(net)> opt(net);
+      opt.StepSize() = stepSize;
+      opt.MaxIterations() = maxIterations;
+      opt.Tolerance() = tolerance;
+      opt.Shuffle() = shuffle;
+
+      Timer::Start("rmva_training");
+      net.Train(trainingData, labels, opt);
+      Timer::Stop("rmva_training");
+    }
+    else if (optimizerType == "minibatch-sgd")
+    {
+      MiniBatchSGD<decltype(net)> opt(net);
+      opt.StepSize() = stepSize;
+      opt.MaxIterations() = maxIterations;
+      opt.Tolerance() = tolerance;
+      opt.Shuffle() = shuffle;
+      opt.BatchSize() = batchSize;
+
+      Timer::Start("rmva_training");
+      net.Train(trainingData, labels, opt);
+      Timer::Stop("rmva_training");
+    }
+  }
+  else
+  {
+    // Load the model from file.
+    data::Load(CLI::GetParam<string>("input_model_file"), "rmva_model", net);
+  }
+
+  // Do we need to do testing?
+  if (CLI::HasParam("test_file"))
+  {
+    const string testingDataFilename = CLI::GetParam<std::string>("test_file");
+    arma::mat testingData;
+    data::Load(testingDataFilename, testingData, true);
+
+    // Time the prediction step of the Recurrent Model for Visual Attention.
+    arma::mat results;
+    Timer::Start("rmva_testing");
+    net.Predict(testingData, results);
+    Timer::Stop("rmva_testing");
+
+    if (CLI::HasParam("output_file"))
+    {
+      // Output results.
+      const string outputFilename = CLI::GetParam<string>("output_file");
+      data::Save(outputFilename, results, true);
+    }
+  }
+
+  // Save the model, if requested.
+  if (CLI::HasParam("output_model_file"))
+    data::Save(CLI::GetParam<string>("output_model_file"), "rmva_model", net);
+}

From d1650e410c5a0bc66e90abdf14e3d217b0050678 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Tue, 10 May 2016 17:49:54 +0200
Subject: [PATCH 11/87]  Removes trailing whitespaces at the end of lines.

---
 src/mlpack/methods/ann/layer/glimpse_layer.hpp    | 12 ++++++------
 src/mlpack/methods/ann/layer/leaky_relu_layer.hpp |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
index 8e7ab8b49d1..25ebfa5db92 100644
--- a/src/mlpack/methods/ann/layer/glimpse_layer.hpp
+++ b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
@@ -133,7 +133,7 @@ class GlimpseLayer
               ReSampling(poolingInput, output.slice(j));
             }
           }
-        }        
+        }
       }
     }
   }
@@ -217,7 +217,7 @@ class GlimpseLayer
             {
               DownwardReSampling(inputParameter.slice(paddedSlice),
                   mappedError.slice(j), poolingOutput);
-            }            
+            }
 
             inputPadded.subcube(x, y,
                 paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
@@ -270,7 +270,7 @@ class GlimpseLayer
       for (size_t j = 0; j < w.n_cols; j++, i++)
       {
         w(k, j) = t(i);
-      }      
+      }
     }
   }
 
@@ -383,9 +383,9 @@ class GlimpseLayer
         double nw = (ixNe - ix) * (iySw - iy);
 
         // Calculate the weighted sum.
-        output(y, x) = input(iyNw, ixNw) * nw + 
+        output(y, x) = input(iyNw, ixNw) * nw +
             input(iyNw, std::min(ixNe,  iWidth)) * ne +
-            input(std::min(iySw, iHeight), ixNw) * sw + 
+            input(std::min(iySw, iHeight), ixNw) * sw +
             input(std::min(iySw, iHeight), std::min(ixNe, iWidth)) * se;
       }
     }
@@ -408,7 +408,7 @@ class GlimpseLayer
     double iHeight = input.n_cols - 1;
 
     double wRatio = iWidth / (size - 1);
-    double hRatio = iHeight / (size - 1);    
+    double hRatio = iHeight / (size - 1);
 
     for (size_t y = 0; y < size; y++)
     {
diff --git a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp b/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
index b8b391c21fd..9676c49c490 100644
--- a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
+++ b/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
@@ -134,9 +134,9 @@ class LeakyReLULayer
   //! Modify the delta.
   OutputDataType& Delta() { return delta; }
 
-  //! Get the non zero gradient
+  //! Get the non zero gradient.
   double const& Alpha() const { return alpha; }
-  //! Modify the non zero gradient
+  //! Modify the non zero gradient.
   double& Alpha() { return alpha; }
 
   /**

From 989dd35359ee0c2258616ea57675f639ff47bfaa Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Fri, 20 May 2016 22:24:25 +0200
Subject: [PATCH 12/87] Include split_data.hpp file into the build process, so
 that it is available afterwards.

---
 src/mlpack/core/data/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mlpack/core/data/CMakeLists.txt b/src/mlpack/core/data/CMakeLists.txt
index ee4f8a8634e..ea87d0f13ab 100644
--- a/src/mlpack/core/data/CMakeLists.txt
+++ b/src/mlpack/core/data/CMakeLists.txt
@@ -14,6 +14,7 @@ set(SOURCES
   save.hpp
   save_impl.hpp
   serialization_shim.hpp
+  split_data.hpp
 )
 
 # add directory name to sources

From f1bf339c3e12e1ebba1db1fa64036b58632e5a9d Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Sat, 21 May 2016 00:29:47 +0200
Subject: [PATCH 13/87] Remove unused output parameter.

---
 src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
index 9b1451c0b4a..09e55b5acd5 100644
--- a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
+++ b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
@@ -70,7 +70,6 @@ class VRClassRewardLayer
   template<typename eT>
   double Forward(const arma::Mat<eT>& input, const arma::Mat<eT>& target)
   {
-    double output = 0;
     reward = 0;
     arma::uword index;
 
@@ -78,7 +77,7 @@ class VRClassRewardLayer
     {
       input.unsafe_col(i).max(index);
       reward = ((index + 1) == target(i)) * scale;
-    }   
+    }
 
     if (sizeAverage)
     {
@@ -101,7 +100,7 @@ class VRClassRewardLayer
   double Backward(const arma::field<arma::Mat<eT> >& input,
                 const arma::Mat<eT>& /* gy */,
                 arma::field<arma::Mat<eT> >& g)
-  {    
+  {
     g = arma::field<arma::Mat<eT> >(2, 1);
     g(0, 0) = arma::zeros(input(0, 0).n_rows, input(0, 0).n_cols);
 

From 39eefded8c6e4edd95c5b0c8110adf8d5aba24be Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Sat, 21 May 2016 00:33:29 +0200
Subject: [PATCH 14/87] Add RMVA class and function documentation.

---
 src/mlpack/methods/rmva/rmva.hpp      | 90 ++++++++++++++++++++-------
 src/mlpack/methods/rmva/rmva_impl.hpp |  7 +--
 2 files changed, 70 insertions(+), 27 deletions(-)

diff --git a/src/mlpack/methods/rmva/rmva.hpp b/src/mlpack/methods/rmva/rmva.hpp
index 6756465c852..7fb0510b395 100644
--- a/src/mlpack/methods/rmva/rmva.hpp
+++ b/src/mlpack/methods/rmva/rmva.hpp
@@ -2,7 +2,8 @@
  * @file rmva.hpp
  * @author Marcus Edel
  *
- * Definition of the RNN class, which implements feed forward neural networks.
+ * Definition of the RecurrentNeuralAttention class, which implements the
+ * Recurrent Model for Visual Attention.
  */
 #ifndef __MLPACK_METHODS_RMVA_RMVA_HPP
 #define __MLPACK_METHODS_RMVA_RMVA_HPP
@@ -23,11 +24,32 @@ namespace mlpack {
 namespace ann /** Artificial Neural Network. */ {
 
 /**
- * An implementation of a standard feed forward network.
+ * This class implements the Recurrent Model for Visual Attention, using a
+ * variety of possible layer implementations.
  *
- * @tparam LayerTypes Contains all layer modules used to construct the network.
- * @tparam OutputLayerType The outputlayer type used to evaluate the network.
- * @tparam PerformanceFunction Performance strategy used to claculate the error.
+ * For more information, see the following paper.
+ *
+ * @code
+ * @article{MnihHGK14,
+ *   title={Recurrent Models of Visual Attention},
+ *   author={Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu},
+ *   journal={CoRR},
+ *   volume={abs/1406.6247},
+ *   year={2014}
+ * }
+ * @endcode
+ *
+ * @tparam LocatorType Type of locator network.
+ * @tparam LocationSensorType Type of location sensor network.
+ * @tparam GlimpseSensorType Type of glimpse sensor network.
+ * @tparam GlimpseType Type of glimpse network.
+ * @tparam StartType Type of start network.
+ * @tparam FeedbackType Type of feedback network.
+ * @tparam TransferType Type of transfer network.
+ * @tparam ClassifierType Type of classifier network.
+ * @tparam RewardPredictorType Type of reward predictor network.
+ * @tparam InitializationRuleType Rule used to initialize the weight matrix.
+ * @tparam MatType Matrix type (arma::mat or arma::sp_mat).
  */
 template<
   typename LocatorType,
@@ -60,11 +82,20 @@ class RecurrentNeuralAttention
       MatType>;
 
   /**
-   * Construct the RNN object, which will construct a recurrent neural
-   * network with the specified layers.
+   * Construct the RecurrentNeuralAttention object, which will construct the
+   * recurrent model for visual attention using the specified networks.
    *
-   * @param network The network modules used to construct the network.
-   * @param outputLayer The outputlayer used to evaluate the network.
+   * @param locator The locator network.
+   * @param locationSensor The location sensor network.
+   * @param glimpseSensor The glimpse sensor network.
+   * @param glimpse The glimpse network.
+   * @param start The start network.
+   * @param feedback The feedback network.
+   * @param transfer The transfer network.
+   * @param classifier The classifier network.
+   * @param rewardPredictor The reward predictor network.
+   * @param nStep Number of steps for the back-propagate through time.
+   * @param initializeRule Rule used to initialize the weight matrix.
    */
   template<typename TypeLocator,
            typename TypeLocationSensor,
@@ -85,8 +116,20 @@ class RecurrentNeuralAttention
                            TypeClassifier&& classifier,
                            TypeRewardPredictor&& rewardPredictor,
                            const size_t nStep,
-                           InitializationRuleType initializeRule = InitializationRuleType());
-
+                           InitializationRuleType initializeRule =
+                              InitializationRuleType());
+  /**
+   * Train the network on the given input data using the given optimizer.
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @tparam OptimizerType Type of optimizer to use to train the model.
+   * @param predictors Input training variables.
+   * @param responses Outputs results from input training variables.
+   * @param optimizer Instantiated optimizer used to train the model.
+   */
   template<
       template<typename> class OptimizerType = mlpack::optimization::RMSprop
   >
@@ -105,8 +148,8 @@ class RecurrentNeuralAttention
   void Predict(arma::mat& predictors, arma::mat& responses);
 
   /**
-   * Evaluate the feedforward network with the given parameters. This function
-   * is usually called by the optimizer to train the model.
+   * Evaluate the network with the given parameters. This function is usually
+   * called by the optimizer to train the model.
    *
    * @param parameters Matrix model parameters.
    * @param i Index of point to use for objective function evaluation.
@@ -118,8 +161,8 @@ class RecurrentNeuralAttention
                   const bool deterministic = true);
 
   /**
-   * Evaluate the gradient of the feedforward network with the given parameters,
-   * and with respect to only one point in the dataset. This is useful for
+   * Evaluate the gradient of the network with the given parameters, and with
+   * respect to only one point in the dataset. This is useful for
    * optimizers such as SGD, which require a separable objective function.
    *
    * @param parameters Matrix of the model parameters to be optimized.
@@ -174,7 +217,8 @@ class RecurrentNeuralAttention
     ResetParameter(rewardPredictor);
     ResetParameter(start);
 
-    // Aample an initial starting actions by forwarding zeros through the locator.
+    // Sample an initial starting actions by forwarding zeros through the
+    // locator.
     locatorInput.push_back(new arma::cube(arma::zeros<arma::cube>(inputSize, 1,
         input.n_slices)));
 
@@ -378,7 +422,7 @@ class RecurrentNeuralAttention
   template<typename T, typename P>
   typename std::enable_if<
       HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Save(const size_t layerNumber,
+  Save(const size_t /* layerNumber */,
        boost::ptr_vector<MatType>& activations,
        T& layer,
        P& /* unused */)
@@ -626,7 +670,7 @@ class RecurrentNeuralAttention
 
   template<size_t I = 1, typename DataType, typename... Tp>
   typename std::enable_if<I == (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& error, std::tuple<Tp...>& t)
+  BackwardTail(const DataType& /* error */, std::tuple<Tp...>& t)
   {
     std::get<sizeof...(Tp) - I>(t).Backward(
         std::get<sizeof...(Tp) - I>(t).OutputParameter(),
@@ -740,15 +784,15 @@ class RecurrentNeuralAttention
     // Nothing to do here
   }
 
+  //! The locator network.
+  LocatorType locator;
+
   //! The location sensor network.
   LocationSensorType locationSensor;
 
   //! The glimpse sensor network.
   GlimpseSensorType glimpseSensor;
 
-  //! The locator network.
-  LocatorType locator;
-
   //! The glimpse network.
   GlimpseType glimpse;
 
@@ -905,8 +949,8 @@ class RecurrentNeuralAttention
   arma::mat evaluationLocation;
 }; // class RecurrentNeuralAttention
 
-}; // namespace ann
-}; // namespace mlpack
+} // namespace ann
+} // namespace mlpack
 
 // Include implementation.
 #include "rmva_impl.hpp"
diff --git a/src/mlpack/methods/rmva/rmva_impl.hpp b/src/mlpack/methods/rmva/rmva_impl.hpp
index 9efa39f199c..fb9fd7c12ee 100644
--- a/src/mlpack/methods/rmva/rmva_impl.hpp
+++ b/src/mlpack/methods/rmva/rmva_impl.hpp
@@ -2,7 +2,7 @@
  * @file rmva_impl.hpp
  * @author Marcus Edel
  *
- * Definition of the FFN class, which implements feed forward neural networks.
+ * Implementation of the Recurrent Model for Visual Attention.
  */
 #ifndef __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP
 #define __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP
@@ -13,7 +13,6 @@
 namespace mlpack {
 namespace ann /** Artificial Neural Network. */ {
 
-
 template<
   typename LocatorType,
   typename LocationSensorType,
@@ -270,7 +269,7 @@ double RecurrentNeuralAttention<
   location.clear();
   feedbackActivationsInput.clear();
 
-  // Aample an initial starting actions by forwarding zeros through the locator.
+  // Sample an initial starting actions by forwarding zeros through the locator.
   locatorInput.push_back(new arma::cube(arma::zeros<arma::cube>(inputSize, 1,
       input.n_slices)));
 
@@ -583,7 +582,7 @@ void RecurrentNeuralAttention<
     UpdateGradients(std::get<0>(transfer).Delta(), locationSensor);
     UpdateGradients(std::get<0>(transfer).Delta(), glimpseSensor);
 
-    // feedback module
+    // Feedback module.
     if (step != 0)
     {
       UpdateGradients(feedbackActivationsInput[step - 1],

From dd136db3241a5d6f9d8c812e9876b89fb5d8786d Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Fri, 20 May 2016 12:17:37 -0300
Subject: [PATCH 15/87] Properly use Enum type.

---
 src/mlpack/methods/neighbor_search/kfn_main.cpp      | 2 +-
 src/mlpack/methods/neighbor_search/knn_main.cpp      | 2 +-
 src/mlpack/methods/neighbor_search/ns_model.hpp      | 8 ++++----
 src/mlpack/methods/neighbor_search/ns_model_impl.hpp | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mlpack/methods/neighbor_search/kfn_main.cpp b/src/mlpack/methods/neighbor_search/kfn_main.cpp
index 3c185decafb..d6807400e79 100644
--- a/src/mlpack/methods/neighbor_search/kfn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/kfn_main.cpp
@@ -149,7 +149,7 @@ int main(int argc, char *argv[])
     const string treeType = CLI::GetParam<string>("tree_type");
     const bool randomBasis = CLI::HasParam("random_basis");
 
-    int tree = 0;
+    KFNModel::TreeTypes tree = KFNModel::KD_TREE;
     if (treeType == "kd")
       tree = KFNModel::KD_TREE;
     else if (treeType == "cover")
diff --git a/src/mlpack/methods/neighbor_search/knn_main.cpp b/src/mlpack/methods/neighbor_search/knn_main.cpp
index f02b96c2bca..9dd77609f01 100644
--- a/src/mlpack/methods/neighbor_search/knn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/knn_main.cpp
@@ -153,7 +153,7 @@ int main(int argc, char *argv[])
     const string treeType = CLI::GetParam<string>("tree_type");
     const bool randomBasis = CLI::HasParam("random_basis");
 
-    int tree = 0;
+    KNNModel::TreeTypes tree = KNNModel::KD_TREE;
     if (treeType == "kd")
       tree = KNNModel::KD_TREE;
     else if (treeType == "cover")
diff --git a/src/mlpack/methods/neighbor_search/ns_model.hpp b/src/mlpack/methods/neighbor_search/ns_model.hpp
index df90f97968f..9c16199aabb 100644
--- a/src/mlpack/methods/neighbor_search/ns_model.hpp
+++ b/src/mlpack/methods/neighbor_search/ns_model.hpp
@@ -52,7 +52,7 @@ class NSModel
   };
 
  private:
-  int treeType;
+  TreeTypes treeType;
   size_t leafSize;
 
   // For random projections.
@@ -83,7 +83,7 @@ class NSModel
    * Initialize the NSModel with the given type and whether or not a random
    * basis should be used.
    */
-  NSModel(int treeType = TreeTypes::KD_TREE, bool randomBasis = false);
+  NSModel(TreeTypes treeType = TreeTypes::KD_TREE, bool randomBasis = false);
 
   //! Clean memory, if necessary.
   ~NSModel();
@@ -105,8 +105,8 @@ class NSModel
   size_t LeafSize() const { return leafSize; }
   size_t& LeafSize() { return leafSize; }
 
-  int TreeType() const { return treeType; }
-  int& TreeType() { return treeType; }
+  TreeTypes TreeType() const { return treeType; }
+  TreeTypes& TreeType() { return treeType; }
 
   bool RandomBasis() const { return randomBasis; }
   bool& RandomBasis() { return randomBasis; }
diff --git a/src/mlpack/methods/neighbor_search/ns_model_impl.hpp b/src/mlpack/methods/neighbor_search/ns_model_impl.hpp
index b25cf0eac0e..28c5a0bf8f0 100644
--- a/src/mlpack/methods/neighbor_search/ns_model_impl.hpp
+++ b/src/mlpack/methods/neighbor_search/ns_model_impl.hpp
@@ -21,7 +21,7 @@ namespace neighbor {
  * basis should be used.
  */
 template<typename SortPolicy>
-NSModel<SortPolicy>::NSModel(int treeType, bool randomBasis) :
+NSModel<SortPolicy>::NSModel(TreeTypes treeType, bool randomBasis) :
     treeType(treeType),
     randomBasis(randomBasis),
     kdTreeNS(NULL),

From 9b811f98334c7e45177935a9369d8324e98418e7 Mon Sep 17 00:00:00 2001
From: dasayan05 <dasayan05@hotmail.com>
Date: Tue, 24 May 2016 01:16:57 +0530
Subject: [PATCH 16/87] Typo fix in knn_main.cpp

---
 src/mlpack/methods/neighbor_search/knn_main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mlpack/methods/neighbor_search/knn_main.cpp b/src/mlpack/methods/neighbor_search/knn_main.cpp
index 9dd77609f01..4957e88ebe7 100644
--- a/src/mlpack/methods/neighbor_search/knn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/knn_main.cpp
@@ -34,7 +34,7 @@ PROGRAM_INFO("k-Nearest-Neighbors",
     "point in 'input.csv' and store the distances in 'distances.csv' and the "
     "neighbors in the file 'neighbors.csv':"
     "\n\n"
-    "$ mlpack_nn --k=5 --reference_file=input.csv "
+    "$ mlpack_knn --k=5 --reference_file=input.csv "
     "--distances_file=distances.csv\n --neighbors_file=neighbors.csv"
     "\n\n"
     "The output files are organized such that row i and column j in the "

From c82c747a7fb8e0081f9185e199ce1611fe5d073a Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Tue, 24 May 2016 11:37:10 -0300
Subject: [PATCH 17/87] Properly use Enum type.

---
 src/mlpack/methods/range_search/range_search_main.cpp | 2 +-
 src/mlpack/methods/range_search/rs_model.cpp          | 2 +-
 src/mlpack/methods/range_search/rs_model.hpp          | 8 ++++----
 src/mlpack/methods/rann/allkrann_main.cpp             | 2 +-
 src/mlpack/methods/rann/ra_model.hpp                  | 8 ++++----
 src/mlpack/methods/rann/ra_model_impl.hpp             | 6 +++---
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/mlpack/methods/range_search/range_search_main.cpp b/src/mlpack/methods/range_search/range_search_main.cpp
index 48fd2ede30a..3606950e2f6 100644
--- a/src/mlpack/methods/range_search/range_search_main.cpp
+++ b/src/mlpack/methods/range_search/range_search_main.cpp
@@ -160,7 +160,7 @@ int main(int argc, char *argv[])
     const string treeType = CLI::GetParam<string>("tree_type");
     const bool randomBasis = CLI::HasParam("random_basis");
 
-    int tree = 0;
+    RSModel::TreeTypes tree = RSModel::KD_TREE;
     if (treeType == "kd")
       tree = RSModel::KD_TREE;
     else if (treeType == "cover")
diff --git a/src/mlpack/methods/range_search/rs_model.cpp b/src/mlpack/methods/range_search/rs_model.cpp
index 7ffc97d41ed..e0cdb18da0a 100644
--- a/src/mlpack/methods/range_search/rs_model.cpp
+++ b/src/mlpack/methods/range_search/rs_model.cpp
@@ -14,7 +14,7 @@ using namespace mlpack::range;
  * Initialize the RSModel with the given tree type and whether or not a random
  * basis should be used.
  */
-RSModel::RSModel(int treeType, bool randomBasis) :
+RSModel::RSModel(TreeTypes treeType, bool randomBasis) :
     treeType(treeType),
     randomBasis(randomBasis),
     kdTreeRS(NULL),
diff --git a/src/mlpack/methods/range_search/rs_model.hpp b/src/mlpack/methods/range_search/rs_model.hpp
index a3827b529a1..9598981ff46 100644
--- a/src/mlpack/methods/range_search/rs_model.hpp
+++ b/src/mlpack/methods/range_search/rs_model.hpp
@@ -33,7 +33,7 @@ class RSModel
   };
 
  private:
-  int treeType;
+  TreeTypes treeType;
   size_t leafSize;
 
   //! If true, we randomly project the data into a new basis before search.
@@ -69,7 +69,7 @@ class RSModel
    * @param treeType Type of tree to use.
    * @param randomBasis Whether or not to use a random basis.
    */
-  RSModel(const int treeType = TreeTypes::KD_TREE,
+  RSModel(const TreeTypes treeType = TreeTypes::KD_TREE,
           const bool randomBasis = false);
 
   /**
@@ -100,9 +100,9 @@ class RSModel
   size_t& LeafSize() { return leafSize; }
 
   //! Get the type of tree.
-  int TreeType() const { return treeType; }
+  TreeTypes TreeType() const { return treeType; }
   //! Modify the type of tree (don't do this after the model has been built).
-  int& TreeType() { return treeType; }
+  TreeTypes& TreeType() { return treeType; }
 
   //! Get whether a random basis is used.
   bool RandomBasis() const { return randomBasis; }
diff --git a/src/mlpack/methods/rann/allkrann_main.cpp b/src/mlpack/methods/rann/allkrann_main.cpp
index 6efb097fd4a..ce6f9f1bdcd 100644
--- a/src/mlpack/methods/rann/allkrann_main.cpp
+++ b/src/mlpack/methods/rann/allkrann_main.cpp
@@ -161,7 +161,7 @@ int main(int argc, char *argv[])
     const string treeType = CLI::GetParam<string>("tree_type");
     const bool randomBasis = CLI::HasParam("random_basis");
 
-    int tree = 0;
+    RANNModel::TreeTypes tree = RANNModel::KD_TREE;
     if (treeType == "kd")
       tree = RANNModel::KD_TREE;
     else if (treeType == "cover")
diff --git a/src/mlpack/methods/rann/ra_model.hpp b/src/mlpack/methods/rann/ra_model.hpp
index fffdb5825b6..a04107fce70 100644
--- a/src/mlpack/methods/rann/ra_model.hpp
+++ b/src/mlpack/methods/rann/ra_model.hpp
@@ -45,7 +45,7 @@ class RAModel
 
  private:
   //! The type of tree being used.
-  int treeType;
+  TreeTypes treeType;
   //! The leaf size of the tree being used (useful only for the kd-tree).
   size_t leafSize;
 
@@ -79,7 +79,7 @@ class RAModel
    * Initialize the RAModel with the given type and whether or not a random
    * basis should be used.
    */
-  RAModel(int treeType = TreeTypes::KD_TREE, bool randomBasis = false);
+  RAModel(TreeTypes treeType = TreeTypes::KD_TREE, bool randomBasis = false);
 
   //! Clean memory, if necessary.
   ~RAModel();
@@ -132,9 +132,9 @@ class RAModel
   size_t& LeafSize();
 
   //! Get the type of tree being used.
-  int TreeType() const;
+  TreeTypes TreeType() const;
   //! Modify the type of tree being used.
-  int& TreeType();
+  TreeTypes& TreeType();
 
   //! Get whether or not a random basis is being used.
   bool RandomBasis() const;
diff --git a/src/mlpack/methods/rann/ra_model_impl.hpp b/src/mlpack/methods/rann/ra_model_impl.hpp
index 72f083bb72e..48b4b4aa33b 100644
--- a/src/mlpack/methods/rann/ra_model_impl.hpp
+++ b/src/mlpack/methods/rann/ra_model_impl.hpp
@@ -14,7 +14,7 @@ namespace mlpack {
 namespace neighbor {
 
 template<typename SortPolicy>
-RAModel<SortPolicy>::RAModel(const int treeType, const bool randomBasis) :
+RAModel<SortPolicy>::RAModel(const TreeTypes treeType, const bool randomBasis) :
     treeType(treeType),
     leafSize(20),
     randomBasis(randomBasis),
@@ -377,13 +377,13 @@ size_t& RAModel<SortPolicy>::LeafSize()
 }
 
 template<typename SortPolicy>
-int RAModel<SortPolicy>::TreeType() const
+typename RAModel<SortPolicy>::TreeTypes RAModel<SortPolicy>::TreeType() const
 {
   return treeType;
 }
 
 template<typename SortPolicy>
-int& RAModel<SortPolicy>::TreeType()
+typename RAModel<SortPolicy>::TreeTypes& RAModel<SortPolicy>::TreeType()
 {
   return treeType;
 }

From eef40b993fd1cc859cf1808f49404a8b4fbd5a07 Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Tue, 24 May 2016 14:07:16 -0300
Subject: [PATCH 18/87] Remove duplicated code for traversal info.

 Instead of including methods/neighbor_search/ns_traversal_info.hpp, include
 the equivalent definition in core/tree/traversal_info.hpp.
---
 src/mlpack/core/tree/traversal_info.hpp       |  6 ++
 src/mlpack/methods/emst/dtb_rules.hpp         |  4 +-
 src/mlpack/methods/fastmks/fastmks_rules.hpp  |  5 +-
 .../methods/kmeans/dual_tree_kmeans_rules.hpp |  4 +-
 .../methods/neighbor_search/CMakeLists.txt    |  1 -
 .../neighbor_search/neighbor_search_rules.hpp |  4 +-
 .../neighbor_search/ns_traversal_info.hpp     | 70 -------------------
 .../range_search/range_search_rules.hpp       |  4 +-
 src/mlpack/methods/rann/ra_search_rules.hpp   |  4 +-
 9 files changed, 18 insertions(+), 84 deletions(-)
 delete mode 100644 src/mlpack/methods/neighbor_search/ns_traversal_info.hpp

diff --git a/src/mlpack/core/tree/traversal_info.hpp b/src/mlpack/core/tree/traversal_info.hpp
index 43941e2e656..4ad01843f59 100644
--- a/src/mlpack/core/tree/traversal_info.hpp
+++ b/src/mlpack/core/tree/traversal_info.hpp
@@ -9,6 +9,9 @@
 #ifndef MLPACK_CORE_TREE_TRAVERSAL_INFO_HPP
 #define MLPACK_CORE_TREE_TRAVERSAL_INFO_HPP
 
+namespace mlpack {
+namespace tree {
+
 /**
  * The TraversalInfo class holds traversal information which is used in
  * dual-tree (and single-tree) traversals.  A traversal should be updating the
@@ -82,4 +85,7 @@ class TraversalInfo
   double lastBaseCase;
 };
 
+} // namespace tree
+} // namespace mlpack
+
 #endif
diff --git a/src/mlpack/methods/emst/dtb_rules.hpp b/src/mlpack/methods/emst/dtb_rules.hpp
index e8527a6ffd6..7f275cfbcfb 100644
--- a/src/mlpack/methods/emst/dtb_rules.hpp
+++ b/src/mlpack/methods/emst/dtb_rules.hpp
@@ -9,7 +9,7 @@
 
 #include <mlpack/core.hpp>
 
-#include "../neighbor_search/ns_traversal_info.hpp"
+#include <mlpack/core/tree/traversal_info.hpp>
 
 namespace mlpack {
 namespace emst {
@@ -105,7 +105,7 @@ class DTBRules
                  TreeType& referenceNode,
                  const double oldScore) const;
 
-  typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
+  typedef typename tree::TraversalInfo<TreeType> TraversalInfoType;
 
   const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
   TraversalInfoType& TraversalInfo() { return traversalInfo; }
diff --git a/src/mlpack/methods/fastmks/fastmks_rules.hpp b/src/mlpack/methods/fastmks/fastmks_rules.hpp
index eb612f23476..0f4ad3423e9 100644
--- a/src/mlpack/methods/fastmks/fastmks_rules.hpp
+++ b/src/mlpack/methods/fastmks/fastmks_rules.hpp
@@ -9,8 +9,7 @@
 
 #include <mlpack/core.hpp>
 #include <mlpack/core/tree/cover_tree/cover_tree.hpp>
-
-#include "../neighbor_search/ns_traversal_info.hpp"
+#include <mlpack/core/tree/traversal_info.hpp>
 
 namespace mlpack {
 namespace fastmks {
@@ -91,7 +90,7 @@ class FastMKSRules
   //! Modify the number of times Score() was called.
   size_t& Scores() { return scores; }
 
-  typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
+  typedef typename tree::TraversalInfo<TreeType> TraversalInfoType;
 
   const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
   TraversalInfoType& TraversalInfo() { return traversalInfo; }
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
index fbfe9296bf5..1c7a7c41e3b 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
@@ -9,7 +9,7 @@
 #ifndef MLPACK_METHODS_KMEANS_DUAL_TREE_KMEANS_RULES_HPP
 #define MLPACK_METHODS_KMEANS_DUAL_TREE_KMEANS_RULES_HPP
 
-#include <mlpack/methods/neighbor_search/ns_traversal_info.hpp>
+#include <mlpack/core/tree/traversal_info.hpp>
 
 namespace mlpack {
 namespace kmeans {
@@ -39,7 +39,7 @@ class DualTreeKMeansRules
                  TreeType& referenceNode,
                  const double oldScore);
 
-  typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
+  typedef typename tree::TraversalInfo<TreeType> TraversalInfoType;
 
   TraversalInfoType& TraversalInfo() { return traversalInfo; }
   const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
diff --git a/src/mlpack/methods/neighbor_search/CMakeLists.txt b/src/mlpack/methods/neighbor_search/CMakeLists.txt
index 1c01e2e3915..6b61e1889e8 100644
--- a/src/mlpack/methods/neighbor_search/CMakeLists.txt
+++ b/src/mlpack/methods/neighbor_search/CMakeLists.txt
@@ -8,7 +8,6 @@ set(SOURCES
   neighbor_search_stat.hpp
   ns_model.hpp
   ns_model_impl.hpp
-  ns_traversal_info.hpp
   sort_policies/nearest_neighbor_sort.hpp
   sort_policies/nearest_neighbor_sort.cpp
   sort_policies/nearest_neighbor_sort_impl.hpp
diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_rules.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_rules.hpp
index e428708160e..474d22b005f 100644
--- a/src/mlpack/methods/neighbor_search/neighbor_search_rules.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_rules.hpp
@@ -8,7 +8,7 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_NEIGHBOR_SEARCH_RULES_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_NEIGHBOR_SEARCH_RULES_HPP
 
-#include "ns_traversal_info.hpp"
+#include <mlpack/core/tree/traversal_info.hpp>
 
 namespace mlpack {
 namespace neighbor {
@@ -94,7 +94,7 @@ class NeighborSearchRules
   size_t& Scores() { return scores; }
 
   //! Convenience typedef.
-  typedef NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
+  typedef typename tree::TraversalInfo<TreeType> TraversalInfoType;
 
   //! Get the traversal info.
   const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
diff --git a/src/mlpack/methods/neighbor_search/ns_traversal_info.hpp b/src/mlpack/methods/neighbor_search/ns_traversal_info.hpp
deleted file mode 100644
index 27bc70ebf41..00000000000
--- a/src/mlpack/methods/neighbor_search/ns_traversal_info.hpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * @file ns_traversal_info.hpp
- * @author Ryan Curtin
- *
- * This class holds traversal information for dual-tree traversals that are
- * using the NeighborSearchRules RuleType.
- */
-#ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_TRAVERSAL_INFO_HPP
-#define MLPACK_METHODS_NEIGHBOR_SEARCH_TRAVERSAL_INFO_HPP
-
-namespace mlpack {
-namespace neighbor {
-
-/**
- * Traversal information for NeighborSearch.  This information is used to make
- * parent-child prunes or parent-parent prunes in Score() without needing to
- * evaluate the distance between two nodes.
- *
- * The information held by this class is the last node combination visited
- * before the current node combination was recursed into and the distance
- * between the node centroids.
- */
-template<typename TreeType>
-class NeighborSearchTraversalInfo
-{
- public:
-  /**
-   * Create the TraversalInfo object and initialize the pointers to NULL.
-   */
-  NeighborSearchTraversalInfo() :
-      lastQueryNode(NULL),
-      lastReferenceNode(NULL),
-      lastScore(0.0),
-      lastBaseCase(0.0) { /* Nothing to do. */ }
-
-   //! Get the last query node.
-  TreeType* LastQueryNode() const { return lastQueryNode; }
-  //! Modify the last query node.
-  TreeType*& LastQueryNode() { return lastQueryNode; }
-
-  //! Get the last reference node.
-  TreeType* LastReferenceNode() const { return lastReferenceNode; }
-  //! Modify the last reference node.
-  TreeType*& LastReferenceNode() { return lastReferenceNode; }
-
-  //! Get the score associated with the last query and reference nodes.
-  double LastScore() const { return lastScore; }
-  //! Modify the score associated with the last query and reference nodes.
-  double& LastScore() { return lastScore; }
-
-  //! Get the base case associated with the last node combination.
-  double LastBaseCase() const { return lastBaseCase; }
-  //! Modify the base case associated with the last node combination.
-  double& LastBaseCase() { return lastBaseCase; }
-
- private:
-  //! The last query node.
-  TreeType* lastQueryNode;
-  //! The last reference node.
-  TreeType* lastReferenceNode;
-  //! The last distance.
-  double lastScore;
-  //! The last base case.
-  double lastBaseCase;
-};
-
-} // namespace neighbor
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/range_search/range_search_rules.hpp b/src/mlpack/methods/range_search/range_search_rules.hpp
index bfa82d0f24f..e392ee42ac2 100644
--- a/src/mlpack/methods/range_search/range_search_rules.hpp
+++ b/src/mlpack/methods/range_search/range_search_rules.hpp
@@ -7,7 +7,7 @@
 #ifndef MLPACK_METHODS_RANGE_SEARCH_RANGE_SEARCH_RULES_HPP
 #define MLPACK_METHODS_RANGE_SEARCH_RANGE_SEARCH_RULES_HPP
 
-#include "../neighbor_search/ns_traversal_info.hpp"
+#include <mlpack/core/tree/traversal_info.hpp>
 
 namespace mlpack {
 namespace range {
@@ -96,7 +96,7 @@ class RangeSearchRules
                  TreeType& referenceNode,
                  const double oldScore) const;
 
-  typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
+  typedef typename tree::TraversalInfo<TreeType> TraversalInfoType;
 
   const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
   TraversalInfoType& TraversalInfo() { return traversalInfo; }
diff --git a/src/mlpack/methods/rann/ra_search_rules.hpp b/src/mlpack/methods/rann/ra_search_rules.hpp
index 25be8c02f88..1037af4b821 100644
--- a/src/mlpack/methods/rann/ra_search_rules.hpp
+++ b/src/mlpack/methods/rann/ra_search_rules.hpp
@@ -9,7 +9,7 @@
 #ifndef MLPACK_METHODS_RANN_RA_SEARCH_RULES_HPP
 #define MLPACK_METHODS_RANN_RA_SEARCH_RULES_HPP
 
-#include "../neighbor_search/ns_traversal_info.hpp"
+#include <mlpack/core/tree/traversal_info.hpp>
 
 namespace mlpack {
 namespace neighbor {
@@ -185,7 +185,7 @@ class RASearchRules
       return arma::sum(numSamplesMade);
   }
 
-  typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
+  typedef typename tree::TraversalInfo<TreeType> TraversalInfoType;
 
   const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
   TraversalInfoType& TraversalInfo() { return traversalInfo; }

From b4ee95407de9f0ddc0558a5938cc658585f4f1bc Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Tue, 24 May 2016 14:43:25 -0300
Subject: [PATCH 19/87] Remove unnecessary include.

---
 src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
index a604d831cc9..a2306b1b52d 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
@@ -9,8 +9,6 @@
 #ifndef MLPACK_METHODS_KMEANS_PELLEG_MOORE_KMEANS_RULES_HPP
 #define MLPACK_METHODS_KMEANS_PELLEG_MOORE_KMEANS_RULES_HPP
 
-#include <mlpack/methods/neighbor_search/ns_traversal_info.hpp>
-
 namespace mlpack {
 namespace kmeans {
 

From 8acc4ceefdbd46ee32b766a4a01bbf8151574019 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Sun, 22 May 2016 14:15:44 +0200
Subject: [PATCH 20/87] Remove leading underscores in header guards as
 discussed in #533.

---
 src/mlpack/methods/ann/layer/constant_layer.hpp               | 4 ++--
 src/mlpack/methods/ann/layer/dropconnect_layer.hpp            | 4 ++--
 src/mlpack/methods/ann/layer/empty_layer.hpp                  | 4 ++--
 src/mlpack/methods/ann/layer/glimpse_layer.hpp                | 4 ++--
 src/mlpack/methods/ann/layer/hard_tanh_layer.hpp              | 4 ++--
 src/mlpack/methods/ann/layer/leaky_relu_layer.hpp             | 4 ++--
 src/mlpack/methods/ann/layer/log_softmax_layer.hpp            | 4 ++--
 src/mlpack/methods/ann/layer/multiply_constant_layer.hpp      | 4 ++--
 .../methods/ann/layer/negative_log_likelihood_layer.hpp       | 4 ++--
 src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp       | 4 ++--
 src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp        | 4 ++--
 11 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/mlpack/methods/ann/layer/constant_layer.hpp b/src/mlpack/methods/ann/layer/constant_layer.hpp
index a142a678564..67701deba93 100644
--- a/src/mlpack/methods/ann/layer/constant_layer.hpp
+++ b/src/mlpack/methods/ann/layer/constant_layer.hpp
@@ -5,8 +5,8 @@
  * Definition of the ConstantLayer class, which outputs a constant value given
  * any input.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP
 
 #include <mlpack/core.hpp>
 
diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
index a1d19e04ded..651a8a7ab72 100644
--- a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
+++ b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
@@ -5,8 +5,8 @@
  * Definition of the DropConnectLayer class, which implements a regularizer
  * that randomly sets connections to zero. Preventing units from co-adapting.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
 
 #include <mlpack/core.hpp>
 
diff --git a/src/mlpack/methods/ann/layer/empty_layer.hpp b/src/mlpack/methods/ann/layer/empty_layer.hpp
index 9e41a08440c..11cb6a0a243 100644
--- a/src/mlpack/methods/ann/layer/empty_layer.hpp
+++ b/src/mlpack/methods/ann/layer/empty_layer.hpp
@@ -4,8 +4,8 @@
  *
  * Definition of the EmptyLayer class, which is basically empty.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP
 
 namespace mlpack{
 namespace ann /** Artificial Neural Network. */ {
diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
index 25ebfa5db92..228ce7d494c 100644
--- a/src/mlpack/methods/ann/layer/glimpse_layer.hpp
+++ b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
@@ -18,8 +18,8 @@
  * }
  * @endcode
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
 
 #include <mlpack/core.hpp>
 #include <mlpack/methods/ann/pooling_rules/mean_pooling.hpp>
diff --git a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp b/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp
index 68b6bca1edc..16167a5968e 100644
--- a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp
+++ b/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp
@@ -4,8 +4,8 @@
  *
  * Definition and implementation of the HardTanHLayer layer.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP
 
 #include <mlpack/core.hpp>
 
diff --git a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp b/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
index 9676c49c490..a55bd2582c8 100644
--- a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
+++ b/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
@@ -6,8 +6,8 @@
  * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng,
  * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP
 
 #include <mlpack/core.hpp>
 
diff --git a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
index 53a135af5c2..1820f454835 100644
--- a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
+++ b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
@@ -4,8 +4,8 @@
  *
  * Definition of the LogSoftmaxLayer class.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
 
 #include <mlpack/core.hpp>
 
diff --git a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
index 78c42e9e03f..b15d23c2464 100644
--- a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
+++ b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
@@ -5,8 +5,8 @@
  * Definition of the MultiplyConstantLayer class, which multiplies the input by
  * a (non-learnable) constant.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
 
 #include <mlpack/core.hpp>
 
diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
index 27a337e81ff..d2f1f51108f 100644
--- a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
+++ b/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
@@ -4,8 +4,8 @@
  *
  * Definition of the NegativeLogLikelihoodLayer class.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP
-#define __MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP
+#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP
 
 #include <mlpack/core.hpp>
 
diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
index e5cd7373f19..7dbe089b792 100644
--- a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
+++ b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
@@ -5,8 +5,8 @@
  * Definition of the ReinforceNormalLayer class, which implements the REINFORCE
  * algorithm for the normal distribution.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP
 
 #include <mlpack/core.hpp>
 
diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
index 09e55b5acd5..95692f0957f 100644
--- a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
+++ b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
@@ -5,8 +5,8 @@
  * Definition of the VRClassRewardLayer class, which implements the variance
  * reduced classification reinforcement layer.
  */
-#ifndef __MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
-#define __MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
+#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
 
 #include <mlpack/core.hpp>
 

From cd7f06319f88be12c2e7ab35a814c1653fc1e4b4 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Wed, 25 May 2016 14:05:06 +0200
Subject: [PATCH 21/87] Add CMakeLists file to build the RMVA.

---
 src/mlpack/methods/CMakeLists.txt      |  1 +
 src/mlpack/methods/rmva/CMakeLists.txt | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 src/mlpack/methods/rmva/CMakeLists.txt

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index d0ea04ca587..eab2f5c568d 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -48,6 +48,7 @@ set(DIRS
   range_search
   rann
   regularized_svd
+  rmva
   softmax_regression
   sparse_autoencoder
   sparse_coding
diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt
new file mode 100644
index 00000000000..d075ce35ac2
--- /dev/null
+++ b/src/mlpack/methods/rmva/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into mlpack.
+set(SOURCES
+  rmva.hpp
+  rmva_impl.hpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all mlpack sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
+add_executable(mlpack_rmva
+  rmva_main.cpp
+)
+target_link_libraries(mlpack_rmva
+  mlpack
+)
+install(TARGETS mlpack_rmva RUNTIME DESTINATION bin)

From c15541b94ce0deb5d8f08d46862bb4948afd8f6c Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Wed, 25 May 2016 14:29:35 +0200
Subject: [PATCH 22/87] Remove CMakeLists file; do not build the RMVA code.

---
 src/mlpack/methods/CMakeLists.txt      |  1 -
 src/mlpack/methods/rmva/CMakeLists.txt | 23 -----------------------
 2 files changed, 24 deletions(-)
 delete mode 100644 src/mlpack/methods/rmva/CMakeLists.txt

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index eab2f5c568d..d0ea04ca587 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -48,7 +48,6 @@ set(DIRS
   range_search
   rann
   regularized_svd
-  rmva
   softmax_regression
   sparse_autoencoder
   sparse_coding
diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt
deleted file mode 100644
index d075ce35ac2..00000000000
--- a/src/mlpack/methods/rmva/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  rmva.hpp
-  rmva_impl.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
-
-add_executable(mlpack_rmva
-  rmva_main.cpp
-)
-target_link_libraries(mlpack_rmva
-  mlpack
-)
-install(TARGETS mlpack_rmva RUNTIME DESTINATION bin)

From 04fe0d5198b6f138c1d1eff23b7c7f7100db7ca9 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Wed, 25 May 2016 16:08:45 +0200
Subject: [PATCH 23/87] Use n_rows and n_cols to define the matrix size instead
 of arma::size().

---
 src/mlpack/methods/CMakeLists.txt             |  1 +
 .../ann/layer/reinforce_normal_layer.hpp      |  7 +++---
 src/mlpack/methods/rmva/CMakeLists.txt        | 23 +++++++++++++++++++
 3 files changed, 28 insertions(+), 3 deletions(-)
 create mode 100644 src/mlpack/methods/rmva/CMakeLists.txt

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index d0ea04ca587..209beef72d8 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -47,6 +47,7 @@ set(DIRS
   radical
   range_search
   rann
+  rmva
   regularized_svd
   softmax_regression
   sparse_autoencoder
diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
index 7dbe089b792..3f8099fad82 100644
--- a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
+++ b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
@@ -46,13 +46,14 @@ class ReinforceNormalLayer
    * @param input Input data used for evaluating the specified function.
    * @param output Resulting output activation.
    */
-  template<typename InputType, typename OutputType>
-  void Forward(const InputType& input, OutputType& output)
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
   {
     if (!deterministic)
     {
       // Multiply by standard deviations and re-center the means to the mean.
-      output = arma::randn<OutputType>(arma::size(input)) * stdev + input;
+      output = arma::randn<arma::Mat<eT> >(input.n_rows, input.n_cols) *
+          stdev + input;
     }
     else
     {
diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt
new file mode 100644
index 00000000000..d075ce35ac2
--- /dev/null
+++ b/src/mlpack/methods/rmva/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into mlpack.
+set(SOURCES
+  rmva.hpp
+  rmva_impl.hpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all mlpack sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
+add_executable(mlpack_rmva
+  rmva_main.cpp
+)
+target_link_libraries(mlpack_rmva
+  mlpack
+)
+install(TARGETS mlpack_rmva RUNTIME DESTINATION bin)

From 4fae38575502b90b80abddf21a3e4d46a4b4c9fc Mon Sep 17 00:00:00 2001
From: dasayan05 <dasayan05@hotmail.com>
Date: Wed, 25 May 2016 22:14:11 +0530
Subject: [PATCH 24/87] Replace deprecated arma::Math log_max() with arma::Datum log_max

---
 .../methods/ann/activation_functions/logistic_function.hpp    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/ann/activation_functions/logistic_function.hpp b/src/mlpack/methods/ann/activation_functions/logistic_function.hpp
index 5f197978b68..626d9ea0dcf 100644
--- a/src/mlpack/methods/ann/activation_functions/logistic_function.hpp
+++ b/src/mlpack/methods/ann/activation_functions/logistic_function.hpp
@@ -33,9 +33,9 @@ class LogisticFunction
   template<typename eT>
   static double fn(const eT x)
   {
-    if(x < arma::Math<eT>::log_max())
+    if(x < arma::Datum<eT>::log_max)
     {
-      if (x > -arma::Math<eT>::log_max())
+      if (x > -arma::Datum<eT>::log_max)
         return 1.0 /  (1.0 + std::exp(-x));
 
       return 0.0;

From 6f6173c7651478af406e9965cafa31e1238dad3c Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Thu, 26 May 2016 13:26:12 +0200
Subject: [PATCH 25/87] Update documentation for changed names.

---
 doc/guide/iodoc.hpp                         | 2 +-
 doc/tutorials/range_search/range_search.txt | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/guide/iodoc.hpp b/doc/guide/iodoc.hpp
index 33eda5eccf2..8cdf417b7ec 100644
--- a/doc/guide/iodoc.hpp
+++ b/doc/guide/iodoc.hpp
@@ -132,7 +132,7 @@ Documentation is automatically generated using those macros, and when the
 program is run with --help the following is displayed:
 
 @code
-$ pca --help
+$ mlpack_pca --help
 Principal Components Analysis
 
   This program performs principal components analysis on the given dataset.  It
diff --git a/doc/tutorials/range_search/range_search.txt b/doc/tutorials/range_search/range_search.txt
index a1a3a979e64..277a8444682 100644
--- a/doc/tutorials/range_search/range_search.txt
+++ b/doc/tutorials/range_search/range_search.txt
@@ -67,7 +67,7 @@ option is used so that output is given.  Further documentation on each
 individual option can be found by typing
 
 @code
-$ range_search --help
+$ mlpack_range_search --help
 @endcode
 
 @subsection cli_ex1_rstut One dataset, points with distance <= 0.01
@@ -147,7 +147,7 @@ empty.
 @subsection cli_ex2_rstut Query and reference dataset, range [1.0, 1.5]
 
 @code
-$ range_search -q query_dataset.csv -r reference_dataset.csv -n \
+$ mlpack_range_search -q query_dataset.csv -r reference_dataset.csv -n \
 > neighbors_out.csv -d distances_out.csv -L 1.0 -U 1.5 -v
 [INFO ] Loading 'reference_dataset.csv' as CSV data.  Size is 3 x 1000.
 [INFO ] Loaded reference data from 'reference_dataset.csv' (3x1000).
@@ -196,8 +196,8 @@ faster computation.  The leaf size is modifiable through the command-line
 interface, as shown below.
 
 @code
-$ range_search -r dataset.csv -n neighbors_out.csv -d distances_out.csv -L 0.7 \
-> -U 0.8 -l 15 -v
+$ mlpack_range_search -r dataset.csv -n neighbors_out.csv -d distances_out.csv \
+> -L 0.7 -U 0.8 -l 15 -v
 [INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
 [INFO ] Loaded reference data from 'dataset.csv' (3x1000).
 [INFO ] Building reference tree...

From a35c39061245b0ccb757f65dfe6626b8a3c04d9c Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Thu, 26 May 2016 20:25:35 +0900
Subject: [PATCH 26/87] Add CLI executable for splitting data (preprocess_split)

---
 src/mlpack/methods/CMakeLists.txt             |  1 +
 src/mlpack/methods/preprocess/CMakeLists.txt  | 18 +++++
 .../preprocess/preprocess_split_main.cpp      | 77 +++++++++++++++++++
 3 files changed, 96 insertions(+)
 create mode 100644 src/mlpack/methods/preprocess/CMakeLists.txt
 create mode 100644 src/mlpack/methods/preprocess/preprocess_split_main.cpp

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 209beef72d8..5734d5c9d8a 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -15,6 +15,7 @@ endmacro ()
 
 # Recurse into each method mlpack provides.
 set(DIRS
+  preprocess
   adaboost
   amf
   ann
diff --git a/src/mlpack/methods/preprocess/CMakeLists.txt b/src/mlpack/methods/preprocess/CMakeLists.txt
new file mode 100644
index 00000000000..3a2f7bf5c9d
--- /dev/null
+++ b/src/mlpack/methods/preprocess/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Define the files we need to compile.
+# Anything not in this list will not be compiled into mlpack.
+set(SOURCES
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all mlpack sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
+#add_cli_executable(preprocess_stats)
+add_cli_executable(preprocess_split)
+#add_cli_executable(preprocess_scan)
+#add_cli_executable(preprocess_imputer)
diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
new file mode 100644
index 00000000000..996272a20ee
--- /dev/null
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -0,0 +1,77 @@
+/**
+ * @file preprocess_split_main.cpp
+ * @author Keon Woo Kim
+ *
+ * split data CLI executable
+ */
+#include <mlpack/core.hpp>
+#include <mlpack/core/data/split_data.hpp>
+
+PROGRAM_INFO("Split into Train and Test Data", "This "
+    "utility takes data and labels and split into a training "
+    "set and a test set.");
+
+// Define parameters for data
+PARAM_STRING_REQ("input_file", "File containing data,", "i");
+PARAM_STRING_REQ("output_train_data", "File name to save train data", "d");
+PARAM_STRING_REQ("output_test_data", "File name to save test data", "D");
+
+// Define parameters for labels
+PARAM_STRING_REQ("input_label", "File containing labels", "I");
+PARAM_STRING_REQ("output_train_label", "File name to save train label", "l");
+PARAM_STRING_REQ("output_test_label", "File name to save test label", "L");
+
+// Define optional test ratio, default is 0.2 (Test 20% Train 80%)
+PARAM_DOUBLE("test_ratio", "Ratio of test set, defaults to 0.2"
+    "if not set", "r", 0.2);
+
+using namespace mlpack;
+using namespace arma;
+using namespace std;
+
+int main(int argc, char** argv)
+{
+  // Parse command line options.
+  CLI::ParseCommandLine(argc, argv);
+
+  // data
+  const string inputFile = CLI::GetParam<string>("input_file");
+  const string outputTrainData = CLI::GetParam<string>("output_train_data");
+  const string outputTestData = CLI::GetParam<string>("output_test_data");
+  // labels
+  const string inputLabel = CLI::GetParam<string>("input_label");
+  const string outputTrainLabel = CLI::GetParam<string>("output_train_label");
+  const string outputTestLabel = CLI::GetParam<string>("output_test_label");
+
+  // Ratio
+  const double testRatio = CLI::GetParam<double>("test_ratio");
+
+  // container for input data and labels
+  arma::mat data;
+  arma::Mat<size_t> labels;
+
+  // Load Data and Labels
+  data::Load(inputFile, data, true);
+  data::Load(inputLabel, labels, true);
+  arma::Row<size_t> labels_row = labels.row(0); // extract first row
+
+  // Split Data
+  const auto value = data::TrainTestSplit(data, labels_row, testRatio);
+  Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
+  Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
+  Log::Info << "Train Label Count: " << get<2>(value).n_cols << endl;
+  Log::Info << "Test Label Count: " << get<3>(value).n_cols << endl;
+
+  // Save Train Data
+  data::Save(outputTrainData, get<0>(value), false);
+
+  // Save Test Data
+  data::Save(outputTestData, get<1>(value), false);
+
+  // Save Train Label
+  data::Save(outputTrainLabel, get<2>(value), false);
+
+  // Save Test Label
+  data::Save(outputTestLabel, get<3>(value), false);
+}
+

From c68c3b973739310ec5f61f3389d9b61a87eff4a6 Mon Sep 17 00:00:00 2001
From: dasayan05 <dasayan05@hotmail.com>
Date: Thu, 26 May 2016 18:38:09 +0530
Subject: [PATCH 27/87] vc2_test.csv wasn't in proper csv format

---
 src/mlpack/tests/data/vc2_test.csv | 134 ++++++++++++++---------------
 1 file changed, 67 insertions(+), 67 deletions(-)

diff --git a/src/mlpack/tests/data/vc2_test.csv b/src/mlpack/tests/data/vc2_test.csv
index ea8ad28a0fb..51ccd39966d 100644
--- a/src/mlpack/tests/data/vc2_test.csv
+++ b/src/mlpack/tests/data/vc2_test.csv
@@ -1,67 +1,67 @@
-74.43	41.56	27.7	32.88	107.95	5
-50.21	29.76	36.1	20.45	128.29	5.74
-30.15	11.92	34	18.23	112.68	11.46
-41.17	17.32	33.47	23.85	116.38	-9.57
-47.66	13.28	36.68	34.38	98.25	6.27
-43.35	7.47	28.07	35.88	112.78	5.75
-46.86	15.35	38	31.5	116.25	1.66
-43.2	19.66	35	23.54	124.85	-2.92
-48.11	14.93	35.56	33.18	124.06	7.95
-92.03	35.39	77.42	56.63	115.72	58.06
-67.03	13.28	66.15	53.75	100.72	33.99
-80.82	19.24	61.64	61.58	89.47	44.17
-80.65	26.34	60.9	54.31	120.1	52.47
-68.72	49.43	68.06	19.29	125.02	54.69
-37.9	4.48	24.71	33.42	157.85	33.61
-64.62	15.23	67.63	49.4	90.3	31.33
-75.44	31.54	89.6	43.9	106.83	54.97
-71	37.52	84.54	33.49	125.16	67.77
-81.06	20.8	91.78	60.26	125.43	38.18
-91.47	24.51	84.62	66.96	117.31	52.62
-81.08	21.26	78.77	59.83	90.07	49.16
-60.42	5.27	59.81	55.15	109.03	30.27
-85.68	38.65	82.68	47.03	120.84	61.96
-82.41	29.28	77.05	53.13	117.04	62.77
-43.72	9.81	52	33.91	88.43	40.88
-86.47	40.3	61.14	46.17	97.4	55.75
-74.47	33.28	66.94	41.19	146.47	124.98
-70.25	10.34	76.37	59.91	119.24	32.67
-72.64	18.93	68	53.71	116.96	25.38
-71.24	5.27	86	65.97	110.7	38.26
-63.77	12.76	65.36	51.01	89.82	56
-58.83	37.58	125.74	21.25	135.63	117.31
-74.85	13.91	62.69	60.95	115.21	33.17
-75.3	16.67	61.3	58.63	118.88	31.58
-63.36	20.02	67.5	43.34	131	37.56
-67.51	33.28	96.28	34.24	145.6	88.3
-76.31	41.93	93.28	34.38	132.27	101.22
-73.64	9.71	63	63.92	98.73	26.98
-56.54	14.38	44.99	42.16	101.72	25.77
-80.11	33.94	85.1	46.17	125.59	100.29
-95.48	46.55	59	48.93	96.68	77.28
-74.09	18.82	76.03	55.27	128.41	73.39
-87.68	20.37	93.82	67.31	120.94	76.73
-48.26	16.42	36.33	31.84	94.88	28.34
-65.76	13.21	44	52.55	129.39	-1.98
-40.41	-1.33	30.98	41.74	119.34	-6.17
-48.8	18.02	52	30.78	139.15	10.44
-50.09	13.43	34.46	36.66	119.13	3.09
-64.26	14.5	43.9	49.76	115.39	5.95
-53.68	13.45	41.58	40.24	113.91	2.74
-49	13.11	51.87	35.88	126.4	0.54
-59.17	14.56	43.2	44.6	121.04	2.83
-67.8	16.55	43.26	51.25	119.69	4.87
-61.73	17.11	46.9	44.62	120.92	3.09
-33.04	-0.32	19.07	33.37	120.39	9.35
-74.57	15.72	58.62	58.84	105.42	0.6
-44.43	14.17	32.24	30.26	131.72	-3.6
-36.42	13.88	20.24	22.54	126.08	0.18
-51.08	14.21	35.95	36.87	115.8	6.91
-34.76	2.63	29.5	32.12	127.14	-0.46
-48.9	5.59	55.5	43.32	137.11	19.85
-46.24	10.06	37	36.17	128.06	-5.1
-46.43	6.62	48.1	39.81	130.35	2.45
-39.66	16.21	36.67	23.45	131.92	-4.97
-45.58	18.76	33.77	26.82	116.8	3.13
-66.51	20.9	31.73	45.61	128.9	1.52
-82.91	29.89	58.25	53.01	110.71	6.08
\ No newline at end of file
+74.43,41.56,27.7,32.88,107.95,5
+50.21,29.76,36.1,20.45,128.29,5.74
+30.15,11.92,34,18.23,112.68,11.46
+41.17,17.32,33.47,23.85,116.38,-9.57
+47.66,13.28,36.68,34.38,98.25,6.27
+43.35,7.47,28.07,35.88,112.78,5.75
+46.86,15.35,38,31.5,116.25,1.66
+43.2,19.66,35,23.54,124.85,-2.92
+48.11,14.93,35.56,33.18,124.06,7.95
+92.03,35.39,77.42,56.63,115.72,58.06
+67.03,13.28,66.15,53.75,100.72,33.99
+80.82,19.24,61.64,61.58,89.47,44.17
+80.65,26.34,60.9,54.31,120.1,52.47
+68.72,49.43,68.06,19.29,125.02,54.69
+37.9,4.48,24.71,33.42,157.85,33.61
+64.62,15.23,67.63,49.4,90.3,31.33
+75.44,31.54,89.6,43.9,106.83,54.97
+71,37.52,84.54,33.49,125.16,67.77
+81.06,20.8,91.78,60.26,125.43,38.18
+91.47,24.51,84.62,66.96,117.31,52.62
+81.08,21.26,78.77,59.83,90.07,49.16
+60.42,5.27,59.81,55.15,109.03,30.27
+85.68,38.65,82.68,47.03,120.84,61.96
+82.41,29.28,77.05,53.13,117.04,62.77
+43.72,9.81,52,33.91,88.43,40.88
+86.47,40.3,61.14,46.17,97.4,55.75
+74.47,33.28,66.94,41.19,146.47,124.98
+70.25,10.34,76.37,59.91,119.24,32.67
+72.64,18.93,68,53.71,116.96,25.38
+71.24,5.27,86,65.97,110.7,38.26
+63.77,12.76,65.36,51.01,89.82,56
+58.83,37.58,125.74,21.25,135.63,117.31
+74.85,13.91,62.69,60.95,115.21,33.17
+75.3,16.67,61.3,58.63,118.88,31.58
+63.36,20.02,67.5,43.34,131,37.56
+67.51,33.28,96.28,34.24,145.6,88.3
+76.31,41.93,93.28,34.38,132.27,101.22
+73.64,9.71,63,63.92,98.73,26.98
+56.54,14.38,44.99,42.16,101.72,25.77
+80.11,33.94,85.1,46.17,125.59,100.29
+95.48,46.55,59,48.93,96.68,77.28
+74.09,18.82,76.03,55.27,128.41,73.39
+87.68,20.37,93.82,67.31,120.94,76.73
+48.26,16.42,36.33,31.84,94.88,28.34
+65.76,13.21,44,52.55,129.39,-1.98
+40.41,-1.33,30.98,41.74,119.34,-6.17
+48.8,18.02,52,30.78,139.15,10.44
+50.09,13.43,34.46,36.66,119.13,3.09
+64.26,14.5,43.9,49.76,115.39,5.95
+53.68,13.45,41.58,40.24,113.91,2.74
+49,13.11,51.87,35.88,126.4,0.54
+59.17,14.56,43.2,44.6,121.04,2.83
+67.8,16.55,43.26,51.25,119.69,4.87
+61.73,17.11,46.9,44.62,120.92,3.09
+33.04,-0.32,19.07,33.37,120.39,9.35
+74.57,15.72,58.62,58.84,105.42,0.6
+44.43,14.17,32.24,30.26,131.72,-3.6
+36.42,13.88,20.24,22.54,126.08,0.18
+51.08,14.21,35.95,36.87,115.8,6.91
+34.76,2.63,29.5,32.12,127.14,-0.46
+48.9,5.59,55.5,43.32,137.11,19.85
+46.24,10.06,37,36.17,128.06,-5.1
+46.43,6.62,48.1,39.81,130.35,2.45
+39.66,16.21,36.67,23.45,131.92,-4.97
+45.58,18.76,33.77,26.82,116.8,3.13
+66.51,20.9,31.73,45.61,128.9,1.52
+82.91,29.89,58.25,53.01,110.71,6.08

From fe1b6b96fdfa12709df03b730614a00ea97a3747 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Thu, 26 May 2016 23:18:20 +0800
Subject: [PATCH 28/87] fix performance issue: move locals into returned tuple

---
 src/mlpack/core/data/split_data.hpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mlpack/core/data/split_data.hpp b/src/mlpack/core/data/split_data.hpp
index 38196fdbe00..12310952df9 100644
--- a/src/mlpack/core/data/split_data.hpp
+++ b/src/mlpack/core/data/split_data.hpp
@@ -111,7 +111,10 @@ TrainTestSplit(const arma::Mat<T>& input,
   TrainTestSplit(input, inputLabel, trainData, testData, trainLabel, testLabel,
       testRatio);
 
-  return std::make_tuple(trainData, testData, trainLabel, testLabel);
+  return std::make_tuple(std::move(trainData),
+                         std::move(testData),
+                         std::move(trainLabel),
+                         std::move(testLabel));
 }
 
 } // namespace data

From de4226cb0babd8d59d1f140545602318a5131252 Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Fri, 27 May 2016 00:57:02 +0900
Subject: [PATCH 29/87] change parameter flags and names for consistency with
 other programs #discussion_r64743312

---
 .../preprocess/preprocess_split_main.cpp      | 41 +++++++------------
 1 file changed, 14 insertions(+), 27 deletions(-)

diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index 996272a20ee..357e77f6ff6 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -13,17 +13,15 @@ PROGRAM_INFO("Split into Train and Test Data", "This "
 
 // Define parameters for data
 PARAM_STRING_REQ("input_file", "File containing data,", "i");
-PARAM_STRING_REQ("output_train_data", "File name to save train data", "d");
-PARAM_STRING_REQ("output_test_data", "File name to save test data", "D");
-
-// Define parameters for labels
 PARAM_STRING_REQ("input_label", "File containing labels", "I");
-PARAM_STRING_REQ("output_train_label", "File name to save train label", "l");
-PARAM_STRING_REQ("output_test_label", "File name to save test label", "L");
+PARAM_STRING_REQ("training_file", "File name to save train data", "t");
+PARAM_STRING_REQ("test_file", "File name to save test data", "T");
+PARAM_STRING_REQ("training_labels_file", "File name to save train label", "l");
+PARAM_STRING_REQ("test_labels_file", "File name to save test label", "L");
 
 // Define optional test ratio, default is 0.2 (Test 20% Train 80%)
-PARAM_DOUBLE("test_ratio", "Ratio of test set, defaults to 0.2"
-    "if not set", "r", 0.2);
+PARAM_DOUBLE("test_ratio", "Ratio of test set, if not set,"
+    "the ratio defaults to 0.2", "r", 0.2);
 
 using namespace mlpack;
 using namespace arma;
@@ -34,16 +32,12 @@ int main(int argc, char** argv)
   // Parse command line options.
   CLI::ParseCommandLine(argc, argv);
 
-  // data
   const string inputFile = CLI::GetParam<string>("input_file");
-  const string outputTrainData = CLI::GetParam<string>("output_train_data");
-  const string outputTestData = CLI::GetParam<string>("output_test_data");
-  // labels
   const string inputLabel = CLI::GetParam<string>("input_label");
-  const string outputTrainLabel = CLI::GetParam<string>("output_train_label");
-  const string outputTestLabel = CLI::GetParam<string>("output_test_label");
-
-  // Ratio
+  const string trainingFile = CLI::GetParam<string>("training_file");
+  const string testFile = CLI::GetParam<string>("test_file");
+  const string trainingLabelsFile = CLI::GetParam<string>("training_labels_file");
+  const string testLabelsFile = CLI::GetParam<string>("test_labels_file");
   const double testRatio = CLI::GetParam<double>("test_ratio");
 
   // container for input data and labels
@@ -62,16 +56,9 @@ int main(int argc, char** argv)
   Log::Info << "Train Label Count: " << get<2>(value).n_cols << endl;
   Log::Info << "Test Label Count: " << get<3>(value).n_cols << endl;
 
-  // Save Train Data
-  data::Save(outputTrainData, get<0>(value), false);
-
-  // Save Test Data
-  data::Save(outputTestData, get<1>(value), false);
-
-  // Save Train Label
-  data::Save(outputTrainLabel, get<2>(value), false);
-
-  // Save Test Label
-  data::Save(outputTestLabel, get<3>(value), false);
+  data::Save(trainingFile, get<0>(value), false);
+  data::Save(testFile, get<1>(value), false);
+  data::Save(trainingLabelsFile, get<2>(value), false);
+  data::Save(testLabelsFile, get<3>(value), false);
 }
 

From 2cad593c482275d8d1658ddab01516e59f495f19 Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Fri, 27 May 2016 03:54:43 +0900
Subject: [PATCH 30/87] add TrainTestSplit without label

---
 src/mlpack/core/data/split_data.hpp           | 109 ++++++++++++++----
 .../preprocess/preprocess_split_main.cpp      |  26 +++--
 src/mlpack/tests/split_data_test.cpp          |   6 +-
 3 files changed, 111 insertions(+), 30 deletions(-)

diff --git a/src/mlpack/core/data/split_data.hpp b/src/mlpack/core/data/split_data.hpp
index 38196fdbe00..b2af0c2d529 100644
--- a/src/mlpack/core/data/split_data.hpp
+++ b/src/mlpack/core/data/split_data.hpp
@@ -1,8 +1,8 @@
 /**
  * @file split_data.hpp
- * @author Tham Ngap Wei
+ * @author Tham Ngap Wei, Keon Kim
  *
- * Defines TrainTestSplit(), a utility function to split a dataset into a
+ * Defines TrainTestSplit() and LabelTrainTestSplit(), utility functions to split a dataset into a
  * training set and a test set.
  */
 #ifndef MLPACK_CORE_UTIL_SPLIT_DATA_HPP
@@ -12,7 +12,6 @@
 
 namespace mlpack {
 namespace data {
-
 /**
  * Given an input dataset and labels, split into a training set and test set.
  * Example usage below.  This overload places the split dataset into the four
@@ -29,7 +28,7 @@ namespace data {
  *
  * // Split the dataset into a training and test set, with 30% of the data being
  * // held out for the test set.
- * TrainTestSplit(input, label, trainData,
+ * LabelTrainTestSplit(input, label, trainData,
  *                testData, trainLabel, testLabel, 0.3);
  * @endcode
  *
@@ -42,13 +41,13 @@ namespace data {
  * @param testRatio Percentage of dataset to use for test set (between 0 and 1).
  */
 template<typename T, typename U>
-void TrainTestSplit(const arma::Mat<T>& input,
-                    const arma::Row<U>& inputLabel,
-                    arma::Mat<T>& trainData,
-                    arma::Mat<T>& testData,
-                    arma::Row<U>& trainLabel,
-                    arma::Row<U>& testLabel,
-                    const double testRatio)
+void LabelTrainTestSplit(const arma::Mat<T>& input,
+                         const arma::Row<U>& inputLabel,
+                         arma::Mat<T>& trainData,
+                         arma::Mat<T>& testData,
+                         arma::Row<U>& trainLabel,
+                         arma::Row<U>& testLabel,
+                         const double testRatio)
 {
   const size_t testSize = static_cast<size_t>(input.n_cols * testRatio);
   const size_t trainSize = input.n_cols - testSize;
@@ -74,6 +73,52 @@ void TrainTestSplit(const arma::Mat<T>& input,
   }
 }
 
+/**
+ * Given an input dataset, split into a training set and test set.
+ * Example usage below. This overload places the split dataset into the two
+ * output parameters given (trainData, testData).
+ *
+ * @code
+ * arma::mat input = loadData();
+ * arma::mat trainData;
+ * arma::mat testData;
+ * math::RandomSeed(100); // Set the seed if you like.
+ *
+ * // Split the dataset into a training and test set, with 30% of the data being
+ * // held out for the test set.
+ * TrainTestSplit(input, trainData, testData, 0.3);
+ * @endcode
+ *
+ * @param input Input dataset to split.
+ * @param trainData Matrix to store training data into.
+ * @param testData Matrix to store test data into.
+ * @param testRatio Percentage of dataset to use for test set (between 0 and 1).
+ */
+template<typename T>
+void TrainTestSplit(const arma::Mat<T>& input,
+                    arma::Mat<T>& trainData,
+                    arma::Mat<T>& testData,
+                    const double testRatio)
+{
+  const size_t testSize = static_cast<size_t>(input.n_cols * testRatio);
+  const size_t trainSize = input.n_cols - testSize;
+  trainData.set_size(input.n_rows, trainSize);
+  testData.set_size(input.n_rows, testSize);
+
+  const arma::Col<size_t> order =
+      arma::shuffle(arma::linspace<arma::Col<size_t>>(0, input.n_cols -1,
+                                                      input.n_cols));
+
+  for (size_t i = 0; i != trainSize; ++i)
+  {
+     trainData.col(i) = input.col(order[i]);
+  }
+  for (size_t i = 0; i != testSize; ++i)
+  {
+     testData.col(i) = input.col(order[i + trainSize]);
+  }
+}
+
 /**
  * Given an input dataset and labels, split into a training set and test set.
  * Example usage below.  This overload returns the split dataset as a std::tuple
@@ -84,36 +129,60 @@ void TrainTestSplit(const arma::Mat<T>& input,
  * @code
  * arma::mat input = loadData();
  * arma::Row<size_t> label = loadLabel();
- * auto splitResult = TrainTestSplit(input, label, 0.2);
+ * auto splitResult = LabelTrainTestSplit(input, label, 0.2);
  * @endcode
  *
  * @param input Input dataset to split.
  * @param label Input labels to split.
- * @param trainData Matrix to store training data into.
- * @param testData Matrix to store test data into.
- * @param trainLabel Vector to store training labels into.
- * @param testLabel Vector to store test labels into.
  * @param testRatio Percentage of dataset to use for test set (between 0 and 1).
  * @return std::tuple containing trainData (arma::Mat<T>), testData
  *      (arma::Mat<T>), trainLabel (arma::Row<U>), and testLabel (arma::Row<U>).
  */
 template<typename T,typename U>
 std::tuple<arma::Mat<T>, arma::Mat<T>, arma::Row<U>, arma::Row<U>>
-TrainTestSplit(const arma::Mat<T>& input,
-               const arma::Row<U>& inputLabel,
-               const double testRatio)
+LabelTrainTestSplit(const arma::Mat<T>& input,
+                    const arma::Row<U>& inputLabel,
+                    const double testRatio)
 {
   arma::Mat<T> trainData;
   arma::Mat<T> testData;
   arma::Row<U> trainLabel;
   arma::Row<U> testLabel;
 
-  TrainTestSplit(input, inputLabel, trainData, testData, trainLabel, testLabel,
+  LabelTrainTestSplit(input, inputLabel, trainData, testData, trainLabel, testLabel,
       testRatio);
 
   return std::make_tuple(trainData, testData, trainLabel, testLabel);
 }
 
+/**
+ * Given an input dataset, split into a training set and test set.
+ * Example usage below.  This overload returns the split dataset as a std::tuple
+ * with two elements: an arma::Mat<T> containing the training data and an
+ * arma::Mat<T> containing the test data.
+ *
+ * @code
+ * arma::mat input = loadData();
+ * auto splitResult = TrainTestSplit(input, 0.2);
+ * @endcode
+ *
+ * @param input Input dataset to split.
+ * @param testRatio Percentage of dataset to use for test set (between 0 and 1).
+ * @return std::tuple containing trainData (arma::Mat<T>)
+ *      and testData (arma::Mat<T>).
+ */
+template<typename T>
+std::tuple<arma::Mat<T>, arma::Mat<T>>
+TrainTestSplit(const arma::Mat<T>& input,
+               const double testRatio)
+{
+  arma::Mat<T> trainData;
+  arma::Mat<T> testData;
+  TrainTestSplit(input, trainData, testData, testRatio);
+
+  return std::make_tuple(trainData, testData);
+}
+
 } // namespace data
 } // namespace mlpack
 
diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index 357e77f6ff6..d6ffb89f3a6 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -42,23 +42,35 @@ int main(int argc, char** argv)
 
   // container for input data and labels
   arma::mat data;
-  arma::Mat<size_t> labels;
+  arma::mat labels;
 
   // Load Data and Labels
   data::Load(inputFile, data, true);
   data::Load(inputLabel, labels, true);
-  arma::Row<size_t> labels_row = labels.row(0); // extract first row
+  arma::rowvec labels_row = labels.row(0); // extract first row
 
   // Split Data
-  const auto value = data::TrainTestSplit(data, labels_row, testRatio);
+  const auto value = data::LabelTrainTestSplit(data, labels_row, testRatio);
   Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
   Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
   Log::Info << "Train Label Count: " << get<2>(value).n_cols << endl;
   Log::Info << "Test Label Count: " << get<3>(value).n_cols << endl;
 
-  data::Save(trainingFile, get<0>(value), false);
-  data::Save(testFile, get<1>(value), false);
-  data::Save(trainingLabelsFile, get<2>(value), false);
-  data::Save(testLabelsFile, get<3>(value), false);
+  // Cast double matrix to string matrix
+  //Mat<string> training = conv_to<Mat<string>>::from(get<0>(value));
+  //Mat<string> test = conv_to<Mat<string>>::from(get<1>(value));
+  //Mat<string> trainingLabels = conv_to<Mat<string>>::from(get<2>(value));
+  //Mat<string> testLabels = conv_to<Mat<string>>::from(get<3>(value));
+
+  //Cast double matrix to string matrix
+  mat training = get<0>(value);
+  mat test = get<1>(value);
+  mat trainingLabels = get<2>(value);
+  mat testLabels = get<3>(value);
+
+  data::Save(trainingFile, training, false);
+  data::Save(testFile, test, false);
+  data::Save(trainingLabelsFile, trainingLabels, false);
+  data::Save(testLabelsFile, testLabels, false);
 }
 
diff --git a/src/mlpack/tests/split_data_test.cpp b/src/mlpack/tests/split_data_test.cpp
index 1cf7136a8d6..462708ef9d9 100644
--- a/src/mlpack/tests/split_data_test.cpp
+++ b/src/mlpack/tests/split_data_test.cpp
@@ -73,11 +73,11 @@ BOOST_AUTO_TEST_CASE(SplitDataSplitResultMat)
   input.randu();
 
   // Set the labels to the column ID, so that CompareData can compare the data
-  // after TrainTestSplit is called.
+  // after LabelTrainTestSplit is called.
   const Row<size_t> labels = arma::linspace<Row<size_t>>(0, input.n_cols - 1,
       input.n_cols);
 
-  const auto value = TrainTestSplit(input, labels, 0.2);
+  const auto value = LabelTrainTestSplit(input, labels, 0.2);
   BOOST_REQUIRE_EQUAL(std::get<0>(value).n_cols, 8);
   BOOST_REQUIRE_EQUAL(std::get<1>(value).n_cols, 2);
   BOOST_REQUIRE_EQUAL(std::get<2>(value).n_cols, 8);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE(SplitDataLargerTest)
   const Row<size_t> labels = arma::linspace<Row<size_t>>(0, input.n_cols - 1,
       input.n_cols);
 
-  const auto value = TrainTestSplit(input, labels, 0.3);
+  const auto value = LabelTrainTestSplit(input, labels, 0.3);
   BOOST_REQUIRE_EQUAL(std::get<0>(value).n_cols, 497 - size_t(0.3 * 497));
   BOOST_REQUIRE_EQUAL(std::get<1>(value).n_cols, size_t(0.3 * 497));
   BOOST_REQUIRE_EQUAL(std::get<2>(value).n_cols, 497 - size_t(0.3 * 497));

From c8a60b2db341eab52927bb0707dfc3976e437c8b Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Fri, 27 May 2016 04:32:39 +0900
Subject: [PATCH 31/87] rename TrainTestSplit to Split

---
 src/mlpack/core/data/split_data.hpp           | 38 +++++++++----------
 .../preprocess/preprocess_split_main.cpp      |  2 +-
 src/mlpack/tests/split_data_test.cpp          |  6 +--
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/mlpack/core/data/split_data.hpp b/src/mlpack/core/data/split_data.hpp
index b2af0c2d529..381132061d0 100644
--- a/src/mlpack/core/data/split_data.hpp
+++ b/src/mlpack/core/data/split_data.hpp
@@ -2,7 +2,7 @@
  * @file split_data.hpp
  * @author Tham Ngap Wei, Keon Kim
  *
- * Defines TrainTestSplit() and LabelTrainTestSplit(), utility functions to split a dataset into a
+ * Defines Split(), a utility function to split a dataset into a
  * training set and a test set.
  */
 #ifndef MLPACK_CORE_UTIL_SPLIT_DATA_HPP
@@ -28,7 +28,7 @@ namespace data {
  *
  * // Split the dataset into a training and test set, with 30% of the data being
  * // held out for the test set.
- * LabelTrainTestSplit(input, label, trainData,
+ * Split(input, label, trainData,
  *                testData, trainLabel, testLabel, 0.3);
  * @endcode
  *
@@ -41,13 +41,13 @@ namespace data {
  * @param testRatio Percentage of dataset to use for test set (between 0 and 1).
  */
 template<typename T, typename U>
-void LabelTrainTestSplit(const arma::Mat<T>& input,
-                         const arma::Row<U>& inputLabel,
-                         arma::Mat<T>& trainData,
-                         arma::Mat<T>& testData,
-                         arma::Row<U>& trainLabel,
-                         arma::Row<U>& testLabel,
-                         const double testRatio)
+void Split(const arma::Mat<T>& input,
+                    const arma::Row<U>& inputLabel,
+                    arma::Mat<T>& trainData,
+                    arma::Mat<T>& testData,
+                    arma::Row<U>& trainLabel,
+                    arma::Row<U>& testLabel,
+                    const double testRatio)
 {
   const size_t testSize = static_cast<size_t>(input.n_cols * testRatio);
   const size_t trainSize = input.n_cols - testSize;
@@ -86,7 +86,7 @@ void LabelTrainTestSplit(const arma::Mat<T>& input,
  *
  * // Split the dataset into a training and test set, with 30% of the data being
  * // held out for the test set.
- * TrainTestSplit(input, trainData, testData, 0.3);
+ * Split(input, trainData, testData, 0.3);
  * @endcode
  *
  * @param input Input dataset to split.
@@ -95,7 +95,7 @@ void LabelTrainTestSplit(const arma::Mat<T>& input,
  * @param testRatio Percentage of dataset to use for test set (between 0 and 1).
  */
 template<typename T>
-void TrainTestSplit(const arma::Mat<T>& input,
+void Split(const arma::Mat<T>& input,
                     arma::Mat<T>& trainData,
                     arma::Mat<T>& testData,
                     const double testRatio)
@@ -129,7 +129,7 @@ void TrainTestSplit(const arma::Mat<T>& input,
  * @code
  * arma::mat input = loadData();
  * arma::Row<size_t> label = loadLabel();
- * auto splitResult = LabelTrainTestSplit(input, label, 0.2);
+ * auto splitResult = Split(input, label, 0.2);
  * @endcode
  *
  * @param input Input dataset to split.
@@ -140,16 +140,16 @@ void TrainTestSplit(const arma::Mat<T>& input,
  */
 template<typename T,typename U>
 std::tuple<arma::Mat<T>, arma::Mat<T>, arma::Row<U>, arma::Row<U>>
-LabelTrainTestSplit(const arma::Mat<T>& input,
-                    const arma::Row<U>& inputLabel,
-                    const double testRatio)
+Split(const arma::Mat<T>& input,
+               const arma::Row<U>& inputLabel,
+               const double testRatio)
 {
   arma::Mat<T> trainData;
   arma::Mat<T> testData;
   arma::Row<U> trainLabel;
   arma::Row<U> testLabel;
 
-  LabelTrainTestSplit(input, inputLabel, trainData, testData, trainLabel, testLabel,
+  Split(input, inputLabel, trainData, testData, trainLabel, testLabel,
       testRatio);
 
   return std::make_tuple(trainData, testData, trainLabel, testLabel);
@@ -163,7 +163,7 @@ LabelTrainTestSplit(const arma::Mat<T>& input,
  *
  * @code
  * arma::mat input = loadData();
- * auto splitResult = TrainTestSplit(input, 0.2);
+ * auto splitResult = Split(input, 0.2);
  * @endcode
  *
  * @param input Input dataset to split.
@@ -173,12 +173,12 @@ LabelTrainTestSplit(const arma::Mat<T>& input,
  */
 template<typename T>
 std::tuple<arma::Mat<T>, arma::Mat<T>>
-TrainTestSplit(const arma::Mat<T>& input,
+Split(const arma::Mat<T>& input,
                const double testRatio)
 {
   arma::Mat<T> trainData;
   arma::Mat<T> testData;
-  TrainTestSplit(input, trainData, testData, testRatio);
+  Split(input, trainData, testData, testRatio);
 
   return std::make_tuple(trainData, testData);
 }
diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index d6ffb89f3a6..dfd79cfdbb1 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -50,7 +50,7 @@ int main(int argc, char** argv)
   arma::rowvec labels_row = labels.row(0); // extract first row
 
   // Split Data
-  const auto value = data::LabelTrainTestSplit(data, labels_row, testRatio);
+  const auto value = data::Split(data, labels_row, testRatio);
   Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
   Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
   Log::Info << "Train Label Count: " << get<2>(value).n_cols << endl;
diff --git a/src/mlpack/tests/split_data_test.cpp b/src/mlpack/tests/split_data_test.cpp
index 462708ef9d9..700e31ad7d2 100644
--- a/src/mlpack/tests/split_data_test.cpp
+++ b/src/mlpack/tests/split_data_test.cpp
@@ -73,11 +73,11 @@ BOOST_AUTO_TEST_CASE(SplitDataSplitResultMat)
   input.randu();
 
   // Set the labels to the column ID, so that CompareData can compare the data
-  // after LabelTrainTestSplit is called.
+  // after Split is called.
   const Row<size_t> labels = arma::linspace<Row<size_t>>(0, input.n_cols - 1,
       input.n_cols);
 
-  const auto value = LabelTrainTestSplit(input, labels, 0.2);
+  const auto value = Split(input, labels, 0.2);
   BOOST_REQUIRE_EQUAL(std::get<0>(value).n_cols, 8);
   BOOST_REQUIRE_EQUAL(std::get<1>(value).n_cols, 2);
   BOOST_REQUIRE_EQUAL(std::get<2>(value).n_cols, 8);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE(SplitDataLargerTest)
   const Row<size_t> labels = arma::linspace<Row<size_t>>(0, input.n_cols - 1,
       input.n_cols);
 
-  const auto value = LabelTrainTestSplit(input, labels, 0.3);
+  const auto value = Split(input, labels, 0.3);
   BOOST_REQUIRE_EQUAL(std::get<0>(value).n_cols, 497 - size_t(0.3 * 497));
   BOOST_REQUIRE_EQUAL(std::get<1>(value).n_cols, size_t(0.3 * 497));
   BOOST_REQUIRE_EQUAL(std::get<2>(value).n_cols, 497 - size_t(0.3 * 497));

From e41b3dbde7328a358cfa8adb2ffb5a545a48eb75 Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Fri, 27 May 2016 22:11:31 +0900
Subject: [PATCH 32/87] add data split executable without labels + tests

---
 .../preprocess/preprocess_split_main.cpp      | 150 +++++++++++++-----
 src/mlpack/tests/split_data_test.cpp          |  22 ++-
 2 files changed, 130 insertions(+), 42 deletions(-)

diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index dfd79cfdbb1..02e844a21a5 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -13,11 +13,12 @@ PROGRAM_INFO("Split into Train and Test Data", "This "
 
 // Define parameters for data
 PARAM_STRING_REQ("input_file", "File containing data,", "i");
-PARAM_STRING_REQ("input_label", "File containing labels", "I");
-PARAM_STRING_REQ("training_file", "File name to save train data", "t");
-PARAM_STRING_REQ("test_file", "File name to save test data", "T");
-PARAM_STRING_REQ("training_labels_file", "File name to save train label", "l");
-PARAM_STRING_REQ("test_labels_file", "File name to save test label", "L");
+// Define optional parameters
+PARAM_STRING("input_labels", "File containing labels", "I", "");
+PARAM_STRING("training_file", "File name to save train data", "t", "");
+PARAM_STRING("test_file", "File name to save test data", "T", "");
+PARAM_STRING("training_labels_file", "File name to save train label", "l", "");
+PARAM_STRING("test_labels_file", "File name to save test label", "L", "");
 
 // Define optional test ratio, default is 0.2 (Test 20% Train 80%)
 PARAM_DOUBLE("test_ratio", "Ratio of test set, if not set,"
@@ -31,46 +32,113 @@ int main(int argc, char** argv)
 {
   // Parse command line options.
   CLI::ParseCommandLine(argc, argv);
-
   const string inputFile = CLI::GetParam<string>("input_file");
-  const string inputLabel = CLI::GetParam<string>("input_label");
-  const string trainingFile = CLI::GetParam<string>("training_file");
-  const string testFile = CLI::GetParam<string>("test_file");
-  const string trainingLabelsFile = CLI::GetParam<string>("training_labels_file");
-  const string testLabelsFile = CLI::GetParam<string>("test_labels_file");
+  const string inputLabels = CLI::GetParam<string>("input_labels");
+  string trainingFile = CLI::GetParam<string>("training_file");
+  string testFile = CLI::GetParam<string>("test_file");
+  string trainingLabelsFile = CLI::GetParam<string>("training_labels_file");
+  string testLabelsFile = CLI::GetParam<string>("test_labels_file");
   const double testRatio = CLI::GetParam<double>("test_ratio");
 
-  // container for input data and labels
-  arma::mat data;
-  arma::mat labels;
+  // check on data parameters
+  if (trainingFile.empty())
+  {
+    trainingFile = "train_" + inputFile;
+    Log::Warn << "You did not specify --training_file. "
+      << "Training file name is automatically set to: "
+      << trainingFile << endl;
+  }
+  if (testFile.empty())
+  {
+    testFile = "test_" + inputFile;
+    Log::Warn << "You did not specify --test_file. "
+      << "Test file name is automatically set to: " << testFile << endl;
+  }
+
+  // check on label parameters
+  if (!inputLabels.empty())
+  {
+    if (!CLI::HasParam("training_labels_file"))
+    {
+      trainingLabelsFile = "train_" + inputLabels;
+      Log::Warn << "You did not specify --training_labels_file. "
+        << "Training labels file name is automatically set to: "
+        << trainingLabelsFile << endl;
+    }
+    if (!CLI::HasParam("test_labels_file"))
+    {
+      testLabelsFile = "test_" + inputLabels;
+      Log::Warn << "You did not specify --test_labels_file. "
+        << "Test labels file name is automatically set to: "
+        << testLabelsFile << endl;
+    }
+  }
+  else
+  {
+    if (CLI::HasParam("training_labels_file")
+        || CLI::HasParam("test_labels_file"))
+    {
+      Log::Fatal << "When specifying --training_labels_file or "
+        << "test_labels_file, you must also specify --input_labels. " << endl;
+    }
+  }
 
-  // Load Data and Labels
+  // check on test_ratio
+  if (CLI::HasParam("test_ratio"))
+  {
+    //sanity check on test_ratio
+    if ((testRatio < 0.0) && (testRatio > 1.0))
+    {
+      Log::Fatal << "Invalid parameter for test_ratio. "
+        << "test_ratio must be between 0.0 and 1.0" << endl;
+    }
+  }
+  else // if test_ratio is not set
+  {
+    Log::Warn << "You did not specify --test_ratio_file. "
+      << "Test ratio is automatically set to: 0.2"<< endl;
+  }
+
+  // load data
+  arma::mat data;
   data::Load(inputFile, data, true);
-  data::Load(inputLabel, labels, true);
-  arma::rowvec labels_row = labels.row(0); // extract first row
-
-  // Split Data
-  const auto value = data::Split(data, labels_row, testRatio);
-  Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
-  Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
-  Log::Info << "Train Label Count: " << get<2>(value).n_cols << endl;
-  Log::Info << "Test Label Count: " << get<3>(value).n_cols << endl;
-
-  // Cast double matrix to string matrix
-  //Mat<string> training = conv_to<Mat<string>>::from(get<0>(value));
-  //Mat<string> test = conv_to<Mat<string>>::from(get<1>(value));
-  //Mat<string> trainingLabels = conv_to<Mat<string>>::from(get<2>(value));
-  //Mat<string> testLabels = conv_to<Mat<string>>::from(get<3>(value));
-
-  //Cast double matrix to string matrix
-  mat training = get<0>(value);
-  mat test = get<1>(value);
-  mat trainingLabels = get<2>(value);
-  mat testLabels = get<3>(value);
-
-  data::Save(trainingFile, training, false);
-  data::Save(testFile, test, false);
-  data::Save(trainingLabelsFile, trainingLabels, false);
-  data::Save(testLabelsFile, testLabels, false);
+
+  // if parameters for labels exist
+  if (CLI::HasParam("input_labels"))
+  {
+    arma::mat labels;
+    data::Load(inputLabels, labels, true);
+    arma::rowvec labels_row = labels.row(0); // extract first row
+
+    const auto value = data::Split(data, labels_row, testRatio);
+    Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
+    Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
+    Log::Info << "Train Label Count: " << get<2>(value).n_cols << endl;
+    Log::Info << "Test Label Count: " << get<3>(value).n_cols << endl;
+
+    // TODO: fix full precision problem
+    mat training = get<0>(value);
+    mat test = get<1>(value);
+    mat trainingLabels = get<2>(value);
+    mat testLabels = get<3>(value);
+
+    data::Save(trainingFile, training, false);
+    data::Save(testFile, test, false);
+    data::Save(trainingLabelsFile, trainingLabels, false);
+    data::Save(testLabelsFile, testLabels, false);
+  }
+  else // split without parameters
+  {
+    const auto value = data::Split(data, testRatio);
+    Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
+    Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
+
+    // TODO: fix full precision problem
+    mat training = get<0>(value);
+    mat test = get<1>(value);
+
+    data::Save(trainingFile, training, false);
+    data::Save(testFile, test, false);
+  }
 }
 
diff --git a/src/mlpack/tests/split_data_test.cpp b/src/mlpack/tests/split_data_test.cpp
index 700e31ad7d2..d7b39909832 100644
--- a/src/mlpack/tests/split_data_test.cpp
+++ b/src/mlpack/tests/split_data_test.cpp
@@ -67,7 +67,17 @@ void CheckDuplication(const Row<size_t>& trainLabels,
     BOOST_REQUIRE_EQUAL(counts[i], 1);
 }
 
-BOOST_AUTO_TEST_CASE(SplitDataSplitResultMat)
+BOOST_AUTO_TEST_CASE(SplitDataResultMat)
+{
+  mat input(2, 10);
+  input.randu();
+
+  const auto value = Split(input, 0.2);
+  BOOST_REQUIRE_EQUAL(std::get<0>(value).n_cols, 8); // train data
+  BOOST_REQUIRE_EQUAL(std::get<1>(value).n_cols, 2); // test data
+}
+
+BOOST_AUTO_TEST_CASE(SplitLabeledDataResultMat)
 {
   mat input(2, 10);
   input.randu();
@@ -99,6 +109,16 @@ BOOST_AUTO_TEST_CASE(SplitDataLargerTest)
   mat input(10, 497);
   input.randu();
 
+  const auto value = Split(input, 0.3);
+  BOOST_REQUIRE_EQUAL(std::get<0>(value).n_cols, 497 - size_t(0.3 * 497));
+  BOOST_REQUIRE_EQUAL(std::get<1>(value).n_cols, size_t(0.3 * 497));
+}
+
+BOOST_AUTO_TEST_CASE(SplitLabeledDataLargerTest)
+{
+  mat input(10, 497);
+  input.randu();
+
   // Set the labels to the column ID.
   const Row<size_t> labels = arma::linspace<Row<size_t>>(0, input.n_cols - 1,
       input.n_cols);

From 706f0ee9307effc4f240d16ee398a8a1ee71a64c Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Fri, 27 May 2016 20:56:23 -0300
Subject: [PATCH 33/87] Fix error: a balltree should use ballbounds instead of
 hrectbounds.

---
 src/mlpack/core/tree/binary_space_tree/typedef.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/core/tree/binary_space_tree/typedef.hpp b/src/mlpack/core/tree/binary_space_tree/typedef.hpp
index 7d58f6750e7..28145d11bb2 100644
--- a/src/mlpack/core/tree/binary_space_tree/typedef.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/typedef.hpp
@@ -103,7 +103,7 @@ template<typename MetricType, typename StatisticType, typename MatType>
 using BallTree = BinarySpaceTree<MetricType,
                                  StatisticType,
                                  MatType,
-                                 bound::HRectBound,
+                                 bound::BallBound,
                                  MidpointSplit>;
 
 /**
@@ -132,7 +132,7 @@ template<typename MetricType, typename StatisticType, typename MatType>
 using MeanSplitBallTree = BinarySpaceTree<MetricType,
                                           StatisticType,
                                           MatType,
-                                          bound::HRectBound,
+                                          bound::BallBound,
                                           MeanSplit>;
 
 } // namespace tree

From 9e5d9c28ad46c1bb21e395df7a6f7249cd17e011 Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Fri, 27 May 2016 21:01:06 -0300
Subject: [PATCH 34/87] Set proper template order. To work with binary space
 trees, the metric template parameter must be the first.

---
 src/mlpack/core/tree/ballbound.hpp      | 10 +--
 src/mlpack/core/tree/ballbound_impl.hpp | 88 ++++++++++++-------------
 2 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/src/mlpack/core/tree/ballbound.hpp b/src/mlpack/core/tree/ballbound.hpp
index 68a16d759dd..d61e88aa1c3 100644
--- a/src/mlpack/core/tree/ballbound.hpp
+++ b/src/mlpack/core/tree/ballbound.hpp
@@ -19,11 +19,11 @@ namespace bound {
  * specific point (center). TMetricType is the custom metric type that defaults
  * to the Euclidean (L2) distance.
  *
- * @tparam VecType Type of vector (arma::vec or arma::sp_vec or similar).
  * @tparam TMetricType metric type used in the distance measure.
+ * @tparam VecType Type of vector (arma::vec or arma::sp_vec or similar).
  */
-template<typename VecType = arma::vec,
-         typename TMetricType = metric::LMetric<2, true>>
+template<typename TMetricType = metric::LMetric<2, true>,
+         typename VecType = arma::vec>
 class BallBound
 {
  public:
@@ -189,8 +189,8 @@ class BallBound
 };
 
 //! A specialization of BoundTraits for this bound type.
-template<typename VecType, typename TMetricType>
-struct BoundTraits<BallBound<VecType, TMetricType>>
+template<typename TMetricType, typename VecType>
+struct BoundTraits<BallBound<TMetricType, VecType>>
 {
   //! These bounds are potentially loose in some dimensions.
   const static bool HasTightBounds = false;
diff --git a/src/mlpack/core/tree/ballbound_impl.hpp b/src/mlpack/core/tree/ballbound_impl.hpp
index 8e0e658fab5..385915616c2 100644
--- a/src/mlpack/core/tree/ballbound_impl.hpp
+++ b/src/mlpack/core/tree/ballbound_impl.hpp
@@ -18,8 +18,8 @@ namespace mlpack {
 namespace bound {
 
 //! Empty Constructor.
-template<typename VecType, typename TMetricType>
-BallBound<VecType, TMetricType>::BallBound() :
+template<typename TMetricType, typename VecType>
+BallBound<TMetricType, VecType>::BallBound() :
     radius(std::numeric_limits<ElemType>::lowest()),
     metric(new TMetricType()),
     ownsMetric(true)
@@ -30,8 +30,8 @@ BallBound<VecType, TMetricType>::BallBound() :
  *
  * @param dimension Dimensionality of ball bound.
  */
-template<typename VecType, typename TMetricType>
-BallBound<VecType, TMetricType>::BallBound(const size_t dimension) :
+template<typename TMetricType, typename VecType>
+BallBound<TMetricType, VecType>::BallBound(const size_t dimension) :
     radius(std::numeric_limits<ElemType>::lowest()),
     center(dimension),
     metric(new TMetricType()),
@@ -44,8 +44,8 @@ BallBound<VecType, TMetricType>::BallBound(const size_t dimension) :
  * @param radius Radius of ball bound.
  * @param center Center of ball bound.
  */
-template<typename VecType, typename TMetricType>
-BallBound<VecType, TMetricType>::BallBound(const ElemType radius,
+template<typename TMetricType, typename VecType>
+BallBound<TMetricType, VecType>::BallBound(const ElemType radius,
                                            const VecType& center) :
     radius(radius),
     center(center),
@@ -54,8 +54,8 @@ BallBound<VecType, TMetricType>::BallBound(const ElemType radius,
 { /* Nothing to do. */ }
 
 //! Copy Constructor. To prevent memory leaks.
-template<typename VecType, typename TMetricType>
-BallBound<VecType, TMetricType>::BallBound(const BallBound& other) :
+template<typename TMetricType, typename VecType>
+BallBound<TMetricType, VecType>::BallBound(const BallBound& other) :
     radius(other.radius),
     center(other.center),
     metric(other.metric),
@@ -63,8 +63,8 @@ BallBound<VecType, TMetricType>::BallBound(const BallBound& other) :
 { /* Nothing to do. */ }
 
 //! For the same reason as the copy constructor: to prevent memory leaks.
-template<typename VecType, typename TMetricType>
-BallBound<VecType, TMetricType>& BallBound<VecType, TMetricType>::operator=(
+template<typename TMetricType, typename VecType>
+BallBound<TMetricType, VecType>& BallBound<TMetricType, VecType>::operator=(
     const BallBound& other)
 {
   radius = other.radius;
@@ -74,8 +74,8 @@ BallBound<VecType, TMetricType>& BallBound<VecType, TMetricType>::operator=(
 }
 
 //! Move constructor.
-template<typename VecType, typename TMetricType>
-BallBound<VecType, TMetricType>::BallBound(BallBound&& other) :
+template<typename TMetricType, typename VecType>
+BallBound<TMetricType, VecType>::BallBound(BallBound&& other) :
     radius(other.radius),
     center(other.center),
     metric(other.metric),
@@ -89,17 +89,17 @@ BallBound<VecType, TMetricType>::BallBound(BallBound&& other) :
 }
 
 //! Destructor to release allocated memory.
-template<typename VecType, typename TMetricType>
-BallBound<VecType, TMetricType>::~BallBound()
+template<typename TMetricType, typename VecType>
+BallBound<TMetricType, VecType>::~BallBound()
 {
   if (ownsMetric)
     delete metric;
 }
 
 //! Get the range in a certain dimension.
-template<typename VecType, typename TMetricType>
-math::RangeType<typename BallBound<VecType, TMetricType>::ElemType>
-BallBound<VecType, TMetricType>::operator[](const size_t i) const
+template<typename TMetricType, typename VecType>
+math::RangeType<typename BallBound<TMetricType, VecType>::ElemType>
+BallBound<TMetricType, VecType>::operator[](const size_t i) const
 {
   if (radius < 0)
     return math::Range();
@@ -110,8 +110,8 @@ BallBound<VecType, TMetricType>::operator[](const size_t i) const
 /**
  * Determines if a point is within the bound.
  */
-template<typename VecType, typename TMetricType>
-bool BallBound<VecType, TMetricType>::Contains(const VecType& point) const
+template<typename TMetricType, typename VecType>
+bool BallBound<TMetricType, VecType>::Contains(const VecType& point) const
 {
   if (radius < 0)
     return false;
@@ -122,10 +122,10 @@ bool BallBound<VecType, TMetricType>::Contains(const VecType& point) const
 /**
  * Calculates minimum bound-to-point squared distance.
  */
-template<typename VecType, typename TMetricType>
+template<typename TMetricType, typename VecType>
 template<typename OtherVecType>
-typename BallBound<VecType, TMetricType>::ElemType
-BallBound<VecType, TMetricType>::MinDistance(
+typename BallBound<TMetricType, VecType>::ElemType
+BallBound<TMetricType, VecType>::MinDistance(
     const OtherVecType& point,
     typename boost::enable_if<IsVector<OtherVecType>>* /* junk */) const
 {
@@ -138,9 +138,9 @@ BallBound<VecType, TMetricType>::MinDistance(
 /**
  * Calculates minimum bound-to-bound squared distance.
  */
-template<typename VecType, typename TMetricType>
-typename BallBound<VecType, TMetricType>::ElemType
-BallBound<VecType, TMetricType>::MinDistance(const BallBound& other)
+template<typename TMetricType, typename VecType>
+typename BallBound<TMetricType, VecType>::ElemType
+BallBound<TMetricType, VecType>::MinDistance(const BallBound& other)
     const
 {
   if (radius < 0)
@@ -156,10 +156,10 @@ BallBound<VecType, TMetricType>::MinDistance(const BallBound& other)
 /**
  * Computes maximum distance.
  */
-template<typename VecType, typename TMetricType>
+template<typename TMetricType, typename VecType>
 template<typename OtherVecType>
-typename BallBound<VecType, TMetricType>::ElemType
-BallBound<VecType, TMetricType>::MaxDistance(
+typename BallBound<TMetricType, VecType>::ElemType
+BallBound<TMetricType, VecType>::MaxDistance(
     const OtherVecType& point,
     typename boost::enable_if<IsVector<OtherVecType> >* /* junk */) const
 {
@@ -172,9 +172,9 @@ BallBound<VecType, TMetricType>::MaxDistance(
 /**
  * Computes maximum distance.
  */
-template<typename VecType, typename TMetricType>
-typename BallBound<VecType, TMetricType>::ElemType
-BallBound<VecType, TMetricType>::MaxDistance(const BallBound& other)
+template<typename TMetricType, typename VecType>
+typename BallBound<TMetricType, VecType>::ElemType
+BallBound<TMetricType, VecType>::MaxDistance(const BallBound& other)
     const
 {
   if (radius < 0)
@@ -188,10 +188,10 @@ BallBound<VecType, TMetricType>::MaxDistance(const BallBound& other)
  *
  * Example: bound1.MinDistanceSq(other) for minimum squared distance.
  */
-template<typename VecType, typename TMetricType>
+template<typename TMetricType, typename VecType>
 template<typename OtherVecType>
-math::RangeType<typename BallBound<VecType, TMetricType>::ElemType>
-BallBound<VecType, TMetricType>::RangeDistance(
+math::RangeType<typename BallBound<TMetricType, VecType>::ElemType>
+BallBound<TMetricType, VecType>::RangeDistance(
     const OtherVecType& point,
     typename boost::enable_if<IsVector<OtherVecType> >* /* junk */) const
 {
@@ -206,9 +206,9 @@ BallBound<VecType, TMetricType>::RangeDistance(
   }
 }
 
-template<typename VecType, typename TMetricType>
-math::RangeType<typename BallBound<VecType, TMetricType>::ElemType>
-BallBound<VecType, TMetricType>::RangeDistance(
+template<typename TMetricType, typename VecType>
+math::RangeType<typename BallBound<TMetricType, VecType>::ElemType>
+BallBound<TMetricType, VecType>::RangeDistance(
     const BallBound& other) const
 {
   if (radius < 0)
@@ -226,9 +226,9 @@ BallBound<VecType, TMetricType>::RangeDistance(
 /**
  * Expand the bound to include the given bound.
  *
-template<typename VecType, typename TMetricType>
+template<typename TMetricType, typename VecType>
 const BallBound<VecType>&
-BallBound<VecType, TMetricType>::operator|=(
+BallBound<TMetricType, VecType>::operator|=(
     const BallBound<VecType>& other)
 {
   double dist = metric->Evaluate(center, other);
@@ -246,10 +246,10 @@ BallBound<VecType, TMetricType>::operator|=(
  * The difference lies in the way we initialize the ball bound. The way we
  * expand the bound is same.
  */
-template<typename VecType, typename TMetricType>
+template<typename TMetricType, typename VecType>
 template<typename MatType>
-const BallBound<VecType, TMetricType>&
-BallBound<VecType, TMetricType>::operator|=(const MatType& data)
+const BallBound<TMetricType, VecType>&
+BallBound<TMetricType, VecType>::operator|=(const MatType& data)
 {
   if (radius < 0)
   {
@@ -277,9 +277,9 @@ BallBound<VecType, TMetricType>::operator|=(const MatType& data)
 }
 
 //! Serialize the BallBound.
-template<typename VecType, typename TMetricType>
+template<typename TMetricType, typename VecType>
 template<typename Archive>
-void BallBound<VecType, TMetricType>::Serialize(
+void BallBound<TMetricType, VecType>::Serialize(
     Archive& ar,
     const unsigned int /* version */)
 {

From c0cdb8b3fad77b2f9d026e84a0f27ca16c896acd Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Fri, 27 May 2016 21:02:53 -0300
Subject: [PATCH 35/87] Use proper template order for BallBound.

---
 src/mlpack/tests/serialization_test.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/tests/serialization_test.cpp b/src/mlpack/tests/serialization_test.cpp
index 9bddbc2c16d..73a960cf2fe 100644
--- a/src/mlpack/tests/serialization_test.cpp
+++ b/src/mlpack/tests/serialization_test.cpp
@@ -354,12 +354,12 @@ BOOST_AUTO_TEST_CASE(BallBoundTest)
 
 BOOST_AUTO_TEST_CASE(MahalanobisBallBoundTest)
 {
-  BallBound<arma::vec, MahalanobisDistance<>> b(100);
+  BallBound<MahalanobisDistance<>, arma::vec> b(100);
   b.Center().randu();
   b.Radius() = 14.0;
   b.Metric().Covariance().randu(100, 100);
 
-  BallBound<arma::vec, MahalanobisDistance<>> xmlB, textB, binaryB;
+  BallBound<MahalanobisDistance<>, arma::vec> xmlB, textB, binaryB;
 
   SerializeObjectAll(b, xmlB, textB, binaryB);
 

From 903be7eb8583e81bbe76f858ec8cc4ed91d50408 Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Fri, 27 May 2016 21:11:43 -0300
Subject: [PATCH 36/87] Fix error: kdtree was used where a balltree should be.

---
 src/mlpack/methods/neighbor_search/ns_model_impl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mlpack/methods/neighbor_search/ns_model_impl.hpp b/src/mlpack/methods/neighbor_search/ns_model_impl.hpp
index 28c5a0bf8f0..e4aa7c179fe 100644
--- a/src/mlpack/methods/neighbor_search/ns_model_impl.hpp
+++ b/src/mlpack/methods/neighbor_search/ns_model_impl.hpp
@@ -320,7 +320,7 @@ void NSModel<SortPolicy>::BuildModel(arma::mat&& referenceSet,
       {
         std::vector<size_t> oldFromNewReferences;
         typename NSType<tree::BallTree>::Tree* ballTree =
-            new typename NSType<tree::KDTree>::Tree(std::move(referenceSet),
+            new typename NSType<tree::BallTree>::Tree(std::move(referenceSet),
             oldFromNewReferences, leafSize);
         ballTreeNS = new NSType<tree::BallTree>(ballTree, singleMode);
 

From 8285c31068b66c5bc5ebdefe8b1ea46010678176 Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Fri, 27 May 2016 21:18:41 -0300
Subject: [PATCH 37/87] Change template name to be similar to the definition of
 hrectbound.

---
 src/mlpack/core/tree/ballbound.hpp      | 18 +++--
 src/mlpack/core/tree/ballbound_impl.hpp | 94 ++++++++++++-------------
 2 files changed, 55 insertions(+), 57 deletions(-)

diff --git a/src/mlpack/core/tree/ballbound.hpp b/src/mlpack/core/tree/ballbound.hpp
index d61e88aa1c3..14f289a14ee 100644
--- a/src/mlpack/core/tree/ballbound.hpp
+++ b/src/mlpack/core/tree/ballbound.hpp
@@ -16,13 +16,13 @@ namespace bound {
 
 /**
  * Ball bound encloses a set of points at a specific distance (radius) from a
- * specific point (center). TMetricType is the custom metric type that defaults
+ * specific point (center). MetricType is the custom metric type that defaults
  * to the Euclidean (L2) distance.
  *
- * @tparam TMetricType metric type used in the distance measure.
+ * @tparam MetricType metric type used in the distance measure.
  * @tparam VecType Type of vector (arma::vec or arma::sp_vec or similar).
  */
-template<typename TMetricType = metric::LMetric<2, true>,
+template<typename MetricType = metric::LMetric<2, true>,
          typename VecType = arma::vec>
 class BallBound
 {
@@ -31,8 +31,6 @@ class BallBound
   typedef typename VecType::elem_type ElemType;
   //! A public version of the vector type.
   typedef VecType Vec;
-  //! Needed for BinarySpaceTree.
-  typedef TMetricType MetricType;
 
  private:
   //! The radius of the ball bound.
@@ -40,7 +38,7 @@ class BallBound
   //! The center of the ball bound.
   VecType center;
   //! The metric used in this bound.
-  TMetricType* metric;
+  MetricType* metric;
 
   /**
    * To know whether this object allocated memory to the metric member
@@ -179,9 +177,9 @@ class BallBound
   ElemType Diameter() const { return 2 * radius; }
 
   //! Returns the distance metric used in this bound.
-  const TMetricType& Metric() const { return *metric; }
+  const MetricType& Metric() const { return *metric; }
   //! Modify the distance metric used in this bound.
-  TMetricType& Metric() { return *metric; }
+  MetricType& Metric() { return *metric; }
 
   //! Serialize the bound.
   template<typename Archive>
@@ -189,8 +187,8 @@ class BallBound
 };
 
 //! A specialization of BoundTraits for this bound type.
-template<typename TMetricType, typename VecType>
-struct BoundTraits<BallBound<TMetricType, VecType>>
+template<typename MetricType, typename VecType>
+struct BoundTraits<BallBound<MetricType, VecType>>
 {
   //! These bounds are potentially loose in some dimensions.
   const static bool HasTightBounds = false;
diff --git a/src/mlpack/core/tree/ballbound_impl.hpp b/src/mlpack/core/tree/ballbound_impl.hpp
index 385915616c2..885acb5a8e0 100644
--- a/src/mlpack/core/tree/ballbound_impl.hpp
+++ b/src/mlpack/core/tree/ballbound_impl.hpp
@@ -18,10 +18,10 @@ namespace mlpack {
 namespace bound {
 
 //! Empty Constructor.
-template<typename TMetricType, typename VecType>
-BallBound<TMetricType, VecType>::BallBound() :
+template<typename MetricType, typename VecType>
+BallBound<MetricType, VecType>::BallBound() :
     radius(std::numeric_limits<ElemType>::lowest()),
-    metric(new TMetricType()),
+    metric(new MetricType()),
     ownsMetric(true)
 { /* Nothing to do. */ }
 
@@ -30,11 +30,11 @@ BallBound<TMetricType, VecType>::BallBound() :
  *
  * @param dimension Dimensionality of ball bound.
  */
-template<typename TMetricType, typename VecType>
-BallBound<TMetricType, VecType>::BallBound(const size_t dimension) :
+template<typename MetricType, typename VecType>
+BallBound<MetricType, VecType>::BallBound(const size_t dimension) :
     radius(std::numeric_limits<ElemType>::lowest()),
     center(dimension),
-    metric(new TMetricType()),
+    metric(new MetricType()),
     ownsMetric(true)
 { /* Nothing to do. */ }
 
@@ -44,18 +44,18 @@ BallBound<TMetricType, VecType>::BallBound(const size_t dimension) :
  * @param radius Radius of ball bound.
  * @param center Center of ball bound.
  */
-template<typename TMetricType, typename VecType>
-BallBound<TMetricType, VecType>::BallBound(const ElemType radius,
+template<typename MetricType, typename VecType>
+BallBound<MetricType, VecType>::BallBound(const ElemType radius,
                                            const VecType& center) :
     radius(radius),
     center(center),
-    metric(new TMetricType()),
+    metric(new MetricType()),
     ownsMetric(true)
 { /* Nothing to do. */ }
 
 //! Copy Constructor. To prevent memory leaks.
-template<typename TMetricType, typename VecType>
-BallBound<TMetricType, VecType>::BallBound(const BallBound& other) :
+template<typename MetricType, typename VecType>
+BallBound<MetricType, VecType>::BallBound(const BallBound& other) :
     radius(other.radius),
     center(other.center),
     metric(other.metric),
@@ -63,8 +63,8 @@ BallBound<TMetricType, VecType>::BallBound(const BallBound& other) :
 { /* Nothing to do. */ }
 
 //! For the same reason as the copy constructor: to prevent memory leaks.
-template<typename TMetricType, typename VecType>
-BallBound<TMetricType, VecType>& BallBound<TMetricType, VecType>::operator=(
+template<typename MetricType, typename VecType>
+BallBound<MetricType, VecType>& BallBound<MetricType, VecType>::operator=(
     const BallBound& other)
 {
   radius = other.radius;
@@ -74,8 +74,8 @@ BallBound<TMetricType, VecType>& BallBound<TMetricType, VecType>::operator=(
 }
 
 //! Move constructor.
-template<typename TMetricType, typename VecType>
-BallBound<TMetricType, VecType>::BallBound(BallBound&& other) :
+template<typename MetricType, typename VecType>
+BallBound<MetricType, VecType>::BallBound(BallBound&& other) :
     radius(other.radius),
     center(other.center),
     metric(other.metric),
@@ -89,17 +89,17 @@ BallBound<TMetricType, VecType>::BallBound(BallBound&& other) :
 }
 
 //! Destructor to release allocated memory.
-template<typename TMetricType, typename VecType>
-BallBound<TMetricType, VecType>::~BallBound()
+template<typename MetricType, typename VecType>
+BallBound<MetricType, VecType>::~BallBound()
 {
   if (ownsMetric)
     delete metric;
 }
 
 //! Get the range in a certain dimension.
-template<typename TMetricType, typename VecType>
-math::RangeType<typename BallBound<TMetricType, VecType>::ElemType>
-BallBound<TMetricType, VecType>::operator[](const size_t i) const
+template<typename MetricType, typename VecType>
+math::RangeType<typename BallBound<MetricType, VecType>::ElemType>
+BallBound<MetricType, VecType>::operator[](const size_t i) const
 {
   if (radius < 0)
     return math::Range();
@@ -110,8 +110,8 @@ BallBound<TMetricType, VecType>::operator[](const size_t i) const
 /**
  * Determines if a point is within the bound.
  */
-template<typename TMetricType, typename VecType>
-bool BallBound<TMetricType, VecType>::Contains(const VecType& point) const
+template<typename MetricType, typename VecType>
+bool BallBound<MetricType, VecType>::Contains(const VecType& point) const
 {
   if (radius < 0)
     return false;
@@ -122,10 +122,10 @@ bool BallBound<TMetricType, VecType>::Contains(const VecType& point) const
 /**
  * Calculates minimum bound-to-point squared distance.
  */
-template<typename TMetricType, typename VecType>
+template<typename MetricType, typename VecType>
 template<typename OtherVecType>
-typename BallBound<TMetricType, VecType>::ElemType
-BallBound<TMetricType, VecType>::MinDistance(
+typename BallBound<MetricType, VecType>::ElemType
+BallBound<MetricType, VecType>::MinDistance(
     const OtherVecType& point,
     typename boost::enable_if<IsVector<OtherVecType>>* /* junk */) const
 {
@@ -138,9 +138,9 @@ BallBound<TMetricType, VecType>::MinDistance(
 /**
  * Calculates minimum bound-to-bound squared distance.
  */
-template<typename TMetricType, typename VecType>
-typename BallBound<TMetricType, VecType>::ElemType
-BallBound<TMetricType, VecType>::MinDistance(const BallBound& other)
+template<typename MetricType, typename VecType>
+typename BallBound<MetricType, VecType>::ElemType
+BallBound<MetricType, VecType>::MinDistance(const BallBound& other)
     const
 {
   if (radius < 0)
@@ -156,10 +156,10 @@ BallBound<TMetricType, VecType>::MinDistance(const BallBound& other)
 /**
  * Computes maximum distance.
  */
-template<typename TMetricType, typename VecType>
+template<typename MetricType, typename VecType>
 template<typename OtherVecType>
-typename BallBound<TMetricType, VecType>::ElemType
-BallBound<TMetricType, VecType>::MaxDistance(
+typename BallBound<MetricType, VecType>::ElemType
+BallBound<MetricType, VecType>::MaxDistance(
     const OtherVecType& point,
     typename boost::enable_if<IsVector<OtherVecType> >* /* junk */) const
 {
@@ -172,9 +172,9 @@ BallBound<TMetricType, VecType>::MaxDistance(
 /**
  * Computes maximum distance.
  */
-template<typename TMetricType, typename VecType>
-typename BallBound<TMetricType, VecType>::ElemType
-BallBound<TMetricType, VecType>::MaxDistance(const BallBound& other)
+template<typename MetricType, typename VecType>
+typename BallBound<MetricType, VecType>::ElemType
+BallBound<MetricType, VecType>::MaxDistance(const BallBound& other)
     const
 {
   if (radius < 0)
@@ -188,10 +188,10 @@ BallBound<TMetricType, VecType>::MaxDistance(const BallBound& other)
  *
  * Example: bound1.MinDistanceSq(other) for minimum squared distance.
  */
-template<typename TMetricType, typename VecType>
+template<typename MetricType, typename VecType>
 template<typename OtherVecType>
-math::RangeType<typename BallBound<TMetricType, VecType>::ElemType>
-BallBound<TMetricType, VecType>::RangeDistance(
+math::RangeType<typename BallBound<MetricType, VecType>::ElemType>
+BallBound<MetricType, VecType>::RangeDistance(
     const OtherVecType& point,
     typename boost::enable_if<IsVector<OtherVecType> >* /* junk */) const
 {
@@ -206,9 +206,9 @@ BallBound<TMetricType, VecType>::RangeDistance(
   }
 }
 
-template<typename TMetricType, typename VecType>
-math::RangeType<typename BallBound<TMetricType, VecType>::ElemType>
-BallBound<TMetricType, VecType>::RangeDistance(
+template<typename MetricType, typename VecType>
+math::RangeType<typename BallBound<MetricType, VecType>::ElemType>
+BallBound<MetricType, VecType>::RangeDistance(
     const BallBound& other) const
 {
   if (radius < 0)
@@ -226,9 +226,9 @@ BallBound<TMetricType, VecType>::RangeDistance(
 /**
  * Expand the bound to include the given bound.
  *
-template<typename TMetricType, typename VecType>
+template<typename MetricType, typename VecType>
 const BallBound<VecType>&
-BallBound<TMetricType, VecType>::operator|=(
+BallBound<MetricType, VecType>::operator|=(
     const BallBound<VecType>& other)
 {
   double dist = metric->Evaluate(center, other);
@@ -246,10 +246,10 @@ BallBound<TMetricType, VecType>::operator|=(
  * The difference lies in the way we initialize the ball bound. The way we
  * expand the bound is same.
  */
-template<typename TMetricType, typename VecType>
+template<typename MetricType, typename VecType>
 template<typename MatType>
-const BallBound<TMetricType, VecType>&
-BallBound<TMetricType, VecType>::operator|=(const MatType& data)
+const BallBound<MetricType, VecType>&
+BallBound<MetricType, VecType>::operator|=(const MatType& data)
 {
   if (radius < 0)
   {
@@ -277,9 +277,9 @@ BallBound<TMetricType, VecType>::operator|=(const MatType& data)
 }
 
 //! Serialize the BallBound.
-template<typename TMetricType, typename VecType>
+template<typename MetricType, typename VecType>
 template<typename Archive>
-void BallBound<TMetricType, VecType>::Serialize(
+void BallBound<MetricType, VecType>::Serialize(
     Archive& ar,
     const unsigned int /* version */)
 {

From ef01c055a24863b4c34c37da9637b2bfcd2a5da1 Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Sun, 29 May 2016 19:12:29 +0900
Subject: [PATCH 38/87] fix directive name for split_data.hpp

---
 src/mlpack/core/data/split_data.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/core/data/split_data.hpp b/src/mlpack/core/data/split_data.hpp
index 381132061d0..1df1b28e841 100644
--- a/src/mlpack/core/data/split_data.hpp
+++ b/src/mlpack/core/data/split_data.hpp
@@ -5,8 +5,8 @@
  * Defines Split(), a utility function to split a dataset into a
  * training set and a test set.
  */
-#ifndef MLPACK_CORE_UTIL_SPLIT_DATA_HPP
-#define MLPACK_CORE_UTIL_SPLIT_DATA_HPP
+#ifndef MLPACK_CORE_DATA_SPLIT_DATA_HPP
+#define MLPACK_CORE_DATA_SPLIT_DATA_HPP
 
 #include <mlpack/core.hpp>
 

From 3d1ed0fa731b4f34d0c02ae63c37d4106e23a9f8 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@fu-berlin.de>
Date: Mon, 30 May 2016 19:18:28 +0200
Subject: [PATCH 39/87] Use add_cli_executable to control CLI executables
 build; more information take a look at #619.

---
 src/mlpack/methods/rmva/CMakeLists.txt | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt
index d075ce35ac2..ced53a30ea7 100644
--- a/src/mlpack/methods/rmva/CMakeLists.txt
+++ b/src/mlpack/methods/rmva/CMakeLists.txt
@@ -14,10 +14,4 @@ endforeach()
 # the parent scope).
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
 
-add_executable(mlpack_rmva
-  rmva_main.cpp
-)
-target_link_libraries(mlpack_rmva
-  mlpack
-)
-install(TARGETS mlpack_rmva RUNTIME DESTINATION bin)
+add_cli_executable(rmva)

From 1dad2b662d595097d77f4f0608e22aaa5546bd67 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Mon, 30 May 2016 18:39:13 -0400
Subject: [PATCH 40/87] Fix spacing.

---
 .../tree/rectangle_tree/r_star_tree_split_impl.hpp     | 10 +++++-----
 .../core/tree/rectangle_tree/r_tree_split_impl.hpp     |  8 ++++----
 .../core/tree/rectangle_tree/rectangle_tree_impl.hpp   |  4 ++--
 .../core/tree/rectangle_tree/x_tree_split_impl.hpp     | 10 +++++-----
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
index 49adbe8377f..44dbf95fe77 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
@@ -60,7 +60,7 @@ void RStarTreeSplit<TreeType>::SplitLeafNode(TreeType *tree,std::vector<bool>& r
     tree->Children()[(tree->NumChildren())++] = copy;
     assert(tree->NumChildren() == 1);
 
-    copy->Split().SplitLeafNode(copy,relevels);
+    copy->Split().SplitLeafNode(copy, relevels);
     return;
   }
 
@@ -77,7 +77,7 @@ void RStarTreeSplit<TreeType>::SplitLeafNode(TreeType *tree,std::vector<bool>& r
     size_t p = tree->MaxLeafSize() * 0.3; // The paper says this works the best.
     if (p == 0)
     {
-      tree->Split().SplitLeafNode(tree,relevels);
+      tree->Split().SplitLeafNode(tree, relevels);
       return;
     }
 
@@ -270,7 +270,7 @@ void RStarTreeSplit<TreeType>::SplitLeafNode(TreeType *tree,std::vector<bool>& r
   // just in case, we use an assert.
   assert(par->NumChildren() <= par->MaxNumChildren() + 1);
   if (par->NumChildren() == par->MaxNumChildren() + 1)
-    par->Split().SplitNonLeafNode(par,relevels);
+    par->Split().SplitNonLeafNode(par, relevels);
 
   assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
   assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
@@ -306,7 +306,7 @@ bool RStarTreeSplit<TreeType>::SplitNonLeafNode(TreeType *tree,std::vector<bool>
     tree->NullifyData();
     tree->Children()[(tree->NumChildren())++] = copy;
 
-    copy->Split().SplitNonLeafNode(copy,relevels);
+    copy->Split().SplitNonLeafNode(copy, relevels);
     return true;
   }
 
@@ -662,7 +662,7 @@ bool RStarTreeSplit<TreeType>::SplitNonLeafNode(TreeType *tree,std::vector<bool>
   assert(par->NumChildren() <= par->MaxNumChildren() + 1);
   if (par->NumChildren() == par->MaxNumChildren() + 1)
   {
-    par->Split().SplitNonLeafNode(par,relevels);
+    par->Split().SplitNonLeafNode(par, relevels);
   }
 
   // We have to update the children of each of these new nodes so that they
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
index 442e49f7ed0..69bf041d5ab 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
@@ -53,7 +53,7 @@ void RTreeSplit<TreeType>::SplitLeafNode(TreeType *tree,std::vector<bool>& relev
     tree->NullifyData();
     // Because this was a leaf node, numChildren must be 0.
     tree->Children()[(tree->NumChildren())++] = copy;
-    copy->Split().SplitLeafNode(copy,relevels);
+    copy->Split().SplitLeafNode(copy, relevels);
     return;
   }
 
@@ -84,7 +84,7 @@ void RTreeSplit<TreeType>::SplitLeafNode(TreeType *tree,std::vector<bool>& relev
   // just in case, we use an assert.
   assert(par->NumChildren() <= par->MaxNumChildren() + 1);
   if (par->NumChildren() == par->MaxNumChildren() + 1)
-    par->Split().SplitNonLeafNode(par,relevels);
+    par->Split().SplitNonLeafNode(par, relevels);
 
   assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
   assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
@@ -116,7 +116,7 @@ bool RTreeSplit<TreeType>::SplitNonLeafNode(TreeType *tree,std::vector<bool>& re
     tree->NumChildren() = 0;
     tree->NullifyData();
     tree->Children()[(tree->NumChildren())++] = copy;
-    copy->Split().SplitNonLeafNode(copy,relevels);
+    copy->Split().SplitNonLeafNode(copy, relevels);
     return true;
   }
 
@@ -149,7 +149,7 @@ bool RTreeSplit<TreeType>::SplitNonLeafNode(TreeType *tree,std::vector<bool>& re
   assert(par->NumChildren() <= par->MaxNumChildren() + 1);
 
   if (par->NumChildren() == par->MaxNumChildren() + 1)
-    par->Split().SplitNonLeafNode(par,relevels);
+    par->Split().SplitNonLeafNode(par, relevels);
 
   // We have to update the children of each of these new nodes so that they
   // record the correct parent.
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
index 8184f89c06b..d993a450901 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
@@ -667,7 +667,7 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType>::
 
     // If we are full, then we need to split (or at least try).  The SplitType
     // takes care of this and of moving up the tree if necessary.
-    split.SplitLeafNode(this,relevels);
+    split.SplitLeafNode(this, relevels);
   }
   else
   {
@@ -677,7 +677,7 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType>::
 
     // If we are full, then we need to split (or at least try).  The SplitType
     // takes care of this and of moving up the tree if necessary.
-    split.SplitNonLeafNode(this,relevels);
+    split.SplitNonLeafNode(this, relevels);
   }
 }
 
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
index a619b725a54..0b434542001 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
@@ -66,7 +66,7 @@ void XTreeSplit<TreeType>::SplitLeafNode(TreeType *tree,std::vector<bool>& relev
     // Because this was a leaf node, numChildren must be 0.
     tree->Children()[(tree->NumChildren())++] = copy;
     assert(tree->NumChildren() == 1);
-    copy->Split().SplitLeafNode(copy,relevels);
+    copy->Split().SplitLeafNode(copy, relevels);
     return;
   }
 
@@ -84,7 +84,7 @@ void XTreeSplit<TreeType>::SplitLeafNode(TreeType *tree,std::vector<bool>& relev
     size_t p = tree->MaxLeafSize() * 0.3;
     if (p == 0)
     {
-      tree->Split().SplitLeafNode(tree,relevels);
+      tree->Split().SplitLeafNode(tree, relevels);
       return;
     }
 
@@ -297,7 +297,7 @@ void XTreeSplit<TreeType>::SplitLeafNode(TreeType *tree,std::vector<bool>& relev
   // in case, we use an assert.
   assert(par->NumChildren() <= par->MaxNumChildren() + 1);
   if (par->NumChildren() == par->MaxNumChildren() + 1)
-    par->Split().SplitNonLeafNode(par,relevels);
+    par->Split().SplitNonLeafNode(par, relevels);
 
   assert(treeOne->Parent()->NumChildren() <=
       treeOne->Parent()->MaxNumChildren());
@@ -336,7 +336,7 @@ bool XTreeSplit<TreeType>::SplitNonLeafNode(TreeType *tree,std::vector<bool>& re
     tree->NumChildren() = 0;
     tree->NullifyData();
     tree->Children()[(tree->NumChildren())++] = copy;
-    copy->Split().SplitNonLeafNode(copy,relevels);
+    copy->Split().SplitNonLeafNode(copy, relevels);
     return true;
   }
 
@@ -830,7 +830,7 @@ bool XTreeSplit<TreeType>::SplitNonLeafNode(TreeType *tree,std::vector<bool>& re
 
   if (par->NumChildren() == par->MaxNumChildren() + 1)
   {
-    par->Split().SplitNonLeafNode(par,relevels);
+    par->Split().SplitNonLeafNode(par, relevels);
   }
 
   // We have to update the children of each of these new nodes so that they

From 9e88669c6c5a8fcc9efb2ce37d09f99d49e4e302 Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Mon, 30 May 2016 19:15:45 +0900
Subject: [PATCH 41/87] refine style and detail test

---
 src/mlpack/core/data/split_data.hpp           | 30 +++++++++----------
 .../preprocess/preprocess_split_main.cpp      | 24 +++++----------
 src/mlpack/tests/split_data_test.cpp          | 28 +++++++++++++++--
 3 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/src/mlpack/core/data/split_data.hpp b/src/mlpack/core/data/split_data.hpp
index 1df1b28e841..3979caf8f2e 100644
--- a/src/mlpack/core/data/split_data.hpp
+++ b/src/mlpack/core/data/split_data.hpp
@@ -42,12 +42,12 @@ namespace data {
  */
 template<typename T, typename U>
 void Split(const arma::Mat<T>& input,
-                    const arma::Row<U>& inputLabel,
-                    arma::Mat<T>& trainData,
-                    arma::Mat<T>& testData,
-                    arma::Row<U>& trainLabel,
-                    arma::Row<U>& testLabel,
-                    const double testRatio)
+           const arma::Row<U>& inputLabel,
+           arma::Mat<T>& trainData,
+           arma::Mat<T>& testData,
+           arma::Row<U>& trainLabel,
+           arma::Row<U>& testLabel,
+           const double testRatio)
 {
   const size_t testSize = static_cast<size_t>(input.n_cols * testRatio);
   const size_t trainSize = input.n_cols - testSize;
@@ -96,9 +96,9 @@ void Split(const arma::Mat<T>& input,
  */
 template<typename T>
 void Split(const arma::Mat<T>& input,
-                    arma::Mat<T>& trainData,
-                    arma::Mat<T>& testData,
-                    const double testRatio)
+           arma::Mat<T>& trainData,
+           arma::Mat<T>& testData,
+           const double testRatio)
 {
   const size_t testSize = static_cast<size_t>(input.n_cols * testRatio);
   const size_t trainSize = input.n_cols - testSize;
@@ -111,11 +111,11 @@ void Split(const arma::Mat<T>& input,
 
   for (size_t i = 0; i != trainSize; ++i)
   {
-     trainData.col(i) = input.col(order[i]);
+    trainData.col(i) = input.col(order[i]);
   }
   for (size_t i = 0; i != testSize; ++i)
   {
-     testData.col(i) = input.col(order[i + trainSize]);
+    testData.col(i) = input.col(order[i + trainSize]);
   }
 }
 
@@ -141,8 +141,8 @@ void Split(const arma::Mat<T>& input,
 template<typename T,typename U>
 std::tuple<arma::Mat<T>, arma::Mat<T>, arma::Row<U>, arma::Row<U>>
 Split(const arma::Mat<T>& input,
-               const arma::Row<U>& inputLabel,
-               const double testRatio)
+      const arma::Row<U>& inputLabel,
+      const double testRatio)
 {
   arma::Mat<T> trainData;
   arma::Mat<T> testData;
@@ -174,13 +174,13 @@ Split(const arma::Mat<T>& input,
 template<typename T>
 std::tuple<arma::Mat<T>, arma::Mat<T>>
 Split(const arma::Mat<T>& input,
-               const double testRatio)
+      const double testRatio)
 {
   arma::Mat<T> trainData;
   arma::Mat<T> testData;
   Split(input, trainData, testData, testRatio);
 
-  return std::make_tuple(trainData, testData);
+  return std::make_tuple(std::move(trainData), std::move(testData));
 }
 
 } // namespace data
diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index 02e844a21a5..ca8e830be7c 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -87,7 +87,7 @@ int main(int argc, char** argv)
   if (CLI::HasParam("test_ratio"))
   {
     //sanity check on test_ratio
-    if ((testRatio < 0.0) && (testRatio > 1.0))
+    if ((testRatio < 0.0) || (testRatio > 1.0))
     {
       Log::Fatal << "Invalid parameter for test_ratio. "
         << "test_ratio must be between 0.0 and 1.0" << endl;
@@ -116,16 +116,10 @@ int main(int argc, char** argv)
     Log::Info << "Train Label Count: " << get<2>(value).n_cols << endl;
     Log::Info << "Test Label Count: " << get<3>(value).n_cols << endl;
 
-    // TODO: fix full precision problem
-    mat training = get<0>(value);
-    mat test = get<1>(value);
-    mat trainingLabels = get<2>(value);
-    mat testLabels = get<3>(value);
-
-    data::Save(trainingFile, training, false);
-    data::Save(testFile, test, false);
-    data::Save(trainingLabelsFile, trainingLabels, false);
-    data::Save(testLabelsFile, testLabels, false);
+    data::Save(trainingFile, get<0>(value), false);
+    data::Save(testFile, get<1>(value), false);
+    data::Save(trainingLabelsFile, get<2>(value), false);
+    data::Save(testLabelsFile, get<3>(value), false);
   }
   else // split without parameters
   {
@@ -133,12 +127,8 @@ int main(int argc, char** argv)
     Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
     Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
 
-    // TODO: fix full precision problem
-    mat training = get<0>(value);
-    mat test = get<1>(value);
-
-    data::Save(trainingFile, training, false);
-    data::Save(testFile, test, false);
+    data::Save(trainingFile, get<0>(value), false);
+    data::Save(testFile, get<1>(value), false);
   }
 }
 
diff --git a/src/mlpack/tests/split_data_test.cpp b/src/mlpack/tests/split_data_test.cpp
index d7b39909832..daf4cd50e84 100644
--- a/src/mlpack/tests/split_data_test.cpp
+++ b/src/mlpack/tests/split_data_test.cpp
@@ -41,6 +41,22 @@ void CompareData(const mat& inputData,
   }
 }
 
+/** Require each row of both matrices to contain the same values. */
+void CheckMatEqual(const mat& inputData,
+                   const mat& compareData)
+{
+  BOOST_REQUIRE_EQUAL(inputData.n_rows, compareData.n_rows);
+  BOOST_REQUIRE_EQUAL(inputData.n_cols, compareData.n_cols);
+  // Sorting every row makes the comparison independent of column order.
+  const mat sortedInput = arma::sort(inputData, "ascend", 1);
+  const mat sortedCompare = arma::sort(compareData, "ascend", 1);
+  for (size_t i = 0; i < sortedInput.n_cols; ++i)
+  {
+    for (size_t j = 0; j < sortedInput.n_rows; ++j)
+      BOOST_REQUIRE_CLOSE(sortedInput(j, i), sortedCompare(j, i), 1e-5);
+  }
+}
+
 /**
  * Check that no labels have been duplicated.
  */
@@ -70,11 +86,15 @@ void CheckDuplication(const Row<size_t>& trainLabels,
 BOOST_AUTO_TEST_CASE(SplitDataResultMat)
 {
   mat input(2, 10);
-  input.randu();
+  size_t count = 0; // count for putting unique sequential values
+  input.imbue([&count] () { return ++count; });
 
   const auto value = Split(input, 0.2);
   BOOST_REQUIRE_EQUAL(std::get<0>(value).n_cols, 8); // train data
   BOOST_REQUIRE_EQUAL(std::get<1>(value).n_cols, 2); // test data
+
+  mat concat = arma::join_rows(std::get<0>(value), std::get<1>(value));
+  CheckMatEqual(input, concat);
 }
 
 BOOST_AUTO_TEST_CASE(SplitLabeledDataResultMat)
@@ -106,12 +126,16 @@ BOOST_AUTO_TEST_CASE(SplitLabeledDataResultMat)
  */
 BOOST_AUTO_TEST_CASE(SplitDataLargerTest)
 {
+  size_t count = 0;
   mat input(10, 497);
-  input.randu();
+  input.imbue([&count] () { return ++count; });
 
   const auto value = Split(input, 0.3);
   BOOST_REQUIRE_EQUAL(std::get<0>(value).n_cols, 497 - size_t(0.3 * 497));
   BOOST_REQUIRE_EQUAL(std::get<1>(value).n_cols, size_t(0.3 * 497));
+
+  mat concat = arma::join_rows(std::get<0>(value), std::get<1>(value));
+  CheckMatEqual(input, concat);
 }
 
 BOOST_AUTO_TEST_CASE(SplitLabeledDataLargerTest)

From 0e61fe24ce2fc4a79064fd16112c4f918e6cc121 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Tue, 31 May 2016 15:52:38 +0800
Subject: [PATCH 42/87] 1 : fixing issue #658, work with non-transpose data, do
 not work with transpose data yet

---
 src/mlpack/core/data/load_impl.hpp | 93 ++++++++++--------------------
 1 file changed, 30 insertions(+), 63 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 947b3600e97..c02fa5ea662 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -389,81 +389,48 @@ bool Load(const std::string& filename,
     stream.open(filename, std::fstream::in);
 
     // Extract line by line.
-    std::stringstream token;
+    auto notNumber = [](std::string const &str)
+    {
+      return std::any_of(std::begin(str), std::end(str),
+                         [](char c){ return !std::isdigit(c);});
+    };
     size_t row = 0;
     while (!stream.bad() && !stream.fail() && !stream.eof())
     {
       std::getline(stream, buffer, '\n');
-
-      // Look at each token.  Unfortunately we have to do this character by
-      // character, because things may be escaped in quotes.
       Tokenizer lineTok(buffer, sep);
-      size_t col = 0;
-      for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
+      std::vector<std::string> tokens;
+      if(!transpose)
       {
-        // Attempt to extract as type eT.  If that fails, we'll assume it's a
-        // string and map it (which may involve retroactively mapping everything
-        // we've seen so far).
-        token.clear();
-        token.str(*it);
-
-        eT val = eT(0);
-        token >> val;
-
-        if (token.fail())
+        for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
         {
-          // Conversion failed; but it may be a NaN or inf.  Armadillo has
-          // convenient functions to check.
-          if (!arma::diskio::convert_naninf(val, token.str()))
+          std::string trimmedToken(*it);
+          boost::trim(trimmedToken);
+          tokens.emplace_back(std::move(trimmedToken));
+        }
+        bool const notNumeric = std::any_of(std::begin(tokens),
+                                           std::end(tokens), notNumber);
+        if(notNumeric)
+        {
+          for(size_t i = 0; i != tokens.size(); ++i)
           {
-            // We need to perform a mapping.
-            const size_t dim = (transpose) ? col : row;
-            if (info.Type(dim) == Datatype::numeric)
-            {
-              // We must map everything we have seen up to this point and change
-              // the values in the matrix.
-              if (transpose)
-              {
-                // Whatever we've seen so far has successfully mapped to an eT.
-                // So we need to print it back to a string.  We'll use
-                // Armadillo's functionality for that.
-                for (size_t i = 0; i < row; ++i)
-                {
-                  std::stringstream sstr;
-                  arma::arma_ostream::print_elem(sstr, matrix.at(i, col),
-                      false);
-                  eT newVal = info.MapString(sstr.str(), col);
-                  matrix.at(i, col) = newVal;
-                }
-              }
-              else
-              {
-                for (size_t i = 0; i < col; ++i)
-                {
-                  std::stringstream sstr;
-                  arma::arma_ostream::print_elem(sstr, matrix.at(row, i),
-                      false);
-                  eT newVal = info.MapString(sstr.str(), row);
-                  matrix.at(row, i) = newVal;
-                }
-              }
-            }
-
-            // Strip whitespace from either side of the string.
-            std::string trimmedToken(token.str());
-            boost::trim(trimmedToken);
-            val = info.MapString(trimmedToken, dim);
+            eT const val = static_cast<eT>(info.MapString(tokens[i], row));
+            matrix.at(row, i) = val;
           }
         }
-
-        if (transpose)
-          matrix(col, row) = val;
         else
-          matrix(row, col) = val;
-
-        ++col;
+        {
+          std::stringstream sstream;
+          for(size_t i = 0; i != tokens.size(); ++i)
+          {
+            eT val(0);
+            sstream<<tokens[i];
+            sstream>>val;
+            matrix.at(row, i) = val;
+            sstream.clear();
+          }
+        }
       }
-
       ++row;
     }
   }

From 29f0536f6aea762b98bef2a2ae91ce2d54b88e1b Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Tue, 31 May 2016 16:10:30 +0800
Subject: [PATCH 43/87] remove useless codes

---
 src/mlpack/core/data/load_impl.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index c02fa5ea662..69a46fa7eb8 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -388,7 +388,6 @@ bool Load(const std::string& filename,
     stream.close();
     stream.open(filename, std::fstream::in);
 
-    // Extract line by line.
     auto notNumber = [](std::string const &str)
     {
       return std::any_of(std::begin(str), std::end(str),
@@ -397,6 +396,7 @@ bool Load(const std::string& filename,
     size_t row = 0;
     while (!stream.bad() && !stream.fail() && !stream.eof())
     {
+      // Extract line by line.
       std::getline(stream, buffer, '\n');
       Tokenizer lineTok(buffer, sep);
       std::vector<std::string> tokens;
@@ -423,10 +423,8 @@ bool Load(const std::string& filename,
           std::stringstream sstream;
           for(size_t i = 0; i != tokens.size(); ++i)
           {
-            eT val(0);
             sstream<<tokens[i];
-            sstream>>val;
-            matrix.at(row, i) = val;
+            sstream>>matrix.at(row, i);
             sstream.clear();
           }
         }

From 9aac1dca04dbfbd63658f0719a97aea6804cae2d Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Tue, 31 May 2016 16:19:18 +0800
Subject: [PATCH 44/87] use algorithm to replace for loop

---
 src/mlpack/core/data/load_impl.hpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 69a46fa7eb8..c74b089d7d2 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -402,12 +402,14 @@ bool Load(const std::string& filename,
       std::vector<std::string> tokens;
       if(!transpose)
       {
-        for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
+        std::transform(std::begin(lineTok), std::end(lineTok),
+                       std::back_inserter(tokens),
+                       [&tokens](std::string const &str)
         {
-          std::string trimmedToken(*it);
+          std::string trimmedToken(str);
           boost::trim(trimmedToken);
-          tokens.emplace_back(std::move(trimmedToken));
-        }
+          return std::move(trimmedToken);
+        });
         bool const notNumeric = std::any_of(std::begin(tokens),
                                            std::end(tokens), notNumber);
         if(notNumeric)

From e36eec5cb250d8c36a49aba5cc1bae6a68723d29 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Tue, 31 May 2016 09:30:55 -0400
Subject: [PATCH 45/87] Document the state of loading sparse matrices.

---
 doc/guide/formats.hpp | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/doc/guide/formats.hpp b/doc/guide/formats.hpp
index 4b2d7378c2b..ef834afae80 100644
--- a/doc/guide/formats.hpp
+++ b/doc/guide/formats.hpp
@@ -16,9 +16,10 @@ suitable numeric representation.  Therefore, in general, datasets on disk should
 contain only numeric features in order to be loaded successfully by mlpack.
 
 The types of datasets that mlpack can load are roughly the same as the types of
-matrices that Armadillo can load.  When datasets are loaded by mlpack, \b the
-\b "file's type is detected using the file's extension".  mlpack supports the
-following file types:
+matrices that Armadillo can load.  However, the load functionality that mlpack
+provides \b "only supports loading dense datasets".  When datasets are loaded by
+mlpack, \b the \b "file's type is detected using the file's extension".  mlpack
+supports the following file types:
 
  - csv (comma-separated values), denoted by .csv or .txt
  - tsv (tab-separated values), denoted by .tsv, .csv, or .txt
@@ -101,6 +102,42 @@ As with the command-line programs, the type of data to be loaded is
 automatically detected from the filename extension.  For more details, see the
 mlpack::data::Load() and mlpack::data::Save() documentation.
 
+@section sparseload Dealing with sparse matrices
+
+As mentioned earlier, support for loading sparse matrices in mlpack is not
+available at this time.  To use a sparse matrix with mlpack code, you will have
+to write a C++ program instead of using any of the command-line tools, because
+the command-line tools all use dense datasets internally.  (There is one
+exception: the \c mlpack_cf program, for collaborative filtering, loads sparse
+coordinate lists.)
+
+In addition, the \c mlpack::data::Load() function does not support loading any
+sparse format; so the best idea is to use undocumented Armadillo functionality
+to load coordinate lists.  Suppose you have a coordinate list file like the one
+below:
+
+\code
+$ cat cl.csv
+0 0 0.332
+1 3 3.126
+4 4 1.333
+\endcode
+
+This represents a 5x5 matrix with three nonzero elements.  We can load this
+using Armadillo:
+
+\code
+arma::sp_mat matrix;
+matrix.load("cl.csv", arma::coord_ascii);
+matrix = matrix.t(); // We must transpose after load!
+\endcode
+
+The transposition after loading is necessary if the coordinate list is in
+row-major format (that is, if each row in the matrix represents a point and each
+column represents a feature).  Be sure that the matrix you use with mlpack
+methods has points as columns and features as rows!  See \ref matrices for more
+information.
+
 @section formatcat Categorical features and command line programs
 
 In some situations it is useful to represent data not just as a numeric matrix

From a340e23b46356e91444faeea93421e10e1a5a378 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Tue, 31 May 2016 22:14:17 +0800
Subject: [PATCH 46/87] fix bug--isdigit will not treat 1.004e10 as digit

---
 src/mlpack/core/data/load_impl.hpp | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index c74b089d7d2..f6fc93e7e67 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -390,8 +390,11 @@ bool Load(const std::string& filename,
 
     auto notNumber = [](std::string const &str)
     {
-      return std::any_of(std::begin(str), std::end(str),
-                         [](char c){ return !std::isdigit(c);});
+      eT val(0);
+      std::stringstream token;
+      token.str(str);
+      token>>val;
+      return token.fail();
     };
     size_t row = 0;
     while (!stream.bad() && !stream.fail() && !stream.eof())
@@ -421,13 +424,13 @@ bool Load(const std::string& filename,
           }
         }
         else
-        {
-          std::stringstream sstream;
+        {          
+          std::stringstream token;
           for(size_t i = 0; i != tokens.size(); ++i)
-          {
-            sstream<<tokens[i];
-            sstream>>matrix.at(row, i);
-            sstream.clear();
+          {            
+            token.str(tokens[i]);
+            token>>matrix.at(row, i);
+            token.clear();
           }
         }
       }

From 56a036116f07e76692766b9b1f1410887f2deb96 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Tue, 31 May 2016 23:29:03 +0800
Subject: [PATCH 47/87] support transpose mapping

---
 src/mlpack/core/data/load_impl.hpp | 141 ++++++++++++++++++++---------
 1 file changed, 96 insertions(+), 45 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index f6fc93e7e67..cab577e977a 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -29,6 +29,74 @@
 namespace mlpack {
 namespace data {
 
+namespace details{
+
+template<typename Tokenizer>
+std::vector<std::string> ToTokens(Tokenizer &lineTok)
+{
+  std::vector<std::string> tokens;
+  std::transform(std::begin(lineTok), std::end(lineTok),
+                 std::back_inserter(tokens),
+                 [&tokens](std::string const &str)
+  {
+    std::string trimmedToken(str);
+    boost::trim(trimmedToken);
+    return std::move(trimmedToken);
+  });
+
+  return tokens;
+}
+
+void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
+                     std::vector<std::string> &output,
+                     size_t index)
+{
+  output.clear();
+  for(size_t i = 0; i != input.size(); ++i)
+  {
+    output.emplace_back(input[i][index]);
+  }
+}
+
+template<typename eT>
+void MapToNumerical(std::vector<std::string> const &tokens,
+                    size_t &row,
+                    DatasetInfo &info,
+                    arma::Mat<eT> &matrix)
+{
+  auto notNumber = [](std::string const &str)
+  {
+    eT val(0);
+    std::stringstream token;
+    token.str(str);
+    token>>val;
+    return token.fail();
+  };
+
+  bool const notNumeric = std::any_of(std::begin(tokens),
+                                     std::end(tokens), notNumber);
+  if(notNumeric)
+  {
+    for(size_t i = 0; i != tokens.size(); ++i)
+    {
+      eT const val = static_cast<eT>(info.MapString(tokens[i], row));
+      matrix.at(row, i) = val;
+    }
+  }
+  else
+  {
+    std::stringstream token;
+    for(size_t i = 0; i != tokens.size(); ++i)
+    {
+      token.str(tokens[i]);
+      token>>matrix.at(row, i);
+      token.clear();
+    }
+  }
+}
+
+}
+
 template<typename eT>
 bool inline inplace_transpose(arma::Mat<eT>& X)
 {
@@ -37,7 +105,7 @@ bool inline inplace_transpose(arma::Mat<eT>& X)
     X = arma::trans(X);
     return false;
   }
-  catch (std::bad_alloc& exception)
+  catch (std::bad_alloc&)
   {
 #if (ARMA_VERSION_MAJOR >= 4) || \
     ((ARMA_VERSION_MAJOR == 3) && (ARMA_VERSION_MINOR >= 930))
@@ -386,55 +454,38 @@ bool Load(const std::string& filename,
     }
 
     stream.close();
-    stream.open(filename, std::fstream::in);
+    stream.open(filename, std::fstream::in);    
 
-    auto notNumber = [](std::string const &str)
-    {
-      eT val(0);
-      std::stringstream token;
-      token.str(str);
-      token>>val;
-      return token.fail();
-    };
-    size_t row = 0;
-    while (!stream.bad() && !stream.fail() && !stream.eof())
+    if(transpose)
     {
-      // Extract line by line.
-      std::getline(stream, buffer, '\n');
-      Tokenizer lineTok(buffer, sep);
+      std::vector<std::vector<std::string>> tokensArray;
+      while (!stream.bad() && !stream.fail() && !stream.eof())
+      {
+        // Extract line by line.
+        std::getline(stream, buffer, '\n');
+        Tokenizer lineTok(buffer, sep);
+        tokensArray.emplace_back(details::ToTokens(lineTok));
+      }
       std::vector<std::string> tokens;
-      if(!transpose)
+      for(size_t i = 0; i != cols; ++i)
       {
-        std::transform(std::begin(lineTok), std::end(lineTok),
-                       std::back_inserter(tokens),
-                       [&tokens](std::string const &str)
-        {
-          std::string trimmedToken(str);
-          boost::trim(trimmedToken);
-          return std::move(trimmedToken);
-        });
-        bool const notNumeric = std::any_of(std::begin(tokens),
-                                           std::end(tokens), notNumber);
-        if(notNumeric)
-        {
-          for(size_t i = 0; i != tokens.size(); ++i)
-          {
-            eT const val = static_cast<eT>(info.MapString(tokens[i], row));
-            matrix.at(row, i) = val;
-          }
-        }
-        else
-        {          
-          std::stringstream token;
-          for(size_t i = 0; i != tokens.size(); ++i)
-          {            
-            token.str(tokens[i]);
-            token>>matrix.at(row, i);
-            token.clear();
-          }
-        }
+        details::TransPoseTokens(tokensArray, tokens, i);
+        details::MapToNumerical(tokens, i,
+                                info, matrix);
+      }
+    }
+    else
+    {
+      size_t row = 0;
+      while (!stream.bad() && !stream.fail() && !stream.eof())
+      {
+        // Extract line by line.
+        std::getline(stream, buffer, '\n');
+        Tokenizer lineTok(buffer, sep);
+        details::MapToNumerical(details::ToTokens(lineTok), row,
+                                info, matrix);
+        ++row;
       }
-      ++row;
     }
   }
   else if (extension == "arff")

From 0634d4afe48f030db45deca9d8c768735047ca79 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Tue, 31 May 2016 23:36:33 +0800
Subject: [PATCH 48/87] fix format--place const before variable

---
 src/mlpack/core/data/load_impl.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index cab577e977a..c9341816d81 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -59,12 +59,12 @@ void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
 }
 
 template<typename eT>
-void MapToNumerical(std::vector<std::string> const &tokens,
+void MapToNumerical(const std::vector<std::string> &tokens,
                     size_t &row,
                     DatasetInfo &info,
                     arma::Mat<eT> &matrix)
 {
-  auto notNumber = [](std::string const &str)
+  auto notNumber = [](const std::string &str)
   {
     eT val(0);
     std::stringstream token;
@@ -73,13 +73,13 @@ void MapToNumerical(std::vector<std::string> const &tokens,
     return token.fail();
   };
 
-  bool const notNumeric = std::any_of(std::begin(tokens),
-                                     std::end(tokens), notNumber);
+  const bool notNumeric = std::any_of(std::begin(tokens),
+                                      std::end(tokens), notNumber);
   if(notNumeric)
   {
     for(size_t i = 0; i != tokens.size(); ++i)
     {
-      eT const val = static_cast<eT>(info.MapString(tokens[i], row));
+      const eT val = static_cast<eT>(info.MapString(tokens[i], row));
       matrix.at(row, i) = val;
     }
   }

From 5d964a1633faddeaf152b9495d96a271bb455a91 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Tue, 31 May 2016 23:49:14 +0800
Subject: [PATCH 49/87] fix bugs--may read invalid stream

---
 src/mlpack/core/data/load_impl.hpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index c9341816d81..90d6d37bbf3 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -459,14 +459,19 @@ bool Load(const std::string& filename,
     if(transpose)
     {
       std::vector<std::vector<std::string>> tokensArray;
+      std::vector<std::string> tokens;
       while (!stream.bad() && !stream.fail() && !stream.eof())
       {
         // Extract line by line.
         std::getline(stream, buffer, '\n');
         Tokenizer lineTok(buffer, sep);
-        tokensArray.emplace_back(details::ToTokens(lineTok));
+        tokens = details::ToTokens(lineTok);
+        if(tokens.size() == cols)
+        {
+          tokensArray.emplace_back(details::ToTokens(lineTok));
+        }
       }
-      std::vector<std::string> tokens;
+      std::cout<<"array size : "<<tokensArray.size()<<std::endl;
       for(size_t i = 0; i != cols; ++i)
       {
         details::TransPoseTokens(tokensArray, tokens, i);

From 6eb1723c153193f8ef655d1e2b728e05c1dcc17d Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Tue, 31 May 2016 23:50:14 +0800
Subject: [PATCH 50/87] remove debug message

---
 src/mlpack/core/data/load_impl.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 90d6d37bbf3..e8327b04f21 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -471,7 +471,6 @@ bool Load(const std::string& filename,
           tokensArray.emplace_back(details::ToTokens(lineTok));
         }
       }
-      std::cout<<"array size : "<<tokensArray.size()<<std::endl;
       for(size_t i = 0; i != cols; ++i)
       {
         details::TransPoseTokens(tokensArray, tokens, i);

From 5eaa2a9a84cbf20b6770e680b7e54e68fb2a6335 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Wed, 1 Jun 2016 00:37:56 +0800
Subject: [PATCH 51/87] reduce copy

---
 src/mlpack/core/data/load_impl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index e8327b04f21..e3b38602ba1 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -468,7 +468,7 @@ bool Load(const std::string& filename,
         tokens = details::ToTokens(lineTok);
         if(tokens.size() == cols)
         {
-          tokensArray.emplace_back(details::ToTokens(lineTok));
+          tokensArray.emplace_back(std::move(tokens));
         }
       }
       for(size_t i = 0; i != cols; ++i)

From 4ea13b02acb58c408135d08eae942b774064fdc7 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Wed, 1 Jun 2016 02:25:10 +0800
Subject: [PATCH 52/87] add new test cases

---
 src/mlpack/tests/load_save_test.cpp | 272 +++++++++++++++++++++++++++-
 1 file changed, 270 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/tests/load_save_test.cpp b/src/mlpack/tests/load_save_test.cpp
index 7e2f6a152d1..1935083798d 100644
--- a/src/mlpack/tests/load_save_test.cpp
+++ b/src/mlpack/tests/load_save_test.cpp
@@ -941,7 +941,7 @@ BOOST_AUTO_TEST_CASE(NontransposedCSVDatasetInfoLoad)
 /**
  * Create a file with a categorical string feature, then load it.
  */
-BOOST_AUTO_TEST_CASE(CategoricalCSVLoadTest)
+BOOST_AUTO_TEST_CASE(CategoricalCSVLoadTest00)
 {
   fstream f;
   f.open("test.csv", fstream::out);
@@ -1001,7 +1001,140 @@ BOOST_AUTO_TEST_CASE(CategoricalCSVLoadTest)
   remove("test.csv");
 }
 
-BOOST_AUTO_TEST_CASE(CategoricalNontransposedCSVLoadTest)
+BOOST_AUTO_TEST_CASE(CategoricalCSVLoadTest01)
+{
+  fstream f;
+  f.open("test.csv", fstream::out);
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << " , 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f.close();
+
+  // Load the test CSV.
+  arma::umat matrix;
+  DatasetInfo info;
+  data::Load("test.csv", matrix, info, true);
+
+  BOOST_REQUIRE_EQUAL(matrix.n_cols, 4);
+  BOOST_REQUIRE_EQUAL(matrix.n_rows, 3);
+
+  BOOST_REQUIRE_EQUAL(matrix(0, 0), 0);
+  BOOST_REQUIRE_EQUAL(matrix(0, 1), 0);
+  BOOST_REQUIRE_EQUAL(matrix(0, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 3), 0);
+  BOOST_REQUIRE_EQUAL(matrix(1, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 3), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 3), 1);
+
+  BOOST_REQUIRE(info.Type(0) == Datatype::categorical);
+  BOOST_REQUIRE(info.Type(1) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(2) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(3) == Datatype::numeric);
+
+  BOOST_REQUIRE_EQUAL(info.MapString("1", 0), 0);
+  BOOST_REQUIRE_EQUAL(info.MapString("", 0), 1);
+
+  BOOST_REQUIRE_EQUAL(info.UnmapString(0, 0), "1");
+  BOOST_REQUIRE_EQUAL(info.UnmapString(1, 0), "");
+
+  remove("test.csv");
+}
+
+BOOST_AUTO_TEST_CASE(CategoricalCSVLoadTest02)
+{
+  fstream f;
+  f.open("test.csv", fstream::out);
+  f << "1, 1, 1" << endl;
+  f << ", 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f.close();
+
+  // Load the test CSV.
+  arma::umat matrix;
+  DatasetInfo info;
+  data::Load("test.csv", matrix, info, true);
+
+  BOOST_REQUIRE_EQUAL(matrix.n_cols, 4);
+  BOOST_REQUIRE_EQUAL(matrix.n_rows, 3);
+
+  BOOST_REQUIRE_EQUAL(matrix(0, 0), 0);
+  BOOST_REQUIRE_EQUAL(matrix(0, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 2), 0);
+  BOOST_REQUIRE_EQUAL(matrix(0, 3), 0);
+  BOOST_REQUIRE_EQUAL(matrix(1, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 3), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 3), 1);
+
+  BOOST_REQUIRE(info.Type(0) == Datatype::categorical);
+  BOOST_REQUIRE(info.Type(1) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(2) == Datatype::numeric);
+
+  BOOST_REQUIRE_EQUAL(info.MapString("", 0), 1);
+  BOOST_REQUIRE_EQUAL(info.MapString("1", 0), 0);
+
+  BOOST_REQUIRE_EQUAL(info.UnmapString(0, 0), "1");
+  BOOST_REQUIRE_EQUAL(info.UnmapString(1, 0), "");
+
+  remove("test.csv");
+}
+
+BOOST_AUTO_TEST_CASE(CategoricalCSVLoadTest03)
+{
+  fstream f;
+  f.open("test.csv", fstream::out);
+  f << ", 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f.close();
+
+  // Load the test CSV.
+  arma::umat matrix;
+  DatasetInfo info;
+  data::Load("test.csv", matrix, info, true);
+
+  BOOST_REQUIRE_EQUAL(matrix.n_cols, 4);
+  BOOST_REQUIRE_EQUAL(matrix.n_rows, 3);
+
+  BOOST_REQUIRE_EQUAL(matrix(0, 0), 0);
+  BOOST_REQUIRE_EQUAL(matrix(0, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 3), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 3), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 3), 1);
+
+  BOOST_REQUIRE(info.Type(0) == Datatype::categorical);
+  BOOST_REQUIRE(info.Type(1) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(2) == Datatype::numeric);
+
+  BOOST_REQUIRE_EQUAL(info.MapString("", 0), 0);
+  BOOST_REQUIRE_EQUAL(info.MapString("1", 0), 1);
+
+  BOOST_REQUIRE_EQUAL(info.UnmapString(0, 0), "");
+  BOOST_REQUIRE_EQUAL(info.UnmapString(1, 0), "1");
+
+  remove("test.csv");
+}
+
+BOOST_AUTO_TEST_CASE(CategoricalNontransposedCSVLoadTest00)
 {
   fstream f;
   f.open("test.csv", fstream::out);
@@ -1093,6 +1226,141 @@ BOOST_AUTO_TEST_CASE(CategoricalNontransposedCSVLoadTest)
   remove("test.csv");
 }
 
+BOOST_AUTO_TEST_CASE(CategoricalNontransposedCSVLoadTest01)
+{
+  fstream f;
+  f.open("test.csv", fstream::out);
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << " , 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f.close();
+
+  // Load the test CSV.
+  arma::umat matrix;
+  DatasetInfo info;
+  data::Load("test.csv", matrix, info, true, false); // No transpose.
+
+  BOOST_REQUIRE_EQUAL(matrix.n_cols, 3);
+  BOOST_REQUIRE_EQUAL(matrix.n_rows, 4);
+
+  BOOST_REQUIRE_EQUAL(matrix(0, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 0), 0);
+  BOOST_REQUIRE_EQUAL(matrix(2, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 2), 1);
+
+  BOOST_REQUIRE(info.Type(0) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(1) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(2) == Datatype::categorical);
+  BOOST_REQUIRE(info.Type(3) == Datatype::numeric);
+
+  BOOST_REQUIRE_EQUAL(info.MapString("", 2), 0);
+  BOOST_REQUIRE_EQUAL(info.MapString("1", 2), 1);
+
+  BOOST_REQUIRE_EQUAL(info.UnmapString(0, 2), "");
+  BOOST_REQUIRE_EQUAL(info.UnmapString(1, 2), "1");
+
+  remove("test.csv");
+}
+
+BOOST_AUTO_TEST_CASE(CategoricalNontransposedCSVLoadTest02)
+{
+  fstream f;
+  f.open("test.csv", fstream::out);
+  f << "1, 1, 1" << endl;
+  f << ", 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f.close();
+
+  // Load the test CSV.
+  arma::umat matrix;
+  DatasetInfo info;
+  data::Load("test.csv", matrix, info, true, false); // No transpose.
+
+  BOOST_REQUIRE_EQUAL(matrix.n_cols, 3);
+  BOOST_REQUIRE_EQUAL(matrix.n_rows, 4);
+
+  BOOST_REQUIRE_EQUAL(matrix(0, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 0), 0);
+  BOOST_REQUIRE_EQUAL(matrix(1, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 2), 1);
+
+  BOOST_REQUIRE(info.Type(0) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(1) == Datatype::categorical);
+  BOOST_REQUIRE(info.Type(2) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(3) == Datatype::numeric);
+
+  BOOST_REQUIRE_EQUAL(info.MapString("", 1), 0);
+  BOOST_REQUIRE_EQUAL(info.MapString("1", 1), 1);
+
+  BOOST_REQUIRE_EQUAL(info.UnmapString(0, 1), "");
+  BOOST_REQUIRE_EQUAL(info.UnmapString(1, 1), "1");
+
+  remove("test.csv");
+}
+
+BOOST_AUTO_TEST_CASE(CategoricalNontransposedCSVLoadTest03)
+{
+  fstream f;
+  f.open("test.csv", fstream::out);
+  f << ",  1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f.close();
+
+  // Load the test CSV.
+  arma::umat matrix;
+  DatasetInfo info;
+  data::Load("test.csv", matrix, info, true, false); // No transpose.
+
+  BOOST_REQUIRE_EQUAL(matrix.n_cols, 3);
+  BOOST_REQUIRE_EQUAL(matrix.n_rows, 4);
+
+  BOOST_REQUIRE_EQUAL(matrix(0, 0), 0);
+  BOOST_REQUIRE_EQUAL(matrix(0, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(0, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(1, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(2, 2), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 0), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 1), 1);
+  BOOST_REQUIRE_EQUAL(matrix(3, 2), 1);
+
+  BOOST_REQUIRE(info.Type(0) == Datatype::categorical);
+  BOOST_REQUIRE(info.Type(1) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(2) == Datatype::numeric);
+  BOOST_REQUIRE(info.Type(3) == Datatype::numeric);
+
+  BOOST_REQUIRE_EQUAL(info.MapString("", 1), 0);
+  BOOST_REQUIRE_EQUAL(info.MapString("1", 1), 1);
+
+  BOOST_REQUIRE_EQUAL(info.UnmapString(0, 1), "");
+  BOOST_REQUIRE_EQUAL(info.UnmapString(1, 1), "1");
+
+  remove("test.csv");
+}
+
 /**
  * A simple ARFF load test.  Two attributes, both numeric.
  */

From 1beb58dbb90bf2349c2ac495669103e61231b628 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Tue, 31 May 2016 14:27:39 -0400
Subject: [PATCH 53/87] A first simple test.

---
 src/mlpack/tests/load_save_test.cpp | 40 +++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/src/mlpack/tests/load_save_test.cpp b/src/mlpack/tests/load_save_test.cpp
index 7e2f6a152d1..fc753f81fd1 100644
--- a/src/mlpack/tests/load_save_test.cpp
+++ b/src/mlpack/tests/load_save_test.cpp
@@ -1093,6 +1093,46 @@ BOOST_AUTO_TEST_CASE(CategoricalNontransposedCSVLoadTest)
   remove("test.csv");
 }
 
+/**
+ * The test CSV Keon suggested in #658.
+ */
+BOOST_AUTO_TEST_CASE(KeonsSimpleDatasetInfoTest)
+{
+  fstream f;
+  f.open("test.csv", fstream::out);
+  f << "1, 1, 1" << endl;
+  f << ", 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f << "1, 1, 1" << endl;
+  f.close();
+
+  // Load without transpose.
+  arma::mat dataset;
+  data::DatasetInfo info;
+  data::Load("test.csv", dataset, info, true, false);
+
+  BOOST_REQUIRE_EQUAL(dataset.n_rows, 4);
+  BOOST_REQUIRE_EQUAL(dataset.n_cols, 3);
+  BOOST_REQUIRE_EQUAL(info.Dimensionality(), 4);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(0), 0);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(1), 2);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(2), 0);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(3), 0);
+
+  // Load with transpose.
+  data::DatasetInfo newInfo;
+  data::Load("test.csv", dataset, info, true, true);
+
+  BOOST_REQUIRE_EQUAL(dataset.n_rows, 3);
+  BOOST_REQUIRE_EQUAL(dataset.n_cols, 4);
+  BOOST_REQUIRE_EQUAL(info.Dimensionality(), 3);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(0), 2);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(1), 0);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(2), 0);
+
+  remove("test.csv");
+}
+
 /**
  * A simple ARFF load test.  Two attributes, both numeric.
  */

From 3ee8906a6b2fdc59afd2138ce521cd6763a3eb0c Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Wed, 1 Jun 2016 02:40:14 +0800
Subject: [PATCH 54/87] fix bug--forgot to put the functions into nameless
 namespace

---
 src/mlpack/core/data/load_impl.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index e3b38602ba1..8f298379fb7 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -31,6 +31,8 @@ namespace data {
 
 namespace details{
 
+namespace{
+
 template<typename Tokenizer>
 std::vector<std::string> ToTokens(Tokenizer &lineTok)
 {
@@ -97,6 +99,8 @@ void MapToNumerical(const std::vector<std::string> &tokens,
 
 }
 
+}
+
 template<typename eT>
 bool inline inplace_transpose(arma::Mat<eT>& X)
 {

From 02e31b3b07f1c4ea5abf067c029cd3ea69a5c8ae Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Tue, 31 May 2016 14:41:47 -0400
Subject: [PATCH 55/87] Better Doxygen formatting.

---
 doc/guide/formats.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/guide/formats.hpp b/doc/guide/formats.hpp
index ef834afae80..24e9a1b43ae 100644
--- a/doc/guide/formats.hpp
+++ b/doc/guide/formats.hpp
@@ -17,9 +17,9 @@ contain only numeric features in order to be loaded successfully by mlpack.
 
 The types of datasets that mlpack can load are roughly the same as the types of
 matrices that Armadillo can load.  However, the load functionality that mlpack
-provides \b "only supports loading dense datasets".  When datasets are loaded by
-mlpack, \b the \b "file's type is detected using the file's extension".  mlpack
-supports the following file types:
+provides <b>only supports loading dense datasets</b>.  When datasets are loaded
+by mlpack, <b>the file's type is detected using the file's extension</b>.
+mlpack supports the following file types:
 
  - csv (comma-separated values), denoted by .csv or .txt
  - tsv (tab-separated values), denoted by .tsv, .csv, or .txt

From 3e75b485bf78db28e4b38437b0fe0bda00736fb4 Mon Sep 17 00:00:00 2001
From: stereomatchingkiss <stereomatchingkiss@gmail.com>
Date: Wed, 1 Jun 2016 02:44:10 +0800
Subject: [PATCH 56/87] make TransPoseTokens inline as rcurtin suggested;
 should not use nameless namespace in header file

---
 src/mlpack/core/data/load_impl.hpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 8f298379fb7..5479bab17d5 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -31,8 +31,6 @@ namespace data {
 
 namespace details{
 
-namespace{
-
 template<typename Tokenizer>
 std::vector<std::string> ToTokens(Tokenizer &lineTok)
 {
@@ -49,6 +47,7 @@ std::vector<std::string> ToTokens(Tokenizer &lineTok)
   return tokens;
 }
 
+inline
 void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
                      std::vector<std::string> &output,
                      size_t index)
@@ -99,8 +98,6 @@ void MapToNumerical(const std::vector<std::string> &tokens,
 
 }
 
-}
-
 template<typename eT>
 bool inline inplace_transpose(arma::Mat<eT>& X)
 {

From 68776071c232fb63a6a3f7aff611dc5d8825155d Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Mon, 30 May 2016 11:49:58 -0300
Subject: [PATCH 57/87] Implement B_aux according to what was discussed in
 #642.

---
 .../neighbor_search_rules_impl.hpp            | 41 ++++++++++++-------
 .../neighbor_search/neighbor_search_stat.hpp  | 10 +++++
 2 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_rules_impl.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_rules_impl.hpp
index c4767ece158..cc2b9574911 100644
--- a/src/mlpack/methods/neighbor_search/neighbor_search_rules_impl.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_rules_impl.hpp
@@ -344,6 +344,8 @@ inline double NeighborSearchRules<SortPolicy, MetricType, TreeType>::
 
   double worstDistance = SortPolicy::BestDistance();
   double bestDistance = SortPolicy::WorstDistance();
+  double bestPointDistance = SortPolicy::WorstDistance();
+  double auxDistance = SortPolicy::WorstDistance();
 
   // Loop over points held in the node.
   for (size_t i = 0; i < queryNode.NumPoints(); ++i)
@@ -351,33 +353,43 @@ inline double NeighborSearchRules<SortPolicy, MetricType, TreeType>::
     const double distance = distances(distances.n_rows - 1, queryNode.Point(i));
     if (SortPolicy::IsBetter(worstDistance, distance))
       worstDistance = distance;
-    if (SortPolicy::IsBetter(distance, bestDistance))
-      bestDistance = distance;
+    if (SortPolicy::IsBetter(distance, bestPointDistance))
+      bestPointDistance = distance;
   }
 
-  // Add triangle inequality adjustment to best distance.  It is possible this
-  // could be tighter for some certain types of trees.
-  bestDistance = SortPolicy::CombineWorst(bestDistance,
-      queryNode.FurthestPointDistance() +
-      queryNode.FurthestDescendantDistance());
+  auxDistance = bestPointDistance;
 
   // Loop over children of the node, and use their cached information to
   // assemble bounds.
   for (size_t i = 0; i < queryNode.NumChildren(); ++i)
   {
     const double firstBound = queryNode.Child(i).Stat().FirstBound();
-    const double adjustment = std::max(0.0,
-        queryNode.FurthestDescendantDistance() -
-        queryNode.Child(i).FurthestDescendantDistance());
-    const double adjustedSecondBound = SortPolicy::CombineWorst(
-        queryNode.Child(i).Stat().SecondBound(), 2 * adjustment);
+    const double auxBound = queryNode.Child(i).Stat().AuxBound();
 
     if (SortPolicy::IsBetter(worstDistance, firstBound))
       worstDistance = firstBound;
-    if (SortPolicy::IsBetter(adjustedSecondBound, bestDistance))
-      bestDistance = adjustedSecondBound;
+    if (SortPolicy::IsBetter(auxBound, auxDistance))
+      auxDistance = auxBound;
   }
 
+  // Add triangle inequality adjustment to best distance.  It is possible this
+  // could be tighter for some certain types of trees.
+  bestDistance = SortPolicy::CombineWorst(auxDistance,
+      2 * queryNode.FurthestDescendantDistance());
+
+  // Add triangle inequality adjustment to best distance of points in node.
+  bestPointDistance = SortPolicy::CombineWorst(bestPointDistance,
+      queryNode.FurthestPointDistance() +
+      queryNode.FurthestDescendantDistance());
+
+  if (SortPolicy::IsBetter(bestPointDistance, bestDistance))
+    bestDistance = bestPointDistance;
+
+  // At this point:
+  // worstDistance holds the value of B_1(N_q).
+  // bestDistance holds the value of B_2(N_q).
+  // auxDistance holds the value of B_aux(N_q).
+
   // Now consider the parent bounds.
   if (queryNode.Parent() != NULL)
   {
@@ -405,6 +417,7 @@ inline double NeighborSearchRules<SortPolicy, MetricType, TreeType>::
   // Cache bounds for later.
   queryNode.Stat().FirstBound() = worstDistance;
   queryNode.Stat().SecondBound() = bestDistance;
+  queryNode.Stat().AuxBound() = auxDistance;
 
   if (SortPolicy::IsBetter(worstDistance, bestDistance))
     return worstDistance;
diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
index 90b3f76ff10..c125369d5b5 100644
--- a/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
@@ -29,6 +29,9 @@ class NeighborSearchStat
   //! using the best descendant candidate distance modified by the furthest
   //! descendant distance.
   double secondBound;
+  //! The aux bound on the node's neighbor distances (B_aux). This represents
+  //! the best descendant candidate distance (used to calculate secondBound).
+  double auxBound;
   //! The better of the two bounds.
   double bound;
 
@@ -45,6 +48,7 @@ class NeighborSearchStat
   NeighborSearchStat() :
       firstBound(SortPolicy::WorstDistance()),
       secondBound(SortPolicy::WorstDistance()),
+      auxBound(SortPolicy::WorstDistance()),
       bound(SortPolicy::WorstDistance()),
       lastDistance(0.0) { }
 
@@ -56,6 +60,7 @@ class NeighborSearchStat
   NeighborSearchStat(TreeType& /* node */) :
       firstBound(SortPolicy::WorstDistance()),
       secondBound(SortPolicy::WorstDistance()),
+      auxBound(SortPolicy::WorstDistance()),
       bound(SortPolicy::WorstDistance()),
       lastDistance(0.0) { }
 
@@ -67,6 +72,10 @@ class NeighborSearchStat
   double SecondBound() const { return secondBound; }
   //! Modify the second bound.
   double& SecondBound() { return secondBound; }
+  //! Get the aux bound.
+  double AuxBound() const { return auxBound; }
+  //! Modify the aux bound.
+  double& AuxBound() { return auxBound; }
   //! Get the overall bound (the better of the two bounds).
   double Bound() const { return bound; }
   //! Modify the overall bound (it should be the better of the two bounds).
@@ -84,6 +93,7 @@ class NeighborSearchStat
 
     ar & CreateNVP(firstBound, "firstBound");
     ar & CreateNVP(secondBound, "secondBound");
+    ar & CreateNVP(auxBound, "auxBound");
     ar & CreateNVP(bound, "bound");
     ar & CreateNVP(lastDistance, "lastDistance");
   }

From ba4c5bbdb4597fb6a5dbc69915961464966ff260 Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Tue, 31 May 2016 15:42:27 -0300
Subject: [PATCH 58/87] Remove unused information from neighbor search stats.

---
 .../methods/neighbor_search/neighbor_search_impl.hpp |  1 -
 .../methods/neighbor_search/neighbor_search_stat.hpp | 12 ------------
 2 files changed, 13 deletions(-)

diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
index 529cfdd45a0..e092766ebbb 100644
--- a/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
@@ -640,7 +640,6 @@ Search(const size_t k,
         // Reset bounds of this node.
         node->Stat().FirstBound() = SortPolicy::WorstDistance();
         node->Stat().SecondBound() = SortPolicy::WorstDistance();
-        node->Stat().Bound() = SortPolicy::WorstDistance();
         node->Stat().LastDistance() = 0.0;
 
         // Then add the children.
diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
index c125369d5b5..dfcc5ad743b 100644
--- a/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
@@ -32,11 +32,6 @@ class NeighborSearchStat
   //! The aux bound on the node's neighbor distances (B_aux). This represents
   //! the best descendant candidate distance (used to calculate secondBound).
   double auxBound;
-  //! The better of the two bounds.
-  double bound;
-
-  //! The last distance evaluation node.
-  void* lastDistanceNode;
   //! The last distance evaluation.
   double lastDistance;
 
@@ -49,7 +44,6 @@ class NeighborSearchStat
       firstBound(SortPolicy::WorstDistance()),
       secondBound(SortPolicy::WorstDistance()),
       auxBound(SortPolicy::WorstDistance()),
-      bound(SortPolicy::WorstDistance()),
       lastDistance(0.0) { }
 
   /**
@@ -61,7 +55,6 @@ class NeighborSearchStat
       firstBound(SortPolicy::WorstDistance()),
       secondBound(SortPolicy::WorstDistance()),
       auxBound(SortPolicy::WorstDistance()),
-      bound(SortPolicy::WorstDistance()),
       lastDistance(0.0) { }
 
   //! Get the first bound.
@@ -76,10 +69,6 @@ class NeighborSearchStat
   double AuxBound() const { return auxBound; }
   //! Modify the aux bound.
   double& AuxBound() { return auxBound; }
-  //! Get the overall bound (the better of the two bounds).
-  double Bound() const { return bound; }
-  //! Modify the overall bound (it should be the better of the two bounds).
-  double& Bound() { return bound; }
   //! Get the last distance calculation.
   double LastDistance() const { return lastDistance; }
   //! Modify the last distance calculation.
@@ -94,7 +83,6 @@ class NeighborSearchStat
     ar & CreateNVP(firstBound, "firstBound");
     ar & CreateNVP(secondBound, "secondBound");
     ar & CreateNVP(auxBound, "auxBound");
-    ar & CreateNVP(bound, "bound");
     ar & CreateNVP(lastDistance, "lastDistance");
   }
 };

From 79d07f20a97987f49118efbefc6365c59e663667 Mon Sep 17 00:00:00 2001
From: MarcosPividori <marcos.pividori@gmail.com>
Date: Tue, 31 May 2016 15:43:13 -0300
Subject: [PATCH 59/87] Remove unnecessary include

---
 src/mlpack/methods/mean_shift/mean_shift_impl.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mlpack/methods/mean_shift/mean_shift_impl.hpp b/src/mlpack/methods/mean_shift/mean_shift_impl.hpp
index 57dcfeef55d..03ce3e09ba6 100644
--- a/src/mlpack/methods/mean_shift/mean_shift_impl.hpp
+++ b/src/mlpack/methods/mean_shift/mean_shift_impl.hpp
@@ -11,7 +11,6 @@
 #include <mlpack/core/kernels/kernel_traits.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-#include <mlpack/methods/neighbor_search/neighbor_search_stat.hpp>
 #include <mlpack/methods/range_search/range_search.hpp>
 
 #include "map"

From b3fc4684a0c14c1482fb58f6aac9be749f0f11c9 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Tue, 31 May 2016 14:56:40 -0400
Subject: [PATCH 60/87] Adapt Keon's test into something harder.

---
 src/mlpack/tests/load_save_test.cpp | 50 +++++++++++++++--------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/src/mlpack/tests/load_save_test.cpp b/src/mlpack/tests/load_save_test.cpp
index fc753f81fd1..ecb089c2def 100644
--- a/src/mlpack/tests/load_save_test.cpp
+++ b/src/mlpack/tests/load_save_test.cpp
@@ -1094,43 +1094,45 @@ BOOST_AUTO_TEST_CASE(CategoricalNontransposedCSVLoadTest)
 }
 
 /**
- * The test CSV Keon suggested in #658.
+ * A harder test CSV based on the concerns in #658.
  */
-BOOST_AUTO_TEST_CASE(KeonsSimpleDatasetInfoTest)
+BOOST_AUTO_TEST_CASE(HarderKeonTest)
 {
   fstream f;
   f.open("test.csv", fstream::out);
-  f << "1, 1, 1" << endl;
-  f << ", 1, 1" << endl;
-  f << "1, 1, 1" << endl;
-  f << "1, 1, 1" << endl;
+  f << "a,, 13,\t, 0" << endl;
+  f << "b, 3, 14, hello,1" << endl;
+  f << "b, 4, 15, , 2" << endl;
+  f << ", 5, 16, ," << endl;
   f.close();
 
-  // Load without transpose.
+  // Load transposed.
   arma::mat dataset;
   data::DatasetInfo info;
-  data::Load("test.csv", dataset, info, true, false);
-
-  BOOST_REQUIRE_EQUAL(dataset.n_rows, 4);
-  BOOST_REQUIRE_EQUAL(dataset.n_cols, 3);
-  BOOST_REQUIRE_EQUAL(info.Dimensionality(), 4);
-  BOOST_REQUIRE_EQUAL(info.NumMappings(0), 0);
-  BOOST_REQUIRE_EQUAL(info.NumMappings(1), 2);
-  BOOST_REQUIRE_EQUAL(info.NumMappings(2), 0);
-  BOOST_REQUIRE_EQUAL(info.NumMappings(3), 0);
-
-  // Load with transpose.
-  data::DatasetInfo newInfo;
   data::Load("test.csv", dataset, info, true, true);
 
-  BOOST_REQUIRE_EQUAL(dataset.n_rows, 3);
+  BOOST_REQUIRE_EQUAL(dataset.n_rows, 5);
   BOOST_REQUIRE_EQUAL(dataset.n_cols, 4);
-  BOOST_REQUIRE_EQUAL(info.Dimensionality(), 3);
-  BOOST_REQUIRE_EQUAL(info.NumMappings(0), 2);
-  BOOST_REQUIRE_EQUAL(info.NumMappings(1), 0);
+
+  BOOST_REQUIRE_EQUAL(info.Dimensionality(), 5);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(0), 3);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(1), 4);
   BOOST_REQUIRE_EQUAL(info.NumMappings(2), 0);
+  BOOST_REQUIRE_EQUAL(info.NumMappings(3), 2); // \t and "" are equivalent.
+  BOOST_REQUIRE_EQUAL(info.NumMappings(4), 4);
 
-  remove("test.csv");
+  // Now load non-transposed.
+  data::DatasetInfo ntInfo;
+  data::Load("test.csv", dataset, ntInfo, true, false);
+
+  BOOST_REQUIRE_EQUAL(dataset.n_rows, 4);
+  BOOST_REQUIRE_EQUAL(dataset.n_cols, 5);
+
+  BOOST_REQUIRE_EQUAL(ntInfo.Dimensionality(), 4);
+  BOOST_REQUIRE_EQUAL(ntInfo.NumMappings(0), 4);
+  BOOST_REQUIRE_EQUAL(ntInfo.NumMappings(1), 5);
+  BOOST_REQUIRE_EQUAL(ntInfo.NumMappings(2), 5);
+  BOOST_REQUIRE_EQUAL(ntInfo.NumMappings(3), 3);
 }
 
 /**

From e94896d9c720ed706b5ad546be9df13b90631f10 Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Tue, 31 May 2016 23:14:08 +0300
Subject: [PATCH 61/87] Adds code that gives controllable access to LSH
 projection tables

---
 src/mlpack/methods/lsh/lsh_search.hpp      | 30 ++++++++++++++---
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 38 +++++++++++++++++++---
 2 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index 7505f29c0af..94ab4525e01 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -83,15 +83,19 @@ class LSHSearch
   ~LSHSearch();
 
   /**
-   * Train the LSH model on the given dataset.  This means building new hash
-   * tables.
+   * Train the LSH model on the given dataset.  This means building new hash
+   * tables.  If no projection tables are given, random ones are generated;
+   * otherwise the ones provided by the user are used.
    */
   void Train(const arma::mat& referenceSet,
              const size_t numProj,
              const size_t numTables,
              const double hashWidth = 0.0,
              const size_t secondHashSize = 99901,
-             const size_t bucketSize = 500);
+             const size_t bucketSize = 500,
+             const std::vector<arma::mat> &projection
+             = std::vector<arma::mat>()
+             );
 
   /**
    * Compute the nearest neighbors of the points in the given query set and
@@ -174,6 +178,24 @@ class LSHSearch
   //! Get the second hash table.
   const arma::Mat<size_t>& SecondHashTable() const { return secondHashTable; }
 
+  //! Get the projection tables.
+  std::vector<arma::mat> getProjectionTables() { return projections; }
+
+  //! Change the projection tables (Retrains object)
+  void setProjectionTables(std::vector<arma::mat> projTables)
+  {
+    // Simply call Train() with given projection tables
+    Train(
+        *referenceSet,
+        numProj,
+        numTables,
+        hashWidth,
+        secondHashSize,
+        bucketSize,
+        projTables
+        );
+  };
+
  private:
   /**
    * This function builds a hash table with two levels of hashing as presented
@@ -188,7 +210,7 @@ class LSHSearch
    * are private members of this class, initialized during the class
    * initialization.
    */
-  void BuildHash();
+  void BuildHash(const std::vector<arma::mat> &projection);
 
   /**
    * This function takes a query and hashes it into each of the hash tables to
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index b956ed27717..119eb78d620 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -65,7 +65,8 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
                                   const size_t numTables,
                                   const double hashWidthIn,
                                   const size_t secondHashSize,
-                                  const size_t bucketSize)
+                                  const size_t bucketSize,
+                                  const std::vector<arma::mat> &projection)
 {
   // Set new reference set.
   if (this->referenceSet && ownsSet)
@@ -97,7 +98,7 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
 
   Log::Info << "Hash width chosen as: " << hashWidth << std::endl;
 
-  BuildHash();
+  BuildHash(projection);
 }
 
 template<typename SortPolicy>
@@ -355,7 +356,7 @@ Search(const size_t k,
 }
 
 template<typename SortPolicy>
-void LSHSearch<SortPolicy>::BuildHash()
+void LSHSearch<SortPolicy>::BuildHash(const std::vector<arma::mat> &projection)
 {
   // The first level hash for a single table outputs a 'numProj'-dimensional
   // integer key for each point in the set -- (key, pointID)
@@ -412,6 +413,13 @@ void LSHSearch<SortPolicy>::BuildHash()
   // Step III: Create each hash table in the first level hash one by one and
   // putting them directly into the 'secondHashTable' for memory efficiency.
   projections.clear(); // Reset projections vector.
+
+
+  if (projection.size() != 0 && projection.size() != numTables)
+    throw std::invalid_argument(
+        "number of projection tables provided must be equal to numProj"
+        );
+
   for (size_t i = 0; i < numTables; i++)
   {
     // Step IV: Obtain the 'numProj' projections for each table.
@@ -419,7 +427,29 @@ void LSHSearch<SortPolicy>::BuildHash()
     // For L2 metric, 2-stable distributions are used, and
     // the normal Z ~ N(0, 1) is a 2-stable distribution.
     arma::mat projMat;
-    projMat.randn(referenceSet->n_rows, numProj);
+
+    if (projection.size() == 0) //random generation of table i
+    {
+
+      // For L2 metric, p-stable distributions are used, and the normal
+      // Z ~ N(0, 1) is p-stable.
+      projMat.randn(referenceSet->n_rows, numProj);
+    }
+    else //user-specified projection tables
+    {
+      //TODO: check that projection.size() == numTables
+
+      projMat = projection[i];
+
+      //make sure specified matrix is of correct size
+      if (projMat.n_rows != referenceSet->n_rows)
+        throw std::invalid_argument( 
+            "projection table dimensionality doesn't"
+            " equal dataset dimensionality" );
+      if (projMat.n_cols != numProj)
+        throw std::invalid_argument(
+            "projection table doesn't have correct number of projections");
+    }
 
     // Save the projection matrix for querying.
     projections.push_back(projMat);

From f3c4939e58eccff3f25f5ba399dcb96a04dee066 Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Tue, 31 May 2016 23:17:13 +0300
Subject: [PATCH 62/87] Adds code that gives controllable access to LSH
 projection tables

---
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 119eb78d620..7986c070d3e 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -437,16 +437,14 @@ void LSHSearch<SortPolicy>::BuildHash(const std::vector<arma::mat> &projection)
     }
     else //user-specified projection tables
     {
-      //TODO: check that projection.size() == numTables
-
       projMat = projection[i];
 
       //make sure specified matrix is of correct size
-      if (projMat.n_rows != referenceSet->n_rows)
+      if ( projMat.n_rows != referenceSet->n_rows )
         throw std::invalid_argument( 
             "projection table dimensionality doesn't"
             " equal dataset dimensionality" );
-      if (projMat.n_cols != numProj)
+      if ( projMat.n_cols != numProj )
         throw std::invalid_argument(
             "projection table doesn't have correct number of projections");
     }

From 934fe082230f7d8b8f9c3e9c1fe2f9fc25ea493e Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Tue, 31 May 2016 23:20:41 +0300
Subject: [PATCH 63/87] Adds code that gives controllable access to LSH
 projection tables

---
 src/mlpack/methods/lsh/lsh_search.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index 94ab4525e01..f79b0cf7584 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -179,7 +179,7 @@ class LSHSearch
   const arma::Mat<size_t>& SecondHashTable() const { return secondHashTable; }
 
   //! Get the projection tables.
-  std::vector<arma::mat> getProjectionTables() { return projections; }
+  const std::vector<arma::mat> getProjectionTables() { return projections; }
 
   //! Change the projection tables (Retrains object)
   void setProjectionTables(std::vector<arma::mat> projTables)

From e6d2ca7bf64b47a36ac489335cf0dd8933e13076 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Tue, 31 May 2016 16:56:11 -0400
Subject: [PATCH 64/87] I'm not sure what line width Pari used, but it wasn't
 80 columns.

This will probably make the merge of #663 and other LSH improvements by Yannis
harder...
---
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 34 +++++++++++-----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index b956ed27717..02d0021d2dc 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -358,19 +358,19 @@ template<typename SortPolicy>
 void LSHSearch<SortPolicy>::BuildHash()
 {
   // The first level hash for a single table outputs a 'numProj'-dimensional
-  // integer key for each point in the set -- (key, pointID)
-  // The key creation details are presented below
+  // integer key for each point in the set -- (key, pointID). The key creation
+  // details are presented below.
   //
-  // The second level hash is performed by hashing the key to
-  // an integer in the range [0, 'secondHashSize').
+  // The second level hash is performed by hashing the key to an integer in the
+  // range [0, 'secondHashSize').
   //
-  // This is done by creating a weight vector 'secondHashWeights' of
-  // length 'numProj' with each entry an integer randomly chosen
-  // between [0, 'secondHashSize').
+  // This is done by creating a weight vector 'secondHashWeights' of length
+  // 'numProj' with each entry an integer randomly chosen between [0,
+  // 'secondHashSize').
   //
-  // Then the bucket for any key and its corresponding point is
-  // given by <key, 'secondHashWeights'> % 'secondHashSize'
-  // and the corresponding point ID is put into that bucket.
+  // Then the bucket for any key and its corresponding point is given by <key,
+  // 'secondHashWeights'> % 'secondHashSize' and the corresponding point ID is
+  // put into that bucket.
 
   // Step I: Prepare the second level hash.
 
@@ -379,9 +379,9 @@ void LSHSearch<SortPolicy>::BuildHash()
                                   (double) secondHashSize);
 
   // The 'secondHashTable' is initially an empty matrix of size
-  // ('secondHashSize' x 'bucketSize'). But by only filling the buckets
-  // as points land in them allows us to shrink the size of the
-  // 'secondHashTable' at the end of the hashing.
+  // ('secondHashSize' x 'bucketSize'). But by only filling the buckets as
+  // points land in them allows us to shrink the size of the 'secondHashTable'
+  // at the end of the hashing.
 
   // Fill the second hash table n = referenceSet.n_cols.  This is because no
   // point has index 'n' so the presence of this in the bucket denotes that
@@ -404,8 +404,8 @@ void LSHSearch<SortPolicy>::BuildHash()
   size_t numRowsInTable = 0;
 
   // Step II: The offsets for all projections in all tables.
-  // Since the 'offsets' are in [0, hashWidth], we obtain the 'offsets'
-  // as randu(numProj, numTables) * hashWidth.
+  // Since the 'offsets' are in [0, hashWidth], we obtain the 'offsets' as
+  // randu(numProj, numTables) * hashWidth.
   offsets.randu(numProj, numTables);
   offsets *= hashWidth;
 
@@ -416,8 +416,8 @@ void LSHSearch<SortPolicy>::BuildHash()
   {
     // Step IV: Obtain the 'numProj' projections for each table.
 
-    // For L2 metric, 2-stable distributions are used, and
-    // the normal Z ~ N(0, 1) is a 2-stable distribution.
+    // For L2 metric, 2-stable distributions are used, and the normal Z ~ N(0,
+    // 1) is a 2-stable distribution.
     arma::mat projMat;
     projMat.randn(referenceSet->n_rows, numProj);
 

From 4773efbb117d2b56991d2b95fa5418ef1aa28fb6 Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Wed, 1 Jun 2016 09:32:45 +0300
Subject: [PATCH 65/87] Fixes naming conventions of accessors

---
 src/mlpack/methods/lsh/lsh_search.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index f79b0cf7584..d5389d4091c 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -179,10 +179,10 @@ class LSHSearch
   const arma::Mat<size_t>& SecondHashTable() const { return secondHashTable; }
 
   //! Get the projection tables.
-  const std::vector<arma::mat> getProjectionTables() { return projections; }
+  const std::vector<arma::mat> Projections() { return projections; }
 
   //! Change the projection tables (Retrains object)
-  void setProjectionTables(std::vector<arma::mat> projTables)
+  void Projections(const std::vector<arma::mat> &projTables)
   {
     // Simply call Train() with given projection tables
     Train(

From b067e89b528d7fb55b3591b64d49531c039d42df Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Wed, 1 Jun 2016 10:40:20 +0300
Subject: [PATCH 66/87] Changes LSHSearch.projections from
 std::vector<arma::mat> to arma::cube

---
 src/mlpack/methods/lsh/lsh_search.hpp      | 14 ++---
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 66 ++++++++++------------
 2 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index d5389d4091c..10a6ffc3f4b 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -93,8 +93,8 @@ class LSHSearch
              const double hashWidth = 0.0,
              const size_t secondHashSize = 99901,
              const size_t bucketSize = 500,
-             const std::vector<arma::mat> &projection
-             = std::vector<arma::mat>()
+             const arma::cube &projection
+             = arma::zeros<arma::cube>(0,0,0)
              );
 
   /**
@@ -163,8 +163,6 @@ class LSHSearch
 
   //! Get the number of projections.
   size_t NumProjections() const { return projections.size(); }
-  //! Get the projection matrix of the given table.
-  const arma::mat& Projection(const size_t i) const { return projections[i]; }
 
   //! Get the offsets 'b' for each of the projections.  (One 'b' per column.)
   const arma::mat& Offsets() const { return offsets; }
@@ -179,10 +177,10 @@ class LSHSearch
   const arma::Mat<size_t>& SecondHashTable() const { return secondHashTable; }
 
   //! Get the projection tables.
-  const std::vector<arma::mat> Projections() { return projections; }
+  const arma::cube& Projections() const { return projections; }
 
   //! Change the projection tables (Retrains object)
-  void Projections(const std::vector<arma::mat> &projTables)
+  void Projections(const arma::cube &projTables)
   {
     // Simply call Train() with given projection tables
     Train(
@@ -210,7 +208,7 @@ class LSHSearch
    * are private members of this class, initialized during the class
    * initialization.
    */
-  void BuildHash(const std::vector<arma::mat> &projection);
+  void BuildHash(const arma::cube &projection);
 
   /**
    * This function takes a query and hashes it into each of the hash tables to
@@ -294,7 +292,7 @@ class LSHSearch
   size_t numTables;
 
   //! The std::vector containing the projection matrix of each table.
-  std::vector<arma::mat> projections; // should be [numProj x dims] x numTables
+  arma::cube projections; // should be [numProj x dims] x numTables
 
   //! The list of the offsets 'b' for each of the projection for each table.
   arma::mat offsets; // should be numProj x numTables
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 7986c070d3e..14a86e15310 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -66,7 +66,7 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
                                   const double hashWidthIn,
                                   const size_t secondHashSize,
                                   const size_t bucketSize,
-                                  const std::vector<arma::mat> &projection)
+                                  const arma::cube &projection)
 {
   // Set new reference set.
   if (this->referenceSet && ownsSet)
@@ -206,7 +206,8 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
   // Compute the projection of the query in each table.
   arma::mat allProjInTables(numProj, numTablesToSearch);
   for (size_t i = 0; i < numTablesToSearch; i++)
-    allProjInTables.unsafe_col(i) = projections[i].t() * queryPoint;
+    // Slice i of the projections cube is the projection matrix of table i.
+    allProjInTables.unsafe_col(i) = projections.slice(i).t() * queryPoint;
   allProjInTables += offsets.cols(0, numTablesToSearch - 1);
   allProjInTables /= hashWidth;
 
@@ -356,7 +357,7 @@ Search(const size_t k,
 }
 
 template<typename SortPolicy>
-void LSHSearch<SortPolicy>::BuildHash(const std::vector<arma::mat> &projection)
+void LSHSearch<SortPolicy>::BuildHash(const arma::cube &projection)
 {
   // The first level hash for a single table outputs a 'numProj'-dimensional
   // integer key for each point in the set -- (key, pointID)
@@ -412,45 +413,39 @@ void LSHSearch<SortPolicy>::BuildHash(const std::vector<arma::mat> &projection)
 
   // Step III: Create each hash table in the first level hash one by one and
   // putting them directly into the 'secondHashTable' for memory efficiency.
-  projections.clear(); // Reset projections vector.
+  // No explicit reset of 'projections' is needed; Step IV overwrites it.
 
 
-  if (projection.size() != 0 && projection.size() != numTables)
-    throw std::invalid_argument(
-        "number of projection tables provided must be equal to numProj"
-        );
 
-  for (size_t i = 0; i < numTables; i++)
-  {
-    // Step IV: Obtain the 'numProj' projections for each table.
+  // Step IV: Obtain the 'numProj' projections for each table.
 
+  if (projection.n_slices == 0) //random generation of tables
+  {
     // For L2 metric, 2-stable distributions are used, and
     // the normal Z ~ N(0, 1) is a 2-stable distribution.
-    arma::mat projMat;
 
-    if (projection.size() == 0) //random generation of table i
-    {
+    //numTables random tables arranged in a cube
+    projections.randn(
+        referenceSet->n_rows,
+        numProj,
+        numTables
+    );
+  }
+  else if (projection.n_slices == numTables) //user defined tables
+  {
+    projections = projection;
+  }
+  else //invalid argument
+  {
+    throw std::invalid_argument(
+        "number of projection tables provided must be equal to numProj"
+        );
+  }
+    
 
-      // For L2 metric, p-stable distributions are used, and the normal
-      // Z ~ N(0, 1) is p-stable.
-      projMat.randn(referenceSet->n_rows, numProj);
-    }
-    else //user-specified projection tables
-    {
-      projMat = projection[i];
-
-      //make sure specified matrix is of correct size
-      if ( projMat.n_rows != referenceSet->n_rows )
-        throw std::invalid_argument( 
-            "projection table dimensionality doesn't"
-            " equal dataset dimensionality" );
-      if ( projMat.n_cols != numProj )
-        throw std::invalid_argument(
-            "projection table doesn't have correct number of projections");
-    }
+  for (size_t i = 0; i < numTables; i++)
+  {
 
-    // Save the projection matrix for querying.
-    projections.push_back(projMat);
 
     // Step V: create the 'numProj'-dimensional key for each point in each
     // table.
@@ -465,7 +460,8 @@ void LSHSearch<SortPolicy>::BuildHash(const std::vector<arma::mat> &projection)
     // key = { floor( (<proj_i, point> + offset_i) / 'hashWidth' ) forall i }
     arma::mat offsetMat = arma::repmat(offsets.unsafe_col(i), 1,
                                        referenceSet->n_cols);
-    arma::mat hashMat = projMat.t() * (*referenceSet);
+    // Project every reference point using the projection matrix of table i.
+    arma::mat hashMat = projections.slice(i).t() * (*referenceSet);
     hashMat += offsetMat;
     hashMat /= hashWidth;
 
@@ -546,7 +542,7 @@ void LSHSearch<SortPolicy>::Serialize(Archive& ar,
 
   // Delete existing projections, if necessary.
   if (Archive::is_loading::value)
-    projections.clear();
+    projections.zeros(0, 0, 0); // TODO: correct way to clear this?
 
   ar & CreateNVP(projections, "projections");
   ar & CreateNVP(offsets, "offsets");

From 7ebf1daa6de66aa5c047b27ff61866c52fa295d6 Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Wed, 1 Jun 2016 10:48:20 +0300
Subject: [PATCH 67/87] Changes LSHSearch::projections from
 std::vector<arma::mat> to arma::cube

---
 src/mlpack/methods/lsh/lsh_search.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index 10a6ffc3f4b..e7fd98d2597 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -291,8 +291,8 @@ class LSHSearch
   //! The number of hash tables.
   size_t numTables;
 
-  //! The std::vector containing the projection matrix of each table.
-  arma::cube projections; // should be [numProj x dims] x numTables
+  //! The arma::cube containing the projection matrix of each table.
+  arma::cube projections; // should be [dims x numProj] x numTables slices
 
   //! The list of the offsets 'b' for each of the projection for each table.
   arma::mat offsets; // should be numProj x numTables

From ff7877d28efd2e713c29de0f5934e14e485136fe Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Wed, 1 Jun 2016 07:45:17 -0700
Subject: [PATCH 68/87] Add a templated version for BOOST_CLASS_VERSION().

---
 .../data/serialization_template_version.hpp   | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 src/mlpack/core/data/serialization_template_version.hpp

diff --git a/src/mlpack/core/data/serialization_template_version.hpp b/src/mlpack/core/data/serialization_template_version.hpp
new file mode 100644
index 00000000000..5d3f5b48b6e
--- /dev/null
+++ b/src/mlpack/core/data/serialization_template_version.hpp
@@ -0,0 +1,38 @@
+/**
+ * @file serialization_template_version.hpp
+ * @author Ryan Curtin
+ *
+ * A better version of the BOOST_CLASS_VERSION() macro that supports templated
+ * classes.
+ */
+#ifndef MLPACK_CORE_DATA_SERIALIZATION_TEMPLATE_VERSION_HPP
+#define MLPACK_CORE_DATA_SERIALIZATION_TEMPLATE_VERSION_HPP
+
+/**
+ * Use this like BOOST_CLASS_VERSION(), but for templated classes.  SIGNATURE
+ * is the template signature, T the class, and N its version (less than 256).
+ * Example for math::Range<eT>:
+ *
+ * BOOST_TEMPLATE_CLASS_VERSION(template<typename eT>, math::Range<eT>, 1);
+ */
+#define BOOST_TEMPLATE_CLASS_VERSION(SIGNATURE, T, N) \
+namespace boost { \
+namespace serialization { \
+template<> \
+SIGNATURE \
+struct version<T> \
+{ \
+  typedef mpl::int_<N> type; \
+  typedef mpl::integral_c_tag tag; \
+  BOOST_STATIC_CONSTANT(int, value = version::type::value); \
+  BOOST_MPL_ASSERT(( \
+      boost::mpl::less< \
+          boost::mpl::int_<N>, \
+          boost::mpl::int_<256> \
+      > \
+  )); \
+}; \
+} \
+}
+
+#endif

From 6958cc0a5835e2882dc7ef48aec744610d7d5a12 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Wed, 1 Jun 2016 08:47:21 -0700
Subject: [PATCH 69/87] We actually need to wrap mlpack::data::SecondShim<>
 objects.

---
 src/mlpack/core/data/serialization_template_version.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mlpack/core/data/serialization_template_version.hpp b/src/mlpack/core/data/serialization_template_version.hpp
index 5d3f5b48b6e..8cf67dc121c 100644
--- a/src/mlpack/core/data/serialization_template_version.hpp
+++ b/src/mlpack/core/data/serialization_template_version.hpp
@@ -20,7 +20,7 @@ namespace boost { \
 namespace serialization { \
 template<> \
 SIGNATURE \
-struct version<T> \
+struct version<mlpack::data::SecondShim<T>> \
 { \
   typedef mpl::int_<N> type; \
   typedef mpl::integral_c_tag tag; \

From 14740500bcd282b21af2bf32dd9ce65f0b0365e1 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Wed, 1 Jun 2016 08:47:38 -0700
Subject: [PATCH 70/87] Include new serialization version macro.

---
 src/mlpack/prereqs.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp
index 02828929210..3852a6b561c 100644
--- a/src/mlpack/prereqs.hpp
+++ b/src/mlpack/prereqs.hpp
@@ -65,6 +65,7 @@
   #define BOOST_PFTO
 #endif
 #include <mlpack/core/data/serialization_shim.hpp>
+#include <mlpack/core/data/serialization_template_version.hpp>
 
 // Now include Armadillo through the special mlpack extensions.
 #include <mlpack/core/arma_extend/arma_extend.hpp>

From 81b6b4f88d93d55e19f8698d4a2e5abe973bed3f Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Wed, 1 Jun 2016 08:48:04 -0700
Subject: [PATCH 71/87] Use n_slices not size() to fix correctness.

---
 src/mlpack/methods/lsh/lsh_search.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index e7fd98d2597..89f0f920ddb 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -162,7 +162,7 @@ class LSHSearch
   const arma::mat& ReferenceSet() const { return *referenceSet; }
 
   //! Get the number of projections.
-  size_t NumProjections() const { return projections.size(); }
+  size_t NumProjections() const { return projections.n_slices; }
 
   //! Get the offsets 'b' for each of the projections.  (One 'b' per column.)
   const arma::mat& Offsets() const { return offsets; }

From f989f1f9ddcba8d30e36f43d407768ee6cd78623 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Wed, 1 Jun 2016 08:48:30 -0700
Subject: [PATCH 72/87] Refactor Serialize(), add backwards compatibility, and
 update tests.

---
 src/mlpack/methods/lsh/lsh_search.hpp      |  6 +++++-
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 21 ++++++++++++++++++---
 src/mlpack/tests/serialization_test.cpp    |  4 ++--
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index 89f0f920ddb..d3bc2f92e00 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -151,7 +151,7 @@ class LSHSearch
    * @param ar Archive to serialize to.
    */
   template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */);
+  void Serialize(Archive& ar, const unsigned int version);
 
   //! Return the number of distance evaluations performed.
   size_t DistanceEvaluations() const { return distanceEvaluations; }
@@ -327,6 +327,10 @@ class LSHSearch
 } // namespace neighbor
 } // namespace mlpack
 
+//! Set the serialization version of the LSHSearch class.
+BOOST_TEMPLATE_CLASS_VERSION(template<typename SortPolicy>,
+    mlpack::neighbor::LSHSearch<SortPolicy>, 1);
+
 // Include implementation.
 #include "lsh_search_impl.hpp"
 
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 14a86e15310..afeaf05b12f 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -524,7 +524,7 @@ void LSHSearch<SortPolicy>::BuildHash(const arma::cube &projection)
 template<typename SortPolicy>
 template<typename Archive>
 void LSHSearch<SortPolicy>::Serialize(Archive& ar,
-                                      const unsigned int /* version */)
+                                      const unsigned int version)
 {
   using data::CreateNVP;
 
@@ -542,9 +542,24 @@ void LSHSearch<SortPolicy>::Serialize(Archive& ar,
 
   // Delete existing projections, if necessary.
   if (Archive::is_loading::value)
-    projections.zeros(0, 0, 0); // TODO: correct way to clear this?
+    projections.reset();
+
+  // Backward compatibility: older version of LSHSearch stored the projection
+  // tables in a std::vector<arma::mat>.
+  if (version == 0)
+  {
+    std::vector<arma::mat> tmpProj;
+    ar & CreateNVP(tmpProj, "projections");
+
+    projections.set_size(tmpProj[0].n_rows, tmpProj[0].n_cols, tmpProj.size());
+    for (size_t i = 0; i < tmpProj.size(); ++i)
+      projections.slice(i) = tmpProj[i];
+  }
+  else
+  {
+    ar & CreateNVP(projections, "projections");
+  }
 
-  ar & CreateNVP(projections, "projections");
   ar & CreateNVP(offsets, "offsets");
   ar & CreateNVP(hashWidth, "hashWidth");
   ar & CreateNVP(secondHashSize, "secondHashSize");
diff --git a/src/mlpack/tests/serialization_test.cpp b/src/mlpack/tests/serialization_test.cpp
index 9bddbc2c16d..1753e672d61 100644
--- a/src/mlpack/tests/serialization_test.cpp
+++ b/src/mlpack/tests/serialization_test.cpp
@@ -1210,8 +1210,8 @@ BOOST_AUTO_TEST_CASE(LSHTest)
   BOOST_REQUIRE_EQUAL(lsh.NumProjections(), binaryLsh.NumProjections());
   for (size_t i = 0; i < lsh.NumProjections(); ++i)
   {
-    CheckMatrices(lsh.Projection(i), xmlLsh.Projection(i),
-        textLsh.Projection(i), binaryLsh.Projection(i));
+    CheckMatrices(lsh.Projections().slice(i), xmlLsh.Projections().slice(i),
+        textLsh.Projections().slice(i), binaryLsh.Projections().slice(i));
   }
 
   CheckMatrices(lsh.ReferenceSet(), xmlLsh.ReferenceSet(),

From 5546ebcf02598c9da06e19ed447e73ddcd0d3347 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Wed, 1 Jun 2016 11:26:09 -0700
Subject: [PATCH 73/87] Add Classify() functions and tests.

---
 .../logistic_regression.hpp                   |  41 +++++
 .../logistic_regression_impl.hpp              |  31 ++++
 src/mlpack/tests/logistic_regression_test.cpp | 150 ++++++++++++++++++
 3 files changed, 222 insertions(+)

diff --git a/src/mlpack/methods/logistic_regression/logistic_regression.hpp b/src/mlpack/methods/logistic_regression/logistic_regression.hpp
index 0d56e0c39b0..008193a3032 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression.hpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression.hpp
@@ -152,6 +152,8 @@ class LogisticRegression
    * the decision boundary, the response is taken to be 1; otherwise, it is 0.
    * By default the decision boundary is 0.5.
    *
+   * This method is deprecated---you should use Classify() instead.
+   *
    * @param predictors Input predictors.
    * @param responses Vector to put output predictions of responses into.
    * @param decisionBoundary Decision boundary (default 0.5).
@@ -160,6 +162,45 @@ class LogisticRegression
                arma::Row<size_t>& responses,
                const double decisionBoundary = 0.5) const;
 
+  /**
+   * Classify the given point.  The predicted label is returned.  Optionally,
+   * specify the decision boundary; logistic regression returns a value between
+   * 0 and 1.  If the value is greater than the decision boundary, the response
+   * is taken to be 1; otherwise, it is 0.  By default the decision boundary is
+   * 0.5.
+   *
+   * @param point Point to classify.
+   * @param decisionBoundary Decision boundary (default 0.5).
+   * @return Predicted label of point.
+   */
+  template<typename VecType>
+  size_t Classify(const VecType& point,
+                  const double decisionBoundary = 0.5) const;
+
+  /**
+   * Classify the given points, returning the predicted labels for each point.
+   * Optionally, specify the decision boundary; logistic regression returns a
+   * value between 0 and 1.  If the value is greater than the decision boundary,
+   * the response is taken to be 1; otherwise, it is 0.  By default the decision
+   * boundary is 0.5.
+   *
+   * @param dataset Set of points to classify.
+   * @param labels Predicted labels for each point.
+   * @param decisionBoundary Decision boundary (default 0.5).
+   */
+  void Classify(const MatType& dataset,
+                arma::Row<size_t>& labels,
+                const double decisionBoundary = 0.5) const;
+
+  /**
+   * Classify the given points, returning class probabilities for each point.
+   *
+   * @param dataset Set of points to classify.
+   * @param probabilities Class probabilities for each point (output).
+   */
+  void Classify(const MatType& dataset,
+                arma::mat& probabilities) const;
+
   /**
    * Compute the accuracy of the model on the given predictors and responses,
    * optionally using the given decision boundary.  The responses should be
diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp b/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp
index ee4396e89c6..5b6a2c6e435 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp
@@ -105,6 +105,37 @@ void LogisticRegression<MatType>::Predict(const MatType& predictors,
       (1.0 - decisionBoundary));
 }
 
+template<typename MatType>
+template<typename VecType>
+size_t LogisticRegression<MatType>::Classify(const VecType& point,
+                                             const double decisionBoundary)
+    const
+{
+  return size_t(1.0 / (1.0 + std::exp(-parameters(0) - arma::dot(point,
+      parameters.subvec(1, parameters.n_elem - 1)))) +
+      (1.0 - decisionBoundary));
+}
+
+template<typename MatType>
+void LogisticRegression<MatType>::Classify(const MatType& dataset,
+                                           arma::Row<size_t>& labels,
+                                           const double decisionBoundary) const
+{
+  Predict(dataset, labels, decisionBoundary);
+}
+
+template<typename MatType>
+void LogisticRegression<MatType>::Classify(const MatType& dataset,
+                                           arma::mat& probabilities) const
+{
+  // Set correct size of output matrix.
+  probabilities.set_size(2, dataset.n_cols);
+
+  probabilities.row(1) = 1.0 / (1.0 + arma::exp(-parameters(0) - dataset.t() *
+      parameters.subvec(1, parameters.n_elem - 1))).t();
+  probabilities.row(0) = 1.0 - probabilities.row(1);
+}
+
 template<typename MatType>
 double LogisticRegression<MatType>::ComputeError(
     const MatType& predictors,
diff --git a/src/mlpack/tests/logistic_regression_test.cpp b/src/mlpack/tests/logistic_regression_test.cpp
index f567049a8b8..7881bb248d6 100644
--- a/src/mlpack/tests/logistic_regression_test.cpp
+++ b/src/mlpack/tests/logistic_regression_test.cpp
@@ -807,4 +807,154 @@ BOOST_AUTO_TEST_CASE(LogisticRegressionSparseSGDTest)
     BOOST_REQUIRE_CLOSE(lr.Parameters()[i], lrSparse.Parameters()[i], 1e-5);
 }
 
+/**
+ * Test multi-point classification (Classify()).
+ */
+BOOST_AUTO_TEST_CASE(ClassifyTest)
+{
+  // Generate a two-Gaussian dataset.
+  GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
+  GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
+
+  arma::mat data(3, 1000);
+  arma::Row<size_t> responses(1000);
+  for (size_t i = 0; i < 500; ++i)
+  {
+    data.col(i) = g1.Random();
+    responses[i] = 0;
+  }
+  for (size_t i = 500; i < 1000; ++i)
+  {
+    data.col(i) = g2.Random();
+    responses[i] = 1;
+  }
+
+  // Now train a logistic regression object on it.
+  LogisticRegression<> lr(data.n_rows, 0.5);
+  lr.Train<>(data, responses);
+
+  // Create a test set.
+  for (size_t i = 0; i < 500; ++i)
+  {
+    data.col(i) = g1.Random();
+    responses[i] = 0;
+  }
+  for (size_t i = 500; i < 1000; ++i)
+  {
+    data.col(i) = g2.Random();
+    responses[i] = 1;
+  }
+
+  arma::Row<size_t> predictions;
+  lr.Classify(data, predictions);
+
+  BOOST_REQUIRE_GE((double) arma::accu(predictions == responses), 900);
+}
+
+/**
+ * Test that single-point classification gives the same results as multi-point
+ * classification.
+ */
+BOOST_AUTO_TEST_CASE(SinglePointClassifyTest)
+{
+  // Generate a two-Gaussian dataset.
+  GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
+  GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
+
+  arma::mat data(3, 1000);
+  arma::Row<size_t> responses(1000);
+  for (size_t i = 0; i < 500; ++i)
+  {
+    data.col(i) = g1.Random();
+    responses[i] = 0;
+  }
+  for (size_t i = 500; i < 1000; ++i)
+  {
+    data.col(i) = g2.Random();
+    responses[i] = 1;
+  }
+
+  // Now train a logistic regression object on it.
+  LogisticRegression<> lr(data.n_rows, 0.5);
+  lr.Train<>(data, responses);
+
+  // Create a test set.
+  for (size_t i = 0; i < 500; ++i)
+  {
+    data.col(i) = g1.Random();
+    responses[i] = 0;
+  }
+  for (size_t i = 500; i < 1000; ++i)
+  {
+    data.col(i) = g2.Random();
+    responses[i] = 1;
+  }
+
+  arma::Row<size_t> predictions;
+  lr.Classify(data, predictions);
+
+  for (size_t i = 0; i < data.n_cols; ++i)
+  {
+    size_t pred = lr.Classify(data.col(i));
+
+    BOOST_REQUIRE_EQUAL(pred, predictions[i]);
+  }
+}
+
+/**
+ * Test that giving point probabilities works.
+ */
+BOOST_AUTO_TEST_CASE(ClassifyProbabilitiesTest)
+{
+  // Generate a two-Gaussian dataset.
+  GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
+  GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
+
+  arma::mat data(3, 1000);
+  arma::Row<size_t> responses(1000);
+  for (size_t i = 0; i < 500; ++i)
+  {
+    data.col(i) = g1.Random();
+    responses[i] = 0;
+  }
+  for (size_t i = 500; i < 1000; ++i)
+  {
+    data.col(i) = g2.Random();
+    responses[i] = 1;
+  }
+
+  // Now train a logistic regression object on it.
+  LogisticRegression<> lr(data.n_rows, 0.5);
+  lr.Train<>(data, responses);
+
+  // Create a test set.
+  for (size_t i = 0; i < 500; ++i)
+  {
+    data.col(i) = g1.Random();
+    responses[i] = 0;
+  }
+  for (size_t i = 500; i < 1000; ++i)
+  {
+    data.col(i) = g2.Random();
+    responses[i] = 1;
+  }
+
+  arma::mat probabilities;
+  lr.Classify(data, probabilities);
+
+  BOOST_REQUIRE_EQUAL(probabilities.n_cols, data.n_cols);
+  BOOST_REQUIRE_EQUAL(probabilities.n_rows, 2);
+
+  for (size_t i = 0; i < data.n_cols; ++i)
+  {
+    BOOST_REQUIRE_CLOSE(probabilities(0, i) + probabilities(1, i), 1.0, 1e-5);
+
+    // 10% tolerance.
+    if (responses[i] == 0)
+      BOOST_REQUIRE_CLOSE(probabilities(0, i), 1.0, 10.0);
+    else
+      BOOST_REQUIRE_CLOSE(probabilities(1, i), 1.0, 10.0);
+  }
+}
+
 BOOST_AUTO_TEST_SUITE_END();

From dca52fd2ed7a7f44c4fbd7b0f89e4c5bf2337b92 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Wed, 1 Jun 2016 11:49:55 -0700
Subject: [PATCH 74/87] Add --output_probabilities_file option.

---
 .../logistic_regression_main.cpp              | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp b/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
index 4f4de07a42a..d2ffd9d3c51 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
@@ -94,7 +94,10 @@ PARAM_STRING("output_model_file", "File to save trained logistic regression "
 // Testing.
 PARAM_STRING("test_file", "File containing test dataset.", "T", "");
 PARAM_STRING("output_file", "If --test_file is specified, this file is "
-    "where the predicted responses will be saved.", "o", "");
+    "where the predictions for the test set will be saved.", "o", "");
+PARAM_STRING("output_probabilities_file", "If --test_file is specified, this "
+    "file is where the class probabilities for the test set will be saved.",
+    "p", "");
 PARAM_DOUBLE("decision_boundary", "Decision boundary for prediction; if the "
     "logistic function for a point is less than the boundary, the class is "
     "taken to be 0; otherwise, the class is 1.", "d", 0.5);
@@ -116,6 +119,8 @@ int main(int argc, char** argv)
   const string outputModelFile = CLI::GetParam<string>("output_model_file");
   const string testFile = CLI::GetParam<string>("test_file");
   const string outputFile = CLI::GetParam<string>("output_file");
+  const string outputProbabilitiesFile =
+      CLI::GetParam<string>("output_probabilities_file");
   const double decisionBoundary = CLI::GetParam<double>("decision_boundary");
 
   // One of inputFile and modelFile must be specified.
@@ -260,13 +265,24 @@ int main(int argc, char** argv)
 
     // We must perform predictions on the test set.  Training (and the
     // optimizer) are irrelevant here; we'll pass in the model we have.
-    Log::Info << "Predicting classes of points in '" << testFile << "'."
-        << endl;
-    model.Predict(testSet, predictions, decisionBoundary);
-
-    // Save the results, if necessary.
     if (!outputFile.empty())
+    {
+      Log::Info << "Predicting classes of points in '" << testFile << "'."
+          << endl;
+      model.Classify(testSet, predictions, decisionBoundary);
+
       data::Save(outputFile, predictions, false);
+    }
+
+    if (!outputProbabilitiesFile.empty())
+    {
+      Log::Info << "Calculating class probabilities of points in '" << testFile
+          << "'." << endl;
+      arma::mat probabilities;
+      model.Classify(testSet, probabilities);
+
+      data::Save(outputProbabilitiesFile, probabilities, false);
+    }
   }
 
   if (!outputModelFile.empty())

From eba4f9924694bc10daec74ff5059dbb8af001416 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Wed, 1 Jun 2016 12:35:50 -0700
Subject: [PATCH 75/87] Fix spacing and make method const.

---
 src/mlpack/methods/softmax_regression/softmax_regression.hpp   | 3 ++-
 .../methods/softmax_regression/softmax_regression_impl.hpp     | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/softmax_regression/softmax_regression.hpp b/src/mlpack/methods/softmax_regression/softmax_regression.hpp
index a2c8ee39eeb..e08466b35db 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression.hpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression.hpp
@@ -121,7 +121,8 @@ class SoftmaxRegression
    * @param testData Matrix of data points using which predictions are made.
    * @param labels Vector of labels associated with the data.
    */
-  double ComputeAccuracy(const arma::mat& testData, const arma::Row<size_t>& labels);
+  double ComputeAccuracy(const arma::mat& testData,
+                         const arma::Row<size_t>& labels) const;
 
   /**
    * Train the softmax regression model with the given optimizer.
diff --git a/src/mlpack/methods/softmax_regression/softmax_regression_impl.hpp b/src/mlpack/methods/softmax_regression/softmax_regression_impl.hpp
index d3c8fe976e9..dfede6e50e1 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression_impl.hpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression_impl.hpp
@@ -117,7 +117,7 @@ void SoftmaxRegression<OptimizerType>::Predict(const arma::mat& testData,
 template<template<typename> class OptimizerType>
 double SoftmaxRegression<OptimizerType>::ComputeAccuracy(
     const arma::mat& testData,
-    const arma::Row<size_t>& labels)
+    const arma::Row<size_t>& labels) const
 {
   arma::Row<size_t> predictions;
 

From 2417a1992b9dfdc96a5be45450e73a17e97db050 Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Thu, 2 Jun 2016 15:01:22 +0900
Subject: [PATCH 76/87] add more program info of split

---
 src/mlpack/core/data/split_data.hpp           |  3 ++-
 .../preprocess/preprocess_split_main.cpp      | 19 ++++++++++++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/core/data/split_data.hpp b/src/mlpack/core/data/split_data.hpp
index e5f1e2e9f20..d02f6c663e4 100644
--- a/src/mlpack/core/data/split_data.hpp
+++ b/src/mlpack/core/data/split_data.hpp
@@ -183,7 +183,8 @@ Split(const arma::Mat<T>& input,
   arma::Mat<T> testData;
   Split(input, trainData, testData, testRatio);
 
-  return std::make_tuple(std::move(trainData), std::move(testData));
+  return std::make_tuple(std::move(trainData),
+                         std::move(testData));
 }
 
 } // namespace data
diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index ca8e830be7c..d24b8ff8f0a 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -7,9 +7,22 @@
 #include <mlpack/core.hpp>
 #include <mlpack/core/data/split_data.hpp>
 
-PROGRAM_INFO("Split into Train and Test Data", "This "
-    "utility takes data and labels and split into a training "
-    "set and a test set.");
+PROGRAM_INFO("Split Data", "This utility takes data and splits it into a "
+    "training set and a test set. Before the split happens, it shuffles the "
+    "data in each feature. Without (--test_ratio) specified, the default "
+    "test-to-training ratio is set to 0.2."
+    "\n\n"
+    "The program does not modify or write on the original file, but instead "
+    "makes separate files to save the training and test files; you can "
+    "specify the file names with (--training_file) and (--test_file). If the "
+    "names are not specified, the program automatically names the training "
+    "and test file by attaching 'train_' and 'test_' in front of the "
+    "original file name."
+    "\n\n"
+    "Optionally, labels can also be split along with the data at the "
+    "same time by specifying the (--input_labels) option. Splitting labels "
+    "works the same as splitting the data and you can also specify the names "
+    "using (--training_labels_file) and (--test_labels_file).");
 
 // Define parameters for data
 PARAM_STRING_REQ("input_file", "File containing data,", "i");

From ac016feb8fb38b0b5c20644adff628815a4dc880 Mon Sep 17 00:00:00 2001
From: Keon Kim <kwk236@gmail.com>
Date: Thu, 2 Jun 2016 16:27:14 +0900
Subject: [PATCH 77/87] fix tests for split data

---
 src/mlpack/tests/split_data_test.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/tests/split_data_test.cpp b/src/mlpack/tests/split_data_test.cpp
index daf4cd50e84..bbc529baaef 100644
--- a/src/mlpack/tests/split_data_test.cpp
+++ b/src/mlpack/tests/split_data_test.cpp
@@ -36,7 +36,10 @@ void CompareData(const mat& inputData,
     const mat& rhsCol = compareData.col(i);
     for (size_t j = 0; j != lhsCol.n_rows; ++j)
     {
-      BOOST_REQUIRE_CLOSE(lhsCol(j), rhsCol(j), 1e-5);
+      if (std::abs(rhsCol(j)) < 1e-5)
+        BOOST_REQUIRE_SMALL(lhsCol(j), 1e-5);
+      else
+        BOOST_REQUIRE_CLOSE(lhsCol(j), rhsCol(j), 1e-5);
     }
   }
 }
@@ -52,7 +55,10 @@ void CheckMatEqual(const mat& inputData,
     const mat& rhsCol = sortedCompare.col(i);
     for (size_t j = 0; j < lhsCol.n_rows; ++j)
     {
-      BOOST_REQUIRE_CLOSE(lhsCol(j), rhsCol(j), 1e-5);
+      if (std::abs(rhsCol(j)) < 1e-5)
+        BOOST_REQUIRE_SMALL(lhsCol(j), 1e-5);
+      else
+        BOOST_REQUIRE_CLOSE(lhsCol(j), rhsCol(j), 1e-5);
     }
   }
 }

From 06fdfa866a1d85bc8213c0f86777865174fafd84 Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Thu, 2 Jun 2016 14:39:07 +0300
Subject: [PATCH 78/87] Adds new constructor to LSHSearch and merges Train()
 and BuildHash() methods

---
 HISTORY.md                                 |  10 +
 src/mlpack/methods/lsh/lsh_search.hpp      |  27 +-
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 331 +++++++++++----------
 3 files changed, 205 insertions(+), 163 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index cddb28a3e3c..de21f895281 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,15 @@
 ### mlpack 2.0.2
 ###### 2016-??-??
+  * LSHSearch::Projection(size_t) that returned a single projection matrix has
+    been removed. In its place, LSHSearch::Projections() has been added, which
+    returns an arma::cube with each projection table in a slice (#663).
+
+  * A new constructor has been added to LSHSearch that creates objects using
+    projection tables provided in an arma::cube (#663).
+
+  * LSHSearch::Projections(arma::cube) has been added that allows users to
+    change the projection tables of an LSHSearch object (#663).
+
   * Handle zero-variance dimensions in DET (#515).
 
   * Add MiniBatchSGD optimizer (src/mlpack/core/optimizers/minibatch_sgd/) and
diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index d3bc2f92e00..9c7c1d6438c 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -64,6 +64,31 @@ class LSHSearch
    *     the maximum number of points that can be hashed into single bucket.
    *     Default values are already provided here.
    */
+  LSHSearch(const arma::mat& referenceSet,
+            const arma::cube& projections,
+            const double hashWidth = 0.0,
+            const size_t secondHashSize = 99901,
+            const size_t bucketSize = 500);
+
+  /**
+   * This function initializes the LSH class. It builds the hash on the
+   * reference set using the provided projections. See the individual functions
+   * performing the hashing for details on how the hashing is done.
+   *
+   * @param referenceSet Set of reference points and the set of queries.
+   * @param projections Cube of projection tables. For a cube of size (a, b, c)
+   *     we set numProj = b, numTables = c. a is the reference set
+   *     dimensionality.
+   * @param hashWidth The width of hash for every table. If 0 (the default) is
+   *     provided, then the hash width is automatically obtained by computing
+   *     the average pairwise distance of 25 pairs.  This should be a reasonable
+   *     upper bound on the nearest-neighbor distance in general.
+   * @param secondHashSize The size of the second hash table. This should be a
+   *     large prime number.
+   * @param bucketSize The size of the bucket in the second hash table. This is
+   *     the maximum number of points that can be hashed into single bucket.
+   *     Default values are already provided here.
+   */
   LSHSearch(const arma::mat& referenceSet,
             const size_t numProj,
             const size_t numTables,
@@ -177,7 +202,7 @@ class LSHSearch
   const arma::Mat<size_t>& SecondHashTable() const { return secondHashTable; }
 
   //! Get the projection tables.
-  const arma::cube Projections() { return projections; }
+  const arma::cube& Projections() { return projections; }
 
   //! Change the projection tables (Retrains object)
   void Projections(const arma::cube &projTables)
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index afeaf05b12f..c0fc57e759b 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -12,7 +12,7 @@
 namespace mlpack {
 namespace neighbor {
 
-// Construct the object.
+// Construct the object with random tables
 template<typename SortPolicy>
 LSHSearch<SortPolicy>::
 LSHSearch(const arma::mat& referenceSet,
@@ -35,6 +35,28 @@ LSHSearch(const arma::mat& referenceSet,
       bucketSize);
 }
 
+// Construct the object with given tables
+template<typename SortPolicy>
+LSHSearch<SortPolicy>::
+LSHSearch(const arma::mat& referenceSet,
+          const arma::cube& projections,
+          const double hashWidthIn,
+          const size_t secondHashSize,
+          const size_t bucketSize) :
+  referenceSet(NULL), // This will be set in Train().
+  ownsSet(false),
+  numProj(projections.n_cols),
+  numTables(projections.n_slices),
+  hashWidth(hashWidthIn),
+  secondHashSize(secondHashSize),
+  bucketSize(bucketSize),
+  distanceEvaluations(0)
+{
+  // Pass work to training function
+  Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize,
+      bucketSize, projections);
+}
+
 // Empty constructor.
 template<typename SortPolicy>
 LSHSearch<SortPolicy>::LSHSearch() :
@@ -98,7 +120,152 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
 
   Log::Info << "Hash width chosen as: " << hashWidth << std::endl;
 
-  BuildHash(projection);
+  // Hash Building Procedure
+  // The first level hash for a single table outputs a 'numProj'-dimensional
+  // integer key for each point in the set -- (key, pointID)
+  // The key creation details are presented below
+  //
+
+  // Step I: Prepare the second level hash.
+
+  // Obtain the weights for the second hash.
+  secondHashWeights = arma::floor(arma::randu(numProj) *
+                                  (double) secondHashSize);
+
+  // The 'secondHashTable' is initially an empty matrix of size
+  // ('secondHashSize' x 'bucketSize'). But by only filling the buckets
+  // as points land in them allows us to shrink the size of the
+  // 'secondHashTable' at the end of the hashing.
+
+  // Fill the second hash table n = referenceSet.n_cols.  This is because no
+  // point has index 'n' so the presence of this in the bucket denotes that
+  // there are no more points in this bucket.
+  secondHashTable.set_size(secondHashSize, bucketSize);
+  secondHashTable.fill(referenceSet.n_cols);
+
+  // Keep track of the size of each bucket in the hash.  At the end of hashing
+  // most buckets will be empty.
+  bucketContentSize.zeros(secondHashSize);
+
+  // Instead of putting the points in the row corresponding to the bucket, we
+  // chose the next empty row and keep track of the row in which the bucket
+  // lies. This allows us to stack together and slice out the empty buckets at
+  // the end of the hashing.
+  bucketRowInHashTable.set_size(secondHashSize);
+  bucketRowInHashTable.fill(secondHashSize);
+
+  // Keep track of number of non-empty rows in the 'secondHashTable'.
+  size_t numRowsInTable = 0;
+
+  // Step II: The offsets for all projections in all tables.
+  // Since the 'offsets' are in [0, hashWidth], we obtain the 'offsets'
+  // as randu(numProj, numTables) * hashWidth.
+  offsets.randu(numProj, numTables);
+  offsets *= hashWidth;
+
+
+
+
+  // Step III: Obtain the 'numProj' projections for each table.
+  projections.clear(); // Reset projections cube.
+
+  if (projection.n_slices == 0) //random generation of tables
+  {
+    // For L2 metric, 2-stable distributions are used, and
+    // the normal Z ~ N(0, 1) is a 2-stable distribution.
+
+    //numTables random tables arranged in a cube
+    projections.randn(
+        referenceSet.n_rows,
+        numProj,
+        numTables
+    );
+  }
+  else if (projection.n_slices == numTables) //user defined tables
+  {
+    projections = projection;
+  }
+  else //invalid argument
+  {
+    throw std::invalid_argument(
+        "number of projection tables provided must be equal to numTables"
+        );
+  }
+
+
+  for (size_t i = 0; i < numTables; i++)
+  {
+    // Step IV: create the 'numProj'-dimensional key for each point in each
+    // table.
+
+    // The following code performs the task of hashing each point to a
+    // 'numProj'-dimensional integer key.  Hence you get a ('numProj' x
+    // 'referenceSet.n_cols') key matrix.
+    //
+    // For a single table, let the 'numProj' projections be denoted by 'proj_i'
+    // and the corresponding offset be 'offset_i'.  Then the key of a single
+    // point is obtained as:
+    // key = { floor( (<proj_i, point> + offset_i) / 'hashWidth' ) forall i }
+    arma::mat offsetMat = arma::repmat(offsets.unsafe_col(i), 1,
+                                       referenceSet.n_cols);
+    arma::mat hashMat = projections.slice(i).t() * (referenceSet);
+    hashMat += offsetMat;
+    hashMat /= hashWidth;
+
+    // Step V: Putting the points in the 'secondHashTable' by hashing the key.
+    // Now we hash every key, point ID to its corresponding bucket.
+    arma::rowvec secondHashVec = secondHashWeights.t() * arma::floor(hashMat);
+
+    // This gives us the bucket for the corresponding point ID.
+    for (size_t j = 0; j < secondHashVec.n_elem; j++)
+      secondHashVec[j] = (double)((size_t) secondHashVec[j] % secondHashSize);
+
+    Log::Assert(secondHashVec.n_elem == referenceSet.n_cols);
+
+    // Insert the point in the corresponding row to its bucket in the
+    // 'secondHashTable'.
+    for (size_t j = 0; j < secondHashVec.n_elem; j++)
+    {
+      // This is the bucket number.
+      size_t hashInd = (size_t) secondHashVec[j];
+      // The point ID is 'j'.
+
+      // If this is currently an empty bucket, start a new row keep track of
+      // which row corresponds to the bucket.
+      if (bucketContentSize[hashInd] == 0)
+      {
+        // Start a new row for hash.
+        bucketRowInHashTable[hashInd] = numRowsInTable;
+        secondHashTable(numRowsInTable, 0) = j;
+
+        numRowsInTable++;
+      }
+
+      else
+      {
+        // If bucket is already present in the 'secondHashTable', find the
+        // corresponding row and insert the point ID in this row unless the
+        // bucket is full, in which case, do nothing.
+        if (bucketContentSize[hashInd] < bucketSize)
+          secondHashTable(bucketRowInHashTable[hashInd],
+                          bucketContentSize[hashInd]) = j;
+      }
+
+      // Increment the count of the points in this bucket.
+      if (bucketContentSize[hashInd] < bucketSize)
+        bucketContentSize[hashInd]++;
+    } // Loop over all points in the reference set.
+  } // Loop over tables.
+
+  // Step VI: Condensing the 'secondHashTable'.
+  size_t maxBucketSize = 0;
+  for (size_t i = 0; i < bucketContentSize.n_elem; i++)
+    if (bucketContentSize[i] > maxBucketSize)
+      maxBucketSize = bucketContentSize[i];
+
+  Log::Info << "Final hash table size: (" << numRowsInTable << " x "
+            << maxBucketSize << ")" << std::endl;
+  secondHashTable.resize(numRowsInTable, maxBucketSize);
 }
 
 template<typename SortPolicy>
@@ -359,166 +526,6 @@ Search(const size_t k,
 template<typename SortPolicy>
 void LSHSearch<SortPolicy>::BuildHash(const arma::cube &projection)
 {
-  // The first level hash for a single table outputs a 'numProj'-dimensional
-  // integer key for each point in the set -- (key, pointID)
-  // The key creation details are presented below
-  //
-  // The second level hash is performed by hashing the key to
-  // an integer in the range [0, 'secondHashSize').
-  //
-  // This is done by creating a weight vector 'secondHashWeights' of
-  // length 'numProj' with each entry an integer randomly chosen
-  // between [0, 'secondHashSize').
-  //
-  // Then the bucket for any key and its corresponding point is
-  // given by <key, 'secondHashWeights'> % 'secondHashSize'
-  // and the corresponding point ID is put into that bucket.
-
-  // Step I: Prepare the second level hash.
-
-  // Obtain the weights for the second hash.
-  secondHashWeights = arma::floor(arma::randu(numProj) *
-                                  (double) secondHashSize);
-
-  // The 'secondHashTable' is initially an empty matrix of size
-  // ('secondHashSize' x 'bucketSize'). But by only filling the buckets
-  // as points land in them allows us to shrink the size of the
-  // 'secondHashTable' at the end of the hashing.
-
-  // Fill the second hash table n = referenceSet.n_cols.  This is because no
-  // point has index 'n' so the presence of this in the bucket denotes that
-  // there are no more points in this bucket.
-  secondHashTable.set_size(secondHashSize, bucketSize);
-  secondHashTable.fill(referenceSet->n_cols);
-
-  // Keep track of the size of each bucket in the hash.  At the end of hashing
-  // most buckets will be empty.
-  bucketContentSize.zeros(secondHashSize);
-
-  // Instead of putting the points in the row corresponding to the bucket, we
-  // chose the next empty row and keep track of the row in which the bucket
-  // lies. This allows us to stack together and slice out the empty buckets at
-  // the end of the hashing.
-  bucketRowInHashTable.set_size(secondHashSize);
-  bucketRowInHashTable.fill(secondHashSize);
-
-  // Keep track of number of non-empty rows in the 'secondHashTable'.
-  size_t numRowsInTable = 0;
-
-  // Step II: The offsets for all projections in all tables.
-  // Since the 'offsets' are in [0, hashWidth], we obtain the 'offsets'
-  // as randu(numProj, numTables) * hashWidth.
-  offsets.randu(numProj, numTables);
-  offsets *= hashWidth;
-
-  // Step III: Create each hash table in the first level hash one by one and
-  // putting them directly into the 'secondHashTable' for memory efficiency.
-  //projections.clear(); // Reset projections vector.
-
-
-
-  // Step IV: Obtain the 'numProj' projections for each table.
-
-  if (projection.n_slices == 0) //random generation of tables
-  {
-    // For L2 metric, 2-stable distributions are used, and
-    // the normal Z ~ N(0, 1) is a 2-stable distribution.
-
-    //numTables random tables arranged in a cube
-    projections.randn(
-        referenceSet->n_rows,
-        numProj,
-        numTables
-    );
-  }
-  else if (projection.n_slices == numTables) //user defined tables
-  {
-    projections = projection;
-  }
-  else //invalid argument
-  {
-    throw std::invalid_argument(
-        "number of projection tables provided must be equal to numProj"
-        );
-  }
-    
-
-  for (size_t i = 0; i < numTables; i++)
-  {
-
-
-    // Step V: create the 'numProj'-dimensional key for each point in each
-    // table.
-
-    // The following code performs the task of hashing each point to a
-    // 'numProj'-dimensional integer key.  Hence you get a ('numProj' x
-    // 'referenceSet.n_cols') key matrix.
-    //
-    // For a single table, let the 'numProj' projections be denoted by 'proj_i'
-    // and the corresponding offset be 'offset_i'.  Then the key of a single
-    // point is obtained as:
-    // key = { floor( (<proj_i, point> + offset_i) / 'hashWidth' ) forall i }
-    arma::mat offsetMat = arma::repmat(offsets.unsafe_col(i), 1,
-                                       referenceSet->n_cols);
-    // arma::mat hashMat = projMat.t() * (*referenceSet);
-    arma::mat hashMat = projections.slice(i).t() * (*referenceSet);
-    hashMat += offsetMat;
-    hashMat /= hashWidth;
-
-    // Step VI: Putting the points in the 'secondHashTable' by hashing the key.
-    // Now we hash every key, point ID to its corresponding bucket.
-    arma::rowvec secondHashVec = secondHashWeights.t() * arma::floor(hashMat);
-
-    // This gives us the bucket for the corresponding point ID.
-    for (size_t j = 0; j < secondHashVec.n_elem; j++)
-      secondHashVec[j] = (double)((size_t) secondHashVec[j] % secondHashSize);
-
-    Log::Assert(secondHashVec.n_elem == referenceSet->n_cols);
-
-    // Insert the point in the corresponding row to its bucket in the
-    // 'secondHashTable'.
-    for (size_t j = 0; j < secondHashVec.n_elem; j++)
-    {
-      // This is the bucket number.
-      size_t hashInd = (size_t) secondHashVec[j];
-      // The point ID is 'j'.
-
-      // If this is currently an empty bucket, start a new row keep track of
-      // which row corresponds to the bucket.
-      if (bucketContentSize[hashInd] == 0)
-      {
-        // Start a new row for hash.
-        bucketRowInHashTable[hashInd] = numRowsInTable;
-        secondHashTable(numRowsInTable, 0) = j;
-
-        numRowsInTable++;
-      }
-
-      else
-      {
-        // If bucket is already present in the 'secondHashTable', find the
-        // corresponding row and insert the point ID in this row unless the
-        // bucket is full, in which case, do nothing.
-        if (bucketContentSize[hashInd] < bucketSize)
-          secondHashTable(bucketRowInHashTable[hashInd],
-                          bucketContentSize[hashInd]) = j;
-      }
-
-      // Increment the count of the points in this bucket.
-      if (bucketContentSize[hashInd] < bucketSize)
-        bucketContentSize[hashInd]++;
-    } // Loop over all points in the reference set.
-  } // Loop over tables.
-
-  // Step VII: Condensing the 'secondHashTable'.
-  size_t maxBucketSize = 0;
-  for (size_t i = 0; i < bucketContentSize.n_elem; i++)
-    if (bucketContentSize[i] > maxBucketSize)
-      maxBucketSize = bucketContentSize[i];
-
-  Log::Info << "Final hash table size: (" << numRowsInTable << " x "
-            << maxBucketSize << ")" << std::endl;
-  secondHashTable.resize(numRowsInTable, maxBucketSize);
 }
 
 template<typename SortPolicy>

From b30e6975a22d82f2f82d46d7fa219bdc2eb52870 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Thu, 2 Jun 2016 09:44:03 -0400
Subject: [PATCH 79/87] Remove BuildHash(); simpler cube default.

---
 src/mlpack/methods/lsh/lsh_search.hpp      | 19 +------------------
 src/mlpack/methods/lsh/lsh_search_impl.hpp |  5 -----
 2 files changed, 1 insertion(+), 23 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index 9c7c1d6438c..7dd321271b4 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -118,9 +118,7 @@ class LSHSearch
              const double hashWidth = 0.0,
              const size_t secondHashSize = 99901,
              const size_t bucketSize = 500,
-             const arma::cube &projection
-             = arma::zeros<arma::cube>(0,0,0)
-             );
+             const arma::cube& projection = arma::cube());
 
   /**
    * Compute the nearest neighbors of the points in the given query set and
@@ -220,21 +218,6 @@ class LSHSearch
   };
 
  private:
-  /**
-   * This function builds a hash table with two levels of hashing as presented
-   * in the paper. This function first hashes the points with 'numProj' random
-   * projections to a single hash table creating (key, point ID) pairs where the
-   * key is a 'numProj'-dimensional integer vector.
-   *
-   * Then each key in this hash table is hashed into a second hash table using a
-   * standard hash.
-   *
-   * This function does not have any parameters and relies on parameters which
-   * are private members of this class, initialized during the class
-   * initialization.
-   */
-  void BuildHash(const arma::cube &projection);
-
   /**
    * This function takes a query and hashes it into each of the hash tables to
    * get keys for the query and then the key is hashed to a bucket of the second
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index c3734aa2b7f..74ac4cc94e9 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -518,11 +518,6 @@ Search(const size_t k,
       std::endl;
 }
 
-template<typename SortPolicy>
-void LSHSearch<SortPolicy>::BuildHash(const arma::cube &projection)
-{
-}
-
 template<typename SortPolicy>
 template<typename Archive>
 void LSHSearch<SortPolicy>::Serialize(Archive& ar,

From d3e3c5470e38f781592d6344321f930df81a54d2 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Thu, 2 Jun 2016 09:47:31 -0400
Subject: [PATCH 80/87] Minor code cleanups and style fixes.

---
 src/mlpack/methods/lsh/lsh_search.hpp      | 19 ++++++-------------
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 21 ++++++++-------------
 2 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index 7dd321271b4..1f548734aa3 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -202,20 +202,13 @@ class LSHSearch
   //! Get the projection tables.
   const arma::cube& Projections() { return projections; }
 
-  //! Change the projection tables (Retrains object)
-  void Projections(const arma::cube &projTables)
+  //! Change the projection tables (this retrains the LSH model).
+  void Projections(const arma::cube& projTables)
   {
-    // Simply call Train() with given projection tables
-    Train(
-        *referenceSet,
-        numProj,
-        numTables,
-        hashWidth,
-        secondHashSize,
-        bucketSize,
-        projTables
-        );
-  };
+    // Simply call Train() with the given projection tables.
+    Train(*referenceSet, numProj, numTables, hashWidth, secondHashSize,
+        bucketSize, projTables);
+  }
 
  private:
   /**
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 74ac4cc94e9..9ab206760e3 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -165,27 +165,22 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
   // Step III: Obtain the 'numProj' projections for each table.
   projections.clear(); // Reset projections vector.
 
-  if (projection.n_slices == 0) //random generation of tables
+  if (projection.n_slices == 0) // Randomly generate the tables.
   {
     // For L2 metric, 2-stable distributions are used, and the normal Z ~ N(0,
     // 1) is a 2-stable distribution.
 
-    // numTables random tables arranged in a cube.
-    projections.randn(
-        referenceSet.n_rows,
-        numProj,
-        numTables
-    );
+    // Build numTables random tables arranged in a cube.
+    projections.randn(referenceSet.n_rows, numProj, numTables);
   }
-  else if (projection.n_slices == numTables) //user defined tables
+  else if (projection.n_slices == numTables) // Take user-defined tables.
   {
     projections = projection;
   }
-  else //invalid argument
+  else // The user gave something wrong.
   {
-    throw std::invalid_argument(
-        "number of projection tables provided must be equal to numProj"
-        );
+    throw std::invalid_argument("LSHSearch::Train(): number of projection "
+        "tables provided must be equal to numTables");
   }
 
   for (size_t i = 0; i < numTables; i++)
@@ -213,7 +208,7 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
 
     // This gives us the bucket for the corresponding point ID.
     for (size_t j = 0; j < secondHashVec.n_elem; j++)
-      secondHashVec[j] = (double)((size_t) secondHashVec[j] % secondHashSize);
+      secondHashVec[j] = (double) ((size_t) secondHashVec[j] % secondHashSize);
 
     Log::Assert(secondHashVec.n_elem == referenceSet.n_cols);
 

From 8ed22ce675da6a4a889ecc12b1f6d13f37c11fdf Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Thu, 2 Jun 2016 09:49:39 -0400
Subject: [PATCH 81/87] We can't remove Projection() because we'd break our
 versioning principles. So we'll have to wait until mlpack 2.1.0 to remove
 it... :(

---
 src/mlpack/methods/lsh/lsh_search.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index 1f548734aa3..b42bb7a81e0 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -210,6 +210,10 @@ class LSHSearch
         bucketSize, projTables);
   }
 
+  //! Get a single projection matrix.  This function is deprecated and will be
+  //! removed in mlpack 2.1.0!
+  const arma::mat& Projection(size_t i) { return projections.slice(i); }
+
  private:
   /**
    * This function takes a query and hashes it into each of the hash tables to

From e3a23c256f017ebb8185b15847c82f51d359cdfd Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Thu, 2 Jun 2016 09:50:42 -0400
Subject: [PATCH 82/87] Update HISTORY.

---
 HISTORY.md | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index de21f895281..b117ee68958 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,15 +1,12 @@
 ### mlpack 2.0.2
 ###### 2016-??-??
-  * LSHSearch::Projection(size_t) that returned a single projection matrix has
-    been removed. In its place, LSHSearch::Projections() has been added, which
-    returns an arma::cube with each projection table in a slice (#663).
+  * Added the function LSHSearch::Projections(), which returns an arma::cube
+    with each projection table in a slice (#663).  Instead of Projection(i), you
+    should now use Projections().slice(i).
 
   * A new constructor has been added to LSHSearch that creates objects using
     projection tables provided in an arma::cube (#663).
 
-  * LSHSearch::Projections(arma::cube) has been added that allows users to
-    change the projection tables of an LSHSearch object (#663).
-
   * Handle zero-variance dimensions in DET (#515).
 
   * Add MiniBatchSGD optimizer (src/mlpack/core/optimizers/minibatch_sgd/) and

From 6a94eb6efb37eca09e95fd53bd7c21334abf7614 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Thu, 2 Jun 2016 14:54:46 -0400
Subject: [PATCH 83/87] Update some documentation, change --input_labels to
 --input_labels_file.

---
 .../preprocess/preprocess_split_main.cpp      | 125 ++++++++++--------
 1 file changed, 73 insertions(+), 52 deletions(-)

diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index d24b8ff8f0a..1e063db0cda 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -7,27 +7,47 @@
 #include <mlpack/core.hpp>
 #include <mlpack/core/data/split_data.hpp>
 
-PROGRAM_INFO("Split Data", "This utility takes data and split into a training "
-    "set and a test set. Before the split happens, it shuffles the data in "
-    "the each feature. Without (--test_ratio) specified, the default "
-    "test-to-training ratio is set to 0.2."
+PROGRAM_INFO("Split Data", "This utility takes a dataset and optionally labels "
+    "and splits them into a training set and a test set. Before the split, the "
+    "points in the dataset are randomly reordered. The percentage of the "
+    "dataset to be used as the test set can be specified with the --test_ratio "
+    "(-r) option; the default is 0.2 (20%)."
     "\n\n"
-    "The program does not modify or write on the original file, but instead "
-    "makes a seperate files to save the training and test files; you can "
-    "specify the file names with (-training_file) and (-test_file). If the "
-    "names are not specified, the program automatically names the training "
-    "and test file by attaching 'train_' and 'test_' in front of the "
-    "original file name"
+    "The program does not modify the original file, but instead makes separate "
+    "files to save the training and test files; you can specify the file names "
+    "with --training_file (-t) and --test_file (-T). If these options are not "
+    "specified, the program automatically names the training and test file by "
+    "prepending 'train_' and 'test_' to the dataset filename (which was "
+    "specified by --input_file)."
     "\n\n"
-    "Optionally, a label can be also be splited along with the data at the "
-    "same time by specifying (--input_lables) option. Splitting label works "
-    "the same as splitting the data and you can also specify the names using "
-    "(--trainning_labels_file) and (--test_labels_file).");
+    "Optionally, labels can also be split along with the data by specifying "
+    "the --input_labels_file (-I) option. Splitting labels works the same way "
+    "as splitting the data. The output training and test labels will be saved "
+    "to the files specified by --training_labels_file (-l) and "
+    "--test_labels_file (-L), respectively. If these options are not specified,"
+    " then the program will automatically name the training labels and test "
+    "labels file by prepending 'train_' and 'test_' to the labels filename "
+    "(which was specified by --input_labels_file)."
+    "\n\n"
+    "So, in a simple example where we want to split dataset.csv into "
+    "train_dataset.csv and test_dataset.csv with 60% of the data in the "
+    "training set and 40% of the dataset in the test set, we could run"
+    "\n\n"
+    "$ mlpack_preprocess_split -i dataset.csv -r 0.4"
+    "\n\n"
+    "If we had a dataset in dataset.csv and associated labels in labels.csv, "
+    "and we wanted to split these into training_set.csv, training_labels.csv, "
+    "test_set.csv, and test_labels.csv, with 30% of the data in the test set, "
+    "we could run"
+    "\n\n"
+    "$ mlpack_preprocess_split -i dataset.csv -I labels.csv -r 0.3\n"
+    "> -t training_set.csv -l training_labels.csv -T test_set.csv\n"
+    "> -L test_labels.csv");
 
-// Define parameters for data
+// Define parameters for data.
 PARAM_STRING_REQ("input_file", "File containing data,", "i");
-// Define optional parameters
-PARAM_STRING("input_labels", "File containing labels", "I", "");
+// Define optional parameters.
+PARAM_STRING("input_labels_file", "File containing labels", "I", "");
 PARAM_STRING("training_file", "File name to save train data", "t", "");
 PARAM_STRING("test_file", "File name to save test data", "T", "");
 PARAM_STRING("training_labels_file", "File name to save train label", "l", "");
@@ -46,44 +66,44 @@ int main(int argc, char** argv)
   // Parse command line options.
   CLI::ParseCommandLine(argc, argv);
   const string inputFile = CLI::GetParam<string>("input_file");
-  const string inputLabels = CLI::GetParam<string>("input_labels");
+  const string inputLabels = CLI::GetParam<string>("input_labels_file");
   string trainingFile = CLI::GetParam<string>("training_file");
   string testFile = CLI::GetParam<string>("test_file");
   string trainingLabelsFile = CLI::GetParam<string>("training_labels_file");
   string testLabelsFile = CLI::GetParam<string>("test_labels_file");
   const double testRatio = CLI::GetParam<double>("test_ratio");
 
-  // check on data parameters
+  // Check on data parameters.
   if (trainingFile.empty())
   {
     trainingFile = "train_" + inputFile;
-    Log::Warn << "You did not specify --training_file. "
-      << "Training file name is automatically set to: "
-      << trainingFile << endl;
+    Log::Warn << "You did not specify --training_file, so the training set file"
+        << " name will be automatically set to '" << trainingFile << "'." 
+        << endl;
   }
   if (testFile.empty())
   {
     testFile = "test_" + inputFile;
-    Log::Warn << "You did not specify --test_file. "
-      << "Test file name is automatically set to: " << testFile << endl;
+    Log::Warn << "You did not specify --test_file, so the test set file name "
+        << "will be automatically set to '" << testFile << "'." << endl;
   }
 
-  // check on label parameters
+  // Check on label parameters.
   if (!inputLabels.empty())
   {
     if (!CLI::HasParam("training_labels_file"))
     {
       trainingLabelsFile = "train_" + inputLabels;
-      Log::Warn << "You did not specify --training_labels_file. "
-        << "Training labels file name is automatically set to: "
-        << trainingLabelsFile << endl;
+      Log::Warn << "You did not specify --training_labels_file, so the training"
+          << " set labels file name will be automatically set to '"
+          << trainingLabelsFile << "'." << endl;
     }
     if (!CLI::HasParam("test_labels_file"))
     {
       testLabelsFile = "test_" + inputLabels;
-      Log::Warn << "You did not specify --test_labels_file. "
-        << "Test labels file name is automatically set to: "
-        << testLabelsFile << endl;
+      Log::Warn << "You did not specify --test_labels_file, so the test set "
+        << "labels file name will be automatically set to '"
+        << testLabelsFile << "'." << endl;
     }
   }
   else
@@ -92,56 +112,57 @@ int main(int argc, char** argv)
         || CLI::HasParam("test_labels_file"))
     {
       Log::Fatal << "When specifying --training_labels_file or "
-        << "test_labels_file, you must also specify --input_labels. " << endl;
+          << "--test_labels_file, you must also specify --input_labels_file. "
+          << endl;
     }
   }
 
-  // check on test_ratio
+  // Check test_ratio.
   if (CLI::HasParam("test_ratio"))
   {
-    //sanity check on test_ratio
     if ((testRatio < 0.0) || (testRatio > 1.0))
     {
-      Log::Fatal << "Invalid parameter for test_ratio. "
-        << "test_ratio must be between 0.0 and 1.0" << endl;
+      Log::Fatal << "Invalid parameter for test_ratio; "
+          << "--test_ratio must be between 0.0 and 1.0." << endl;
     }
   }
-  else // if test_ratio is not set
+  else // If test_ratio is not set, warn the user.
   {
-    Log::Warn << "You did not specify --test_ratio_file. "
-      << "Test ratio is automatically set to: 0.2"<< endl;
+    Log::Warn << "You did not specify --test_ratio, so it will be automatically"
+        << " set to 0.2." << endl;
   }
 
-  // load data
+  // Load the data.
   arma::mat data;
   data::Load(inputFile, data, true);
 
-  // if parameters for labels exist
-  if (CLI::HasParam("input_labels"))
+  // If parameters for labels exist, we must split the labels too.
+  if (CLI::HasParam("input_labels_file"))
   {
     arma::mat labels;
     data::Load(inputLabels, labels, true);
-    arma::rowvec labels_row = labels.row(0); // extract first row
+    arma::rowvec labelsRow = labels.row(0);
 
-    const auto value = data::Split(data, labels_row, testRatio);
-    Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
-    Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
-    Log::Info << "Train Label Count: " << get<2>(value).n_cols << endl;
-    Log::Info << "Test Label Count: " << get<3>(value).n_cols << endl;
+    const auto value = data::Split(data, labelsRow, testRatio);
+    Log::Info << "Training data contains " << get<0>(value).n_cols << " points."
+        << endl;
+    Log::Info << "Test data contains " << get<1>(value).n_cols << " points."
+        << endl;
 
     data::Save(trainingFile, get<0>(value), false);
     data::Save(testFile, get<1>(value), false);
     data::Save(trainingLabelsFile, get<2>(value), false);
     data::Save(testLabelsFile, get<3>(value), false);
   }
-  else // split without parameters
+  else // We have no labels, so just split the dataset.
   {
     const auto value = data::Split(data, testRatio);
-    Log::Info << "Train Data Count: " << get<0>(value).n_cols << endl;
-    Log::Info << "Test Data Count: " << get<1>(value).n_cols << endl;
+    Log::Info << "Training data contains " << get<0>(value).n_cols << " points."
+        << endl;
+    Log::Info << "Test data contains " << get<1>(value).n_cols << " points."
+        << endl;
 
     data::Save(trainingFile, get<0>(value), false);
     data::Save(testFile, get<1>(value), false);
   }
 }
-

From dde781fad5658797d87eeab8d40cf629dfa8d000 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Thu, 2 Jun 2016 16:55:32 -0400
Subject: [PATCH 84/87] Marcus thinks this will fix the Windows build... let's
 find out.

---
 src/mlpack/core/data/serialization_template_version.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mlpack/core/data/serialization_template_version.hpp b/src/mlpack/core/data/serialization_template_version.hpp
index 8cf67dc121c..6b617a8e1d1 100644
--- a/src/mlpack/core/data/serialization_template_version.hpp
+++ b/src/mlpack/core/data/serialization_template_version.hpp
@@ -18,7 +18,6 @@
 #define BOOST_TEMPLATE_CLASS_VERSION(SIGNATURE, T, N) \
 namespace boost { \
 namespace serialization { \
-template<> \
 SIGNATURE \
 struct version<mlpack::data::SecondShim<T>> \
 { \

From 8ad5711d77865c6a2df5b6a296de8a905f587c94 Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Sat, 4 Jun 2016 16:30:45 +0300
Subject: [PATCH 85/87] Adds 2 deterministic LSH tests

---
 src/mlpack/tests/lsh_test.cpp | 178 ++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)

diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index d42566694fb..26cf24d854c 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -15,6 +15,9 @@ using namespace std;
 using namespace mlpack;
 using namespace mlpack::neighbor;
 
+/**
+ * Computes Recall (percent of neighbors found correctly)
+ */
 double ComputeRecall(
     const arma::Mat<size_t>& lshNeighbors,
     const arma::Mat<size_t>& groundTruth)
@@ -26,6 +29,62 @@ double ComputeRecall(
   return same / (static_cast<double>(queries * neigh));
 }
 
+/**
+ * Generates a point set of four clusters around (0.5, 0.5),
+ * (3.5, 0.5), (0.5, 3.5), (3.5, 3.5)
+ */
+void getPointset(const size_t N, arma::mat& rdata)
+{
+  const size_t d = 2;
+  // Create four clusters of points
+  arma::mat C1(d, N/4, arma::fill::randu);
+  arma::mat C2(d, N/4, arma::fill::randu);
+  arma::mat C3(d, N/4, arma::fill::randu);
+  arma::mat C4(d, N/4, arma::fill::randu);
+
+  arma::colvec offset1;
+  offset1<<0<<arma::endr<<3<<arma::endr;
+  arma::colvec offset2;
+  offset2<<3<<arma::endr<<3<<arma::endr;
+  arma::colvec offset4;
+  offset4<<3<<arma::endr<<0<<arma::endr;
+  //spread points in plane
+  for (size_t p = 0; p < N/4; ++p)
+  {
+    C1.col(p)+=offset1;
+    C2.col(p)+=offset2;
+    C4.col(p)+=offset4;
+  }
+
+  rdata.set_size(d, N);
+  rdata.cols(0, N/4-1) = C1;
+  rdata.cols(N/4, N/2-1) = C2;
+  rdata.cols(N/2, 3*N/4-1) = C3;
+  rdata.cols(3*N/4, N-1) = C4;
+}
+
+/**
+ * Generates two queries, one around (0.5, 0.5) and one around (3.5, 3.5)
+ */
+void getQueries(arma::mat& qdata)
+{
+  const size_t d = 2;
+  // generate two queries inside two of the clusters
+
+  // put query 1 into cluster 3
+  arma::colvec q1, q2;
+  q1.randu(d, 1);
+
+  // offset second query to go into cluster 2
+  q2.randu(d, 1);
+  q2.row(0)+=3;
+  q2.row(1)+=3;
+
+  qdata.set_size(d, 2);
+  qdata.col(0) = q1;
+  qdata.col(1) = q2;
+}
+
 BOOST_AUTO_TEST_SUITE(LSHTest);
 
 /**
@@ -302,6 +361,125 @@ BOOST_AUTO_TEST_CASE(RecallTest)
   BOOST_REQUIRE_LE(recallChp, recallThreshChp);
 }
 
+/**
+ * Test: This is a deterministic test that projects 2-dpoints to a known line (axis
+ * 2). The reference set contains 4 well-separated clusters that will merge into
+ * 2 clusters when projected on that axis.
+ *
+ * We create two queries, each one belonging in one cluster (q1 in cluster 3
+ * located around (0, 0) and q2 in cluster 2 located around (3, 3). After the
+ * projection, q1 should have neighbors in C3 and C4 and q2 in C1 and C2.
+ */
+BOOST_AUTO_TEST_CASE(DeterministicMerge)
+{
+  const size_t N = 40; //must be devisable by 4 to create 4 clusters properly
+  arma::mat rdata;
+  arma::mat qdata;
+  getPointset(N, rdata);
+  getQueries(qdata);
+
+
+  const int k = N/2;
+  const double hashWidth = 1;
+  const int secondHashSize = 99901;
+  const int bucketSize = 500;
+
+  //1 table, with one projection to axis 1
+  arma::cube projections(2, 1, 1);
+  projections(0, 0, 0) = 0;
+  projections(1, 0, 0) = 1;
+
+  LSHSearch<> lshTest(rdata, projections, 
+                      hashWidth, secondHashSize, bucketSize);
+
+  arma::Mat<size_t> neighbors;
+  arma::mat distances;
+  lshTest.Search(qdata, k, neighbors, distances);
+
+  // test query 1
+  size_t q;
+  for (size_t j = 0; j < k; ++j) //for each neighbor
+  {
+    q = 0;
+    if (neighbors(j, 0) == N || neighbors(j, 1) == N) //neighbor not found, ignore
+      continue;
+
+    //query 1 is in cluster 3, which under this projection was merged with
+    //cluster 4. Clusters 3 and 4 have points 20:39, so only neighbors among
+    //those should be found
+    q = 0;
+    BOOST_REQUIRE(neighbors(j, q) >= N/2);
+  
+    //query 2 is in cluster 2, which under this projection was merged with
+    //cluster 1. Clusters 1 and 2 have points 0:19, so only neighbors among
+    //those should be found
+    q = 1;
+    BOOST_REQUIRE(neighbors(j, q) < N/2);
+
+  }
+}
+
+
+/**
+ * Test: This is a deterministic test that projects 2-di points to the plane.
+ * The reference set contains 4 well-separated clusters that should not merge.
+ *
+ * We create two queries, each one belonging in one cluster (q1 in cluster 3
+ * located around (0, 0) and q2 in cluster 2 located around (3, 3). The test is
+ * a success if, after the projection, q1 should have neighbors in C3 and q2 
+ * in C2.
+ */
+BOOST_AUTO_TEST_CASE(DeterministicNoMerge)
+{
+  const size_t N = 40;
+  arma::mat rdata;
+  arma::mat qdata;
+  getPointset(N, rdata);
+  getQueries(qdata);
+
+
+  const int k = N/2;
+  const double hashWidth = 1;
+  const int secondHashSize = 99901;
+  const int bucketSize = 500;
+
+  //1 table, with one projection to axis 1
+  arma::cube projections(2, 2, 1);
+  projections(0, 0, 0) = 0;
+  projections(1, 0, 0) = 1;
+  projections(0, 1, 0) = 1;
+  projections(1, 1, 0) = 0;
+
+  LSHSearch<> lshTest(rdata, projections, 
+                      hashWidth, secondHashSize, bucketSize);
+
+  arma::Mat<size_t> neighbors;
+  arma::mat distances;
+  lshTest.Search(qdata, k, neighbors, distances);
+
+  // test query 1
+  size_t q;
+  for (size_t j = 0; j < k; ++j) //for each neighbor
+  {
+  
+    //neighbor not found, ignore
+    if (neighbors(j, 0) == N || neighbors(j, 1) == N)
+      continue;
+
+    q = 0;
+    //query 1 is in cluster 3, which is points 20:29
+    BOOST_REQUIRE(
+        neighbors(j, q) >= N/2 && neighbors(j, q) < 3*N/4
+        );
+
+    q = 1;
+    //query 2 is in cluster 2, which is points 10:19
+    BOOST_REQUIRE(
+        neighbors(j, q) >= N/4 && neighbors(j, q) < N/2
+        );
+  }
+
+}
 BOOST_AUTO_TEST_CASE(LSHTrainTest)
 {
   // This is a not very good test that simply checks that the re-trained LSH

From 330e82c648e83b6e8f417a41384957dfe8c81bb4 Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Sat, 4 Jun 2016 16:43:08 +0300
Subject: [PATCH 86/87] Style fixes and remove redundancies

---
 src/mlpack/tests/lsh_test.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 26cf24d854c..9104dab6de9 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -400,7 +400,6 @@ BOOST_AUTO_TEST_CASE(DeterministicMerge)
   size_t q;
   for (size_t j = 0; j < k; ++j) //for each neighbor
   {
-    q = 0;
     if (neighbors(j, 0) == N || neighbors(j, 1) == N) //neighbor not found, ignore
       continue;
 
@@ -466,14 +465,14 @@ BOOST_AUTO_TEST_CASE(DeterministicNoMerge)
     if (neighbors(j, 0) == N || neighbors(j, 1) == N)
       continue;
 
-    q = 0;
     //query 1 is in cluster 3, which is points 20:29
+    q = 0;
     BOOST_REQUIRE(
         neighbors(j, q) >= N/2 && neighbors(j, q) < 3*N/4
         );
 
-    q = 1;
     //query 2 is in cluster 2, which is points 10:19
+    q = 1;
     BOOST_REQUIRE(
         neighbors(j, q) >= N/4 && neighbors(j, q) < N/2
         );

From 6469dac448ba9f583dc70b0b79c9785c30923a86 Mon Sep 17 00:00:00 2001
From: Yannis Mentekidis <mentekid@gmail.com>
Date: Sun, 5 Jun 2016 09:26:15 +0300
Subject: [PATCH 87/87] Fixes style problems

---
 src/mlpack/tests/lsh_test.cpp | 72 +++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 9104dab6de9..85881b78cee 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -33,40 +33,52 @@ double ComputeRecall(
  * Generates a point set of four clusters around (0.5, 0.5),
  * (3.5, 0.5), (0.5, 3.5), (3.5, 3.5)
  */
-void getPointset(const size_t N, arma::mat& rdata)
+void GetPointset(const size_t N, arma::mat& rdata)
 {
   const size_t d = 2;
   // Create four clusters of points
-  arma::mat C1(d, N/4, arma::fill::randu);
-  arma::mat C2(d, N/4, arma::fill::randu);
-  arma::mat C3(d, N/4, arma::fill::randu);
-  arma::mat C4(d, N/4, arma::fill::randu);
+  arma::mat C1(d, N / 4, arma::fill::randu);
+  arma::mat C2(d, N / 4, arma::fill::randu);
+  arma::mat C3(d, N / 4, arma::fill::randu);
+  arma::mat C4(d, N / 4, arma::fill::randu);
 
   arma::colvec offset1;
-  offset1<<0<<arma::endr<<3<<arma::endr;
+  offset1
+    <<0<<arma::endr
+    <<3<<arma::endr;
+  
   arma::colvec offset2;
-  offset2<<3<<arma::endr<<3<<arma::endr;
+  offset2
+    <<3<<arma::endr
+    <<3<<arma::endr;
+  
   arma::colvec offset4;
-  offset4<<3<<arma::endr<<0<<arma::endr;
+  offset4
+    <<3<<arma::endr
+    <<0<<arma::endr;
+  
   //spread points in plane
-  for (size_t p = 0; p < N/4; ++p)
+  for (size_t p = 0; p < N / 4; ++p)
   {
-    C1.col(p)+=offset1;
-    C2.col(p)+=offset2;
-    C4.col(p)+=offset4;
+    C1.col(p) += offset1;
+    C2.col(p) += offset2;
+    C4.col(p) += offset4;
   }
 
   rdata.set_size(d, N);
-  rdata.cols(0, N/4-1) = C1;
-  rdata.cols(N/4, N/2-1) = C2;
-  rdata.cols(N/2, 3*N/4-1) = C3;
-  rdata.cols(3*N/4, N-1) = C4;
+  rdata.cols(0, (N / 4) - 1) = C1;
+
+  rdata.cols(N / 4, (N / 2) - 1) = C2;
+  
+  rdata.cols(N / 2, (3 * N / 4) - 1) = C3;
+  
+  rdata.cols(3 * N / 4, N - 1) = C4;
 }
 
 /**
  * Generates two queries, one around (0.5, 0.5) and one around (3.5, 3.5)
  */
-void getQueries(arma::mat& qdata)
+void GetQueries(arma::mat& qdata)
 {
   const size_t d = 2;
   // generate two queries inside two of the clusters
@@ -77,8 +89,8 @@ void getQueries(arma::mat& qdata)
 
   // offset second query to go into cluster 2
   q2.randu(d, 1);
-  q2.row(0)+=3;
-  q2.row(1)+=3;
+  q2.row(0) += 3;
+  q2.row(1) += 3;
 
   qdata.set_size(d, 2);
   qdata.col(0) = q1;
@@ -375,11 +387,11 @@ BOOST_AUTO_TEST_CASE(DeterministicMerge)
   const size_t N = 40; //must be devisable by 4 to create 4 clusters properly
   arma::mat rdata;
   arma::mat qdata;
-  getPointset(N, rdata);
-  getQueries(qdata);
+  GetPointset(N, rdata);
+  GetQueries(qdata);
 
 
-  const int k = N/2;
+  const int k = N / 2;
   const double hashWidth = 1;
   const int secondHashSize = 99901;
   const int bucketSize = 500;
@@ -407,13 +419,13 @@ BOOST_AUTO_TEST_CASE(DeterministicMerge)
     //cluster 4. Clusters 3 and 4 have points 20:39, so only neighbors among
     //those should be found
     q = 0;
-    BOOST_REQUIRE(neighbors(j, q) >= N/2);
+    BOOST_REQUIRE(neighbors(j, q) >= N / 2);
   
     //query 2 is in cluster 2, which under this projection was merged with
     //cluster 1. Clusters 1 and 2 have points 0:19, so only neighbors among
     //those should be found
     q = 1;
-    BOOST_REQUIRE(neighbors(j, q) < N/2);
+    BOOST_REQUIRE(neighbors(j, q) < N / 2);
 
   }
 }
@@ -433,11 +445,11 @@ BOOST_AUTO_TEST_CASE(DeterministicNoMerge)
   const size_t N = 40;
   arma::mat rdata;
   arma::mat qdata;
-  getPointset(N, rdata);
-  getQueries(qdata);
+  GetPointset(N, rdata);
+  GetQueries(qdata);
 
 
-  const int k = N/2;
+  const int k = N / 2;
   const double hashWidth = 1;
   const int secondHashSize = 99901;
   const int bucketSize = 500;
@@ -468,13 +480,15 @@ BOOST_AUTO_TEST_CASE(DeterministicNoMerge)
     //query 1 is in cluster 3, which is points 20:29
     q = 0;
     BOOST_REQUIRE(
-        neighbors(j, q) >= N/2 && neighbors(j, q) < 3*N/4
+        neighbors(j, q) < 3 * N / 4 &&
+        neighbors(j, q) >= N / 2
         );
 
     //query 2 is in cluster 2, which is points 10:19
     q = 1;
     BOOST_REQUIRE(
-        neighbors(j, q) >= N/4 && neighbors(j, q) < N/2
+        neighbors(j, q) < N / 2 &&
+        neighbors(j, q) >= N / 4
         );
   }