Merge pull request #1038 from rcurtin/num_classes
Refactor classifiers to take numClasses parameters
rcurtin committed Jul 25, 2017
2 parents cea8fe2 + e717023 commit dc70207
Showing 16 changed files with 227 additions and 127 deletions.
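The practical effect at call sites: each classifier now takes the number of classes as an explicit argument rather than inferring it from the labels. A rough before/after sketch (illustrative only; data, labels, p, iterations, and tolerance are placeholders):

// Before this commit: AdaBoost deduced the class count from the labels itself.
// AdaBoost<Perceptron<>> ab(data, labels, p, iterations, tolerance);

// After this commit: the caller passes numClasses explicitly.
const size_t numClasses = arma::max(labels) + 1;  // assumes labels in {0, ..., k - 1}
AdaBoost<Perceptron<>> ab(data, labels, numClasses, p, iterations, tolerance);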
6 changes: 4 additions & 2 deletions src/mlpack/methods/adaboost/adaboost.hpp
@@ -95,6 +95,7 @@ class AdaBoost
*/
AdaBoost(const MatType& data,
const arma::Row<size_t>& labels,
const size_t numClasses,
const WeakLearnerType& other,
const size_t iterations = 100,
const double tolerance = 1e-6);
@@ -114,7 +115,7 @@ class AdaBoost
double& Tolerance() { return tolerance; }

//! Get the number of classes this model is trained on.
size_t Classes() const { return classes; }
size_t NumClasses() const { return numClasses; }

//! Get the number of weak learners in the model.
size_t WeakLearners() const { return alpha.size(); }
@@ -142,6 +143,7 @@ class AdaBoost
*/
void Train(const MatType& data,
const arma::Row<size_t>& labels,
const size_t numClasses,
const WeakLearnerType& learner,
const size_t iterations = 100,
const double tolerance = 1e-6);
@@ -163,7 +165,7 @@

private:
//! The number of classes in the model.
size_t classes;
size_t numClasses;
// The tolerance for change in rt and when to stop.
double tolerance;

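For an already-constructed model, Train() follows the same pattern. A minimal self-contained sketch of the new interface, assuming 0-indexed contiguous labels and the mlpack 2.x header layout (the dataset here is synthetic and the numbers are arbitrary):

#include <mlpack/methods/adaboost/adaboost.hpp>
#include <mlpack/methods/perceptron/perceptron.hpp>

using namespace mlpack::adaboost;
using namespace mlpack::perceptron;

int main()
{
  // Synthetic data: 10 dimensions, 300 points, 3 classes.
  arma::mat data = arma::randu<arma::mat>(10, 300);
  arma::Row<size_t> labels(300);
  for (size_t i = 0; i < labels.n_elem; ++i)
    labels[i] = i % 3;

  const size_t numClasses = arma::max(labels) + 1;

  // Weak learner whose settings the boosted learners will copy.
  Perceptron<> p(data, labels, numClasses);

  AdaBoost<Perceptron<>> ab;  // empty constructor
  ab.Train(data, labels, numClasses, p, 100 /* iterations */, 1e-6 /* tolerance */);

  arma::Row<size_t> predictions;
  ab.Classify(data, predictions);
}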
21 changes: 11 additions & 10 deletions src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -44,11 +44,12 @@ template<typename WeakLearnerType, typename MatType>
AdaBoost<WeakLearnerType, MatType>::AdaBoost(
const MatType& data,
const arma::Row<size_t>& labels,
const size_t numClasses,
const WeakLearnerType& other,
const size_t iterations,
const double tol)
{
Train(data, labels, other, iterations, tol);
Train(data, labels, numClasses, other, iterations, tol);
}

// Empty constructor.
@@ -64,6 +65,7 @@ template<typename WeakLearnerType, typename MatType>
void AdaBoost<WeakLearnerType, MatType>::Train(
const MatType& data,
const arma::Row<size_t>& labels,
const size_t numClasses,
const WeakLearnerType& other,
const size_t iterations,
const double tolerance)
@@ -72,9 +74,8 @@ void AdaBoost<WeakLearnerType, MatType>::Train(
wl.clear();
alpha.clear();

// Count the number of classes.
classes = (arma::max(labels) - arma::min(labels)) + 1;
this->tolerance = tolerance;
this->numClasses = numClasses;

// crt is the cumulative rt value for terminating the optimization when rt is
// changing by less than the tolerance.
@@ -89,11 +90,12 @@ void AdaBoost<WeakLearnerType, MatType>::Train(
MatType tempData(data);

// This matrix is a helper matrix used to calculate the final hypothesis.
arma::mat sumFinalH = arma::zeros<arma::mat>(classes, predictedLabels.n_cols);
arma::mat sumFinalH = arma::zeros<arma::mat>(numClasses,
predictedLabels.n_cols);

// Load the initial weights into a 2-D matrix.
const double initWeight = 1.0 / double(data.n_cols * classes);
arma::mat D(classes, data.n_cols);
const double initWeight = 1.0 / double(data.n_cols * numClasses);
arma::mat D(numClasses, data.n_cols);
D.fill(initWeight);

// Weights are stored in this row vector.
@@ -117,7 +119,7 @@ void AdaBoost<WeakLearnerType, MatType>::Train(
weights = arma::sum(D);

// Use the existing weak learner to train a new one with new weights.
WeakLearnerType w(other, tempData, labels, weights);
WeakLearnerType w(other, tempData, labels, numClasses, weights);
w.Classify(tempData, predictedLabels);

// Now from predictedLabels, build ht, the weak hypothesis
@@ -165,7 +167,6 @@ void AdaBoost<WeakLearnerType, MatType>::Train(
D(k, j) /= expo;
zt += D(k, j); // * exp(-1 * alphat * yt(j,k) * ht(j,k));


// Add to the final hypothesis matrix.
// sumFinalH(k, j) += (alphat * ht(k, j));
if (k == labels(j))
@@ -208,7 +209,7 @@ void AdaBoost<WeakLearnerType, MatType>::Classify(
arma::Row<size_t>& predictedLabels)
{
arma::Row<size_t> tempPredictedLabels(test.n_cols);
arma::mat cMatrix(classes, test.n_cols);
arma::mat cMatrix(numClasses, test.n_cols);

cMatrix.zeros();
predictedLabels.set_size(test.n_cols);
@@ -240,7 +241,7 @@ template<typename Archive>
void AdaBoost<WeakLearnerType, MatType>::Serialize(Archive& ar,
const unsigned int /* version */)
{
ar & data::CreateNVP(classes, "classes");
ar & data::CreateNVP(numClasses, "classes");
ar & data::CreateNVP(tolerance, "tolerance");
ar & data::CreateNVP(ztProduct, "ztProduct");
ar & data::CreateNVP(alpha, "alpha");
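One consequence of the Train() change above: each boosting round now constructs its learner as WeakLearnerType w(other, tempData, labels, numClasses, weights), so a type used as WeakLearnerType must provide roughly the following shape (informal sketch of the implied contract, not a class mlpack ships):

#include <mlpack/core.hpp>

template<typename MatType = arma::mat>
class MyWeakLearner
{
 public:
  // Copy hyperparameters from `other`, then train on the weighted data.
  MyWeakLearner(const MyWeakLearner& other,
                const MatType& data,
                const arma::Row<size_t>& labels,
                const size_t numClasses,
                const arma::rowvec& weights);

  // Fill `predictions` with one label per column of `test`.
  void Classify(const MatType& test, arma::Row<size_t>& predictions);
};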
5 changes: 4 additions & 1 deletion src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -202,8 +202,11 @@ int main(int argc, char *argv[])
else if (weakLearner == "perceptron")
m.WeakLearnerType() = AdaBoostModel::WeakLearnerTypes::PERCEPTRON;

const size_t numClasses = m.Mappings().n_elem;
Log::Info << numClasses << " classes in dataset." << endl;

Timer::Start("adaboost_training");
m.Train(trainingData, labels, iterations, tolerance);
m.Train(trainingData, labels, numClasses, iterations, tolerance);
Timer::Stop("adaboost_training");
}
else
7 changes: 4 additions & 3 deletions src/mlpack/methods/adaboost/adaboost_model.cpp
@@ -92,6 +92,7 @@ AdaBoostModel::~AdaBoostModel()
//! Train the model.
void AdaBoostModel::Train(const mat& data,
const Row<size_t>& labels,
const size_t numClasses,
const size_t iterations,
const double tolerance)
{
@@ -101,13 +102,13 @@ void AdaBoostModel::Train(const mat& data,
delete dsBoost;

DecisionStump<> ds(data, labels, max(labels) + 1);
dsBoost = new AdaBoost<DecisionStump<>>(data, labels, ds, iterations,
tolerance);
dsBoost = new AdaBoost<DecisionStump<>>(data, labels, numClasses, ds,
iterations, tolerance);
}
else if (weakLearnerType == WeakLearnerTypes::PERCEPTRON)
{
Perceptron<> p(data, labels, max(labels) + 1);
pBoost = new AdaBoost<Perceptron<>>(data, labels, p, iterations,
pBoost = new AdaBoost<Perceptron<>>(data, labels, numClasses, p, iterations,
tolerance);
}
}
1 change: 1 addition & 0 deletions src/mlpack/methods/adaboost/adaboost_model.hpp
@@ -77,6 +77,7 @@ class AdaBoostModel
//! Train the model.
void Train(const arma::mat& data,
const arma::Row<size_t>& labels,
const size_t numClasses,
const size_t iterations,
const double tolerance);

15 changes: 8 additions & 7 deletions src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -40,12 +40,12 @@ class DecisionStump
*
* @param data Input, training data.
* @param labels Labels of training data.
* @param classes Number of distinct classes in labels.
* @param numClasses Number of distinct classes in labels.
* @param bucketSize Minimum size of bucket when splitting.
*/
DecisionStump(const MatType& data,
const arma::Row<size_t>& labels,
const size_t classes,
const size_t numClasses,
const size_t bucketSize = 10);

/**
@@ -62,6 +62,7 @@ class DecisionStump
DecisionStump(const DecisionStump<>& other,
const MatType& data,
const arma::Row<size_t>& labels,
const size_t numClasses,
const arma::rowvec& weights);

/**
@@ -78,12 +79,12 @@
*
* @param data Dataset to train on.
* @param labels Labels for each point in the dataset.
* @param classes Number of classes in the dataset.
* @param numClasses Number of classes in the dataset.
* @param bucketSize Minimum size of bucket when splitting.
*/
void Train(const MatType& data,
const arma::Row<size_t>& labels,
const size_t classes,
const size_t numClasses,
const size_t bucketSize);

/**
Expand All @@ -94,13 +95,13 @@ class DecisionStump
* @param data Dataset to train on.
* @param labels Labels for each point in the dataset.
* @param weights Weights for each point in the dataset.
* @param classes Number of classes in the dataset.
* @param numClasses Number of classes in the dataset.
* @param bucketSize Minimum size of bucket when splitting.
*/
void Train(const MatType& data,
const arma::Row<size_t>& labels,
const arma::rowvec& weights,
const size_t classes,
const size_t numClasses,
const size_t bucketSize);

/**
Expand Down Expand Up @@ -134,7 +135,7 @@ class DecisionStump

private:
//! The number of classes (we must store this for boosting).
size_t classes;
size_t numClasses;
//! The minimum number of points in a bucket.
size_t bucketSize;

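Used directly, DecisionStump follows the same convention; a minimal sketch assuming labels in {0, ..., numClasses - 1} and the mlpack 2.x header layout:

#include <mlpack/methods/decision_stump/decision_stump.hpp>

using namespace mlpack::decision_stump;

int main()
{
  arma::mat data = arma::randu<arma::mat>(5, 200);
  arma::Row<size_t> labels(200);
  for (size_t i = 0; i < labels.n_elem; ++i)
    labels[i] = i % 2;

  const size_t numClasses = arma::max(labels) + 1;
  DecisionStump<> stump(data, labels, numClasses, 10 /* bucketSize */);

  arma::Row<size_t> predictions;
  stump.Classify(data, predictions);
}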
28 changes: 14 additions & 14 deletions src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -9,7 +9,6 @@
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/

#ifndef MLPACK_METHODS_DECISION_STUMP_DECISION_STUMP_IMPL_HPP
#define MLPACK_METHODS_DECISION_STUMP_DECISION_STUMP_IMPL_HPP

@@ -24,15 +23,15 @@ namespace decision_stump {
*
* @param data Input, training data.
* @param labels Labels of data.
* @param classes Number of distinct classes in labels.
* @param numClasses Number of distinct classes in labels.
* @param bucketSize Minimum size of bucket when splitting.
*/
template<typename MatType>
DecisionStump<MatType>::DecisionStump(const MatType& data,
const arma::Row<size_t>& labels,
const size_t classes,
const size_t numClasses,
const size_t bucketSize) :
classes(classes),
numClasses(numClasses),
bucketSize(bucketSize)
{
arma::rowvec weights;
@@ -44,7 +43,7 @@ DecisionStump<MatType>::DecisionStump(const MatType& data,
*/
template<typename MatType>
DecisionStump<MatType>::DecisionStump() :
classes(1),
numClasses(1),
bucketSize(0),
splitDimension(0),
split(1),
@@ -60,10 +59,10 @@ DecisionStump<MatType>::DecisionStump() :
template<typename MatType>
void DecisionStump<MatType>::Train(const MatType& data,
const arma::Row<size_t>& labels,
const size_t classes,
const size_t numClasses,
const size_t bucketSize)
{
this->classes = classes;
this->numClasses = numClasses;
this->bucketSize = bucketSize;

// Pass to unweighted training function.
@@ -80,10 +79,10 @@ template<typename MatType>
void DecisionStump<MatType>::Train(const MatType& data,
const arma::Row<size_t>& labels,
const arma::rowvec& weights,
const size_t classes,
const size_t numClasses,
const size_t bucketSize)
{
this->classes = classes;
this->numClasses = numClasses;
this->bucketSize = bucketSize;

// Pass to weighted training function.
@@ -186,8 +185,9 @@ template<typename MatType>
DecisionStump<MatType>::DecisionStump(const DecisionStump<>& other,
const MatType& data,
const arma::Row<size_t>& labels,
const size_t numClasses,
const arma::rowvec& weights) :
classes(other.classes),
numClasses(numClasses),
bucketSize(other.bucketSize)
{
Train<true>(data, labels, weights);
@@ -205,7 +205,7 @@ void DecisionStump<MatType>::Serialize(Archive& ar,

// This is straightforward; just serialize all of the members of the class.
// None need special handling.
ar & CreateNVP(classes, "classes");
ar & CreateNVP(numClasses, "classes");
ar & CreateNVP(bucketSize, "bucketSize");
ar & CreateNVP(splitDimension, "splitDimension");
ar & CreateNVP(split, "split");
@@ -469,7 +469,7 @@ double DecisionStump<MatType>::CalculateEntropy(
double entropy = 0.0;
size_t j;

arma::rowvec numElem(classes);
arma::rowvec numElem(numClasses);
numElem.fill(0);

// Variable to accumulate the weight in this subview_row.
@@ -484,7 +484,7 @@ double DecisionStump<MatType>::CalculateEntropy(
accWeight += weights(j);
}

for (j = 0; j < classes; j++)
for (j = 0; j < numClasses; j++)
{
const double p1 = ((double) numElem(j) / accWeight);

@@ -499,7 +499,7 @@
for (j = 0; j < labels.n_elem; j++)
numElem(labels(j))++;

for (j = 0; j < classes; j++)
for (j = 0; j < numClasses; j++)
{
const double p1 = ((double) numElem(j) / labels.n_elem);

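For context on the CalculateEntropy() hunks above: the stump accumulates a (possibly weighted) count per class and converts those counts into probabilities over numClasses bins. A standalone sketch of that kind of weighted entropy computation (illustrative only; mlpack's actual code path and sign convention may differ):

#include <armadillo>
#include <cmath>

// Weighted Shannon entropy of `labels` over `numClasses` classes.
double WeightedEntropy(const arma::Row<size_t>& labels,
                       const arma::rowvec& weights,
                       const size_t numClasses)
{
  arma::rowvec numElem(numClasses);
  numElem.fill(0);

  double accWeight = 0.0;
  for (size_t j = 0; j < labels.n_elem; ++j)
  {
    numElem(labels(j)) += weights(j);
    accWeight += weights(j);
  }

  double entropy = 0.0;
  for (size_t j = 0; j < numClasses; ++j)
  {
    const double p = numElem(j) / accWeight;
    if (p > 0.0)
      entropy -= p * std::log2(p);  // contribution of class j
  }
  return entropy;
}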
