Skip to content
Permalink
Browse files

Merge branch 'probabilities_param' of https://github.com/adityaviki/m…

…lpack into adityaviki-probabilities_param
  • Loading branch information
rcurtin committed Dec 28, 2019
2 parents d467fdf + e27de58 commit 89a38e508553616e8382bddede96696de0f75db6
@@ -123,6 +123,7 @@ Copyright:
Copyright 2019, Jai Agarwal <jai.bhageria@gmail.com>
Copyright 2019, Ziyang Jiang <zij004@alumni.stanford.edu>
Copyright 2019, Rohit Kartik <rohit.audrey@gmail.com>
Copyright 2019, Aditya Viki <adityaviki01@gmail.com>

License: BSD-3-clause
All rights reserved.
@@ -1,5 +1,8 @@
### mlpack ?.?.?
###### ????-??-??
* Added `probabilities_file` parameter to get the probabilities matrix of
the AdaBoost classifier (#2050).

* Fix STB header search paths (#2104).

* Add `DISABLE_DOWNLOADS` CMake configuration option (#2104).
@@ -150,10 +150,24 @@ class AdaBoost
* Classify the given test points.
*
* @param test Testing data.
* @param predictedLabels Vector in which to the predicted labels of the test
* @param predictedLabels Vector in which the predicted labels of the test
* set will be stored.
* @param probabilities Matrix in which the predicted class probabilities for
* each point in the test set will be stored.
*/
void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
void Classify(const MatType& test,
arma::Row<size_t>& predictedLabels,
arma::mat& probabilities);

/**
* Classify the given test points.
*
* @param test Testing data.
* @param predictedLabels Vector in which the predicted labels of the test
* set will be stored.
*/
void Classify(const MatType& test,
arma::Row<size_t>& predictedLabels);

/**
* Serialize the AdaBoost model.
@@ -211,26 +211,41 @@ void AdaBoost<WeakLearnerType, MatType>::Classify(
arma::Row<size_t>& predictedLabels)
{
arma::Row<size_t> tempPredictedLabels(test.n_cols);
arma::mat cMatrix(numClasses, test.n_cols);
arma::mat probabilities;

cMatrix.zeros();
Classify(test, predictedLabels, probabilities);
}

/**
 * Classify the given test points and compute per-class probabilities.
 *
 * Every weak learner labels each test point, and the learner's alpha value is
 * added to the entry of the chosen class for that point.  Each column is then
 * normalized to sum to one, and the row holding the largest value becomes the
 * predicted label of the point.
 *
 * @param test Testing data.
 * @param predictedLabels Vector in which the predicted labels of the test
 *     set will be stored; it is resized to test.n_cols.
 * @param probabilities Matrix in which the predicted class probabilities for
 *     each point in the test set will be stored; it is resized to
 *     numClasses x test.n_cols.
 */
template<typename WeakLearnerType, typename MatType>
void AdaBoost<WeakLearnerType, MatType>::Classify(
    const MatType& test,
    arma::Row<size_t>& predictedLabels,
    arma::mat& probabilities)
{
  arma::Row<size_t> tempPredictedLabels(test.n_cols);

  probabilities.zeros(numClasses, test.n_cols);
  predictedLabels.set_size(test.n_cols);

  // Accumulate the alpha-weighted vote of every weak learner.
  for (size_t i = 0; i < wl.size(); i++)
  {
    wl[i].Classify(test, tempPredictedLabels);

    for (size_t j = 0; j < tempPredictedLabels.n_cols; j++)
      probabilities(tempPredictedLabels(j), j) += alpha[i];
  }

  arma::colvec pRow;
  arma::uword maxIndex = 0;

  for (size_t i = 0; i < predictedLabels.n_cols; i++)
  {
    // Skip the normalization when nothing was accumulated for this point
    // (e.g. there are no weak learners); dividing by zero would fill the
    // column with NaN.
    const double total = arma::accu(probabilities.col(i));
    if (total != 0.0)
      probabilities.col(i) /= total;

    pRow = probabilities.unsafe_col(i);
    pRow.max(maxIndex);
    predictedLabels(i) = maxIndex;
  }
}
@@ -121,6 +121,8 @@ PARAM_MATRIX_IN("test", "Test dataset.", "T");
// PARAM_UROW_OUT("output") is deprecated and will be removed in mlpack 4.0.0.
PARAM_UROW_OUT("output", "Predicted labels for the test set.", "o");
PARAM_UROW_OUT("predictions", "Predicted labels for the test set.", "P");
PARAM_MATRIX_OUT("probabilities", "Predicted class probabilities for each "
"point in the test set.", "p");

// Training options.
PARAM_INT_IN("iterations", "The maximum number of boosting iterations to be run"
@@ -233,9 +235,20 @@ static void mlpackMain()
<< m->Dimensionality() << ")!" << endl;

Row<size_t> predictedLabels(testingData.n_cols);
Timer::Start("adaboost_classification");
m->Classify(testingData, predictedLabels);
Timer::Stop("adaboost_classification");
mat probabilities;

if (CLI::HasParam("probabilities"))
{
Timer::Start("adaboost_classification");
m->Classify(testingData, predictedLabels, probabilities);
Timer::Stop("adaboost_classification");
}
else
{
Timer::Start("adaboost_classification");
m->Classify(testingData, predictedLabels);
Timer::Stop("adaboost_classification");
}

Row<size_t> results;
data::RevertLabels(predictedLabels, m->Mappings(), results);
@@ -245,6 +258,8 @@ static void mlpackMain()
CLI::GetParam<arma::Row<size_t>>("output") = results;
if (CLI::HasParam("predictions"))
CLI::GetParam<arma::Row<size_t>>("predictions") = std::move(results);
if (CLI::HasParam("probabilities"))
CLI::GetParam<arma::mat>("probabilities") = std::move(probabilities);
}

CLI::GetParam<AdaBoostModel*>("output_model") = m;
@@ -119,7 +119,19 @@ void AdaBoostModel::Train(const mat& data,
}

//! Classify test points.
void AdaBoostModel::Classify(const mat& testData, Row<size_t>& predictions)
void AdaBoostModel::Classify(const mat& testData,
                             Row<size_t>& predictions,
                             mat& probabilities)
{
  // Forward the request to the boosted model that matches the stored weak
  // learner type; for any other type value nothing is called.
  if (weakLearnerType == WeakLearnerTypes::DECISION_STUMP)
  {
    dsBoost->Classify(testData, predictions, probabilities);
    return;
  }

  if (weakLearnerType == WeakLearnerTypes::PERCEPTRON)
    pBoost->Classify(testData, predictions, probabilities);
}

//! Classify test points.
void AdaBoostModel::Classify(const mat& testData,
Row<size_t>& predictions)
{
if (weakLearnerType == WeakLearnerTypes::DECISION_STUMP)
dsBoost->Classify(testData, predictions);
@@ -87,7 +87,13 @@ class AdaBoostModel
const double tolerance);

//! Classify test points.
void Classify(const arma::mat& testData, arma::Row<size_t>& predictions);
void Classify(const arma::mat& testData,
arma::Row<size_t>& predictions);

//! Classify test points.
void Classify(const arma::mat& testData,
arma::Row<size_t>& predictions,
arma::mat& probabilities);

//! Serialize the model.
template<typename Archive>
@@ -620,12 +620,33 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_VERTEBRALCOL)
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), numClasses, p, iterations, tolerance);

arma::Row<size_t> predictedLabels(testData.n_cols);
a.Classify(testData, predictedLabels);
arma::Row<size_t> predictedLabels1(testData.n_cols),
predictedLabels2(testData.n_cols);
arma::mat probabilities;

a.Classify(testData, predictedLabels1);
a.Classify(testData, predictedLabels2, probabilities);

BOOST_REQUIRE_EQUAL(probabilities.n_cols, testData.n_cols);
BOOST_REQUIRE_EQUAL(probabilities.n_rows, numClasses);

for (size_t i = 0; i < predictedLabels1.n_cols; ++i)
BOOST_REQUIRE_EQUAL(predictedLabels1[i], predictedLabels2[i]);

arma::colvec pRow;
arma::uword maxIndex = 0;

for (size_t i = 0; i < predictedLabels1.n_cols; i++)
{
pRow = probabilities.unsafe_col(i);
pRow.max(maxIndex);
BOOST_REQUIRE_EQUAL(predictedLabels1(i), maxIndex);
BOOST_REQUIRE_CLOSE(arma::accu(probabilities.col(i)), 1, 1e-5);
}

size_t localError = 0;
for (size_t i = 0; i < trueTestLabels.n_cols; i++)
if (trueTestLabels(i) != predictedLabels(i))
if (trueTestLabels(i) != predictedLabels1(i))
localError++;

double lError = (double) localError / trueTestLabels.n_cols;
@@ -671,12 +692,32 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
AdaBoost<DecisionStump<> > a(inputData, labels.row(0), numClasses, ds,
iterations, tolerance);

arma::Row<size_t> predictedLabels(testData.n_cols);
a.Classify(testData, predictedLabels);
arma::Row<size_t> predictedLabels1(testData.n_cols),
predictedLabels2(testData.n_cols);
arma::mat probabilities;

a.Classify(testData, predictedLabels1);
a.Classify(testData, predictedLabels2, probabilities);

BOOST_REQUIRE_EQUAL(probabilities.n_cols, testData.n_cols);

for (size_t i = 0; i < predictedLabels1.n_cols; ++i)
BOOST_REQUIRE_EQUAL(predictedLabels1[i], predictedLabels2[i]);

arma::colvec pRow;
arma::uword maxIndex = 0;

for (size_t i = 0; i < predictedLabels1.n_cols; i++)
{
pRow = probabilities.unsafe_col(i);
pRow.max(maxIndex);
BOOST_REQUIRE_EQUAL(predictedLabels1(i), maxIndex);
BOOST_REQUIRE_CLOSE(arma::accu(probabilities.col(i)), 1, 1e-5);
}

size_t localError = 0;
for (size_t i = 0; i < trueTestLabels.n_cols; i++)
if (trueTestLabels(i) != predictedLabels(i))
if (trueTestLabels(i) != predictedLabels1(i))
localError++;

double lError = (double) localError / trueTestLabels.n_cols;
@@ -716,16 +757,38 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_IRIS)
BOOST_FAIL("Cannot load test dataset iris_test.csv!");

arma::Row<size_t> predictedLabels(testData.n_cols);

a.Classify(testData, predictedLabels);

arma::Mat<size_t> trueTestLabels;
if (!data::Load("iris_test_labels.csv", trueTestLabels))
BOOST_FAIL("Cannot load test dataset iris_test_labels.csv!");

arma::Row<size_t> predictedLabels1(testData.n_cols),
predictedLabels2(testData.n_cols);
arma::mat probabilities;

a.Classify(testData, predictedLabels1);
a.Classify(testData, predictedLabels2, probabilities);

BOOST_REQUIRE_EQUAL(probabilities.n_cols, testData.n_cols);

for (size_t i = 0; i < predictedLabels1.n_cols; ++i)
BOOST_REQUIRE_EQUAL(predictedLabels1[i], predictedLabels2[i]);

arma::colvec pRow;
arma::uword maxIndex = 0;

for (size_t i = 0; i < predictedLabels1.n_cols; i++)
{
pRow = probabilities.unsafe_col(i);
pRow.max(maxIndex);
BOOST_REQUIRE_EQUAL(predictedLabels1(i), maxIndex);
BOOST_REQUIRE_CLOSE(arma::accu(probabilities.col(i)), 1, 1e-5);
}

size_t localError = 0;
for (size_t i = 0; i < trueTestLabels.n_cols; i++)
if (trueTestLabels(i) != predictedLabels(i))
if (trueTestLabels(i) != predictedLabels1(i))
localError++;
double lError = (double) localError / labels.n_cols;
BOOST_REQUIRE_LE(lError, 0.30);
@@ -76,6 +76,43 @@ BOOST_AUTO_TEST_CASE(AdaBoostOutputDimensionTest)
BOOST_REQUIRE_EQUAL(CLI::GetParam<arma::Row<size_t>>("output").n_rows, 1);
}

/**
 * Check that the probabilities matrix has one column per test point and that
 * each of its columns sums up to 1.
 */
BOOST_AUTO_TEST_CASE(AdaBoostProbabilitiesTest)
{
  arma::mat trainData;
  if (!data::Load("vc2.csv", trainData))
    BOOST_FAIL("Unable to load train dataset vc2.csv!");

  arma::Row<size_t> labels;
  if (!data::Load("vc2_labels.txt", labels))
    BOOST_FAIL("Unable to load label dataset vc2_labels.txt!");

  arma::mat testData;
  if (!data::Load("vc2_test.csv", testData))
    BOOST_FAIL("Unable to load test dataset vc2_test.csv!");

  // Record the number of test points now; testData is moved from below.
  const size_t testSize = testData.n_cols;

  SetInputParam("training", std::move(trainData));
  SetInputParam("labels", std::move(labels));

  SetInputParam("test", std::move(testData));

  mlpackMain();

  // Take ownership of the output matrix in a single initialization.
  arma::mat probabilities =
      std::move(CLI::GetParam<arma::mat>("probabilities"));

  BOOST_REQUIRE_EQUAL(probabilities.n_cols, testSize);

  for (size_t i = 0; i < testSize; ++i)
    BOOST_REQUIRE_CLOSE(arma::accu(probabilities.col(i)), 1, 1e-5);
}

/**
* Ensure that saved model can be used again.
*/

0 comments on commit 89a38e5

Please sign in to comment.
You can’t perform that action at this time.