From 333c541f98076d2c6ea108cb005791554f78bbf4 Mon Sep 17 00:00:00 2001 From: yashu-seth Date: Sun, 18 Dec 2016 10:47:54 -0800 Subject: [PATCH 1/3] adds GammaDistribution::Train(observations, probabilities) --- src/mlpack/core/dists/gamma_distribution.cpp | 29 ++++++++++++++++++++ src/mlpack/core/dists/gamma_distribution.hpp | 3 +- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/dists/gamma_distribution.cpp b/src/mlpack/core/dists/gamma_distribution.cpp index 8913730459b..1a55fad0ee4 100644 --- a/src/mlpack/core/dists/gamma_distribution.cpp +++ b/src/mlpack/core/dists/gamma_distribution.cpp @@ -64,6 +64,35 @@ void GammaDistribution::Train(const arma::mat& rdata, const double tol) Train(logMeanxVec, meanLogxVec, meanxVec, tol); } +//Fits an alpha and beta parameter according to observation probabilities. +void GammaDistribution::Train(const arma::mat& rdata, const arma::vec& probabilities, + const double tol) +{ + // If fittingSet is empty, nothing to do. + if (arma::size(rdata) == arma::size(arma::mat())) + return; + + arma::vec meanLogxVec(rdata.n_rows, arma::fill::zeros); + arma::vec meanxVec(rdata.n_rows, arma::fill::zeros); + arma::vec logMeanxVec(rdata.n_rows, arma::fill::zeros); + + for(size_t i=0; i Date: Fri, 23 Dec 2016 13:01:20 -0800 Subject: [PATCH 2/3] adds tests for GammaDistribution::Train(observations, probabilities) --- src/mlpack/tests/distribution_test.cpp | 142 +++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/src/mlpack/tests/distribution_test.cpp b/src/mlpack/tests/distribution_test.cpp index 2e15786fdc9..9f01d89dd03 100644 --- a/src/mlpack/tests/distribution_test.cpp +++ b/src/mlpack/tests/distribution_test.cpp @@ -442,6 +442,148 @@ BOOST_AUTO_TEST_CASE(GammaDistributionTrainTest) BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d2); } +/** + * This test verifies that the fitting procedure for GammaDistribution works + * properly when probabilities for each sample is given. 
+ */
+BOOST_AUTO_TEST_CASE(GammaDistributionTrainWithProbabilitiesTest)
+{
+  const double alphaReal = 5.4;
+  const double betaReal = 6.7;
+
+  // Create a gamma distribution random generator.  The engine is
+  // default-constructed (never explicitly seeded) and is shared by every
+  // draw in this test.
+  std::default_random_engine generator;
+  std::gamma_distribution<double> dist(alphaReal, betaReal);
+
+  const size_t N = 50000;
+  const size_t d = 2;
+  arma::mat rdata(d, N);
+
+  // Every element of both rows is drawn from the same gamma distribution.
+  for (size_t j = 0; j < d; ++j)
+    for (size_t i = 0; i < N; ++i)
+      rdata(j, i) = dist(generator);
+
+  // Create a uniform distribution random generator.  The template argument
+  // must be spelled out: with the integer bounds (0, 1) there is nothing from
+  // which a floating-point result type could be inferred.
+  std::uniform_real_distribution<double> prob(0, 1);
+  arma::vec probabilities(N);
+
+  // One value per column of rdata (N of each).
+  for (size_t i = 0; i < N; ++i)
+    probabilities(i) = prob(generator);
+
+  // Fit results with probabilities and data.
+  GammaDistribution gDist;
+  gDist.Train(rdata, probabilities);
+
+  // Fit results with only data.
+  GammaDistribution gDist2;
+  gDist2.Train(rdata);
+
+  // The probabilities are drawn independently of the data, so the weighted
+  // fit is required to match the unweighted fit.  The last argument of
+  // BOOST_REQUIRE_CLOSE is a tolerance in percent.
+  BOOST_REQUIRE_CLOSE(gDist2.Alpha(0), gDist.Alpha(0), 1);
+  BOOST_REQUIRE_CLOSE(gDist2.Beta(0), gDist.Beta(0), 1);
+
+  BOOST_REQUIRE_CLOSE(gDist2.Alpha(1), gDist.Alpha(1), 1);
+  BOOST_REQUIRE_CLOSE(gDist2.Beta(1), gDist.Beta(1), 1);
+
+  // The weighted fit is also required to recover the generating parameters.
+  BOOST_REQUIRE_CLOSE(alphaReal, gDist.Alpha(0), 1);
+  BOOST_REQUIRE_CLOSE(betaReal, gDist.Beta(0), 1);
+
+  BOOST_REQUIRE_CLOSE(alphaReal, gDist.Alpha(1), 1);
+  BOOST_REQUIRE_CLOSE(betaReal, gDist.Beta(1), 1);
+}
+
+
+// This test ensures that the same result is obtained when
+// trained with probabilities all set to 1 and with no probabilities at all.
+BOOST_AUTO_TEST_CASE(GammaDistributionTrainALLProbabilities1Test)
+{
+  const double alphaReal = 5.4;
+  const double betaReal = 6.7;
+
+  // Create a gamma distribution random generator.  The engine is
+  // default-constructed (never explicitly seeded).
+  std::default_random_engine generator;
+  std::gamma_distribution<double> dist(alphaReal, betaReal);
+
+  const size_t N = 1000;
+  const size_t d = 2;
+  arma::mat rdata(d, N);
+
+  // Every element of both rows is drawn from the same gamma distribution.
+  for (size_t j = 0; j < d; ++j)
+    for (size_t i = 0; i < N; ++i)
+      rdata(j, i) = dist(generator);
+
+  // Fit results with only data.
+  GammaDistribution gDist;
+  gDist.Train(rdata);
+
+  // Fit results with data and each probability as 1.
+  GammaDistribution gDist2;
+  arma::vec allProbabilities1(N, arma::fill::ones);
+  gDist2.Train(rdata, allProbabilities1);
+
+  // Both fits see the same data, so they are required to agree to within
+  // 1e-5 percent (BOOST_REQUIRE_CLOSE takes its tolerance in percent).
+  BOOST_REQUIRE_CLOSE(gDist2.Alpha(0), gDist.Alpha(0), 1e-5);
+  BOOST_REQUIRE_CLOSE(gDist2.Beta(0), gDist.Beta(0), 1e-5);
+
+  BOOST_REQUIRE_CLOSE(gDist2.Alpha(1), gDist.Alpha(1), 1e-5);
+  BOOST_REQUIRE_CLOSE(gDist2.Beta(1), gDist.Beta(1), 1e-5);
+}
+
+ /** This test draws points from two different gamma distributions,
+ * sets the probabilities for the points from the first distribution
+ * to something small and the probabilities for the second to something large.
+ * It ensures that the gamma distribution recovered has the
+ * same parameters as the second gamma distribution with high probabilities.
+ */
+BOOST_AUTO_TEST_CASE(GammaDistributionTrainTwoDistProbabilities1Test)
+{
+  double alphaReal = 5.4;
+  double betaReal = 6.7;
+
+  double alphaReal2 = 1.9;
+  double betaReal2 = 8.4;
+
+  // Create two gamma distribution random generators
+  std::default_random_engine generator;
+  std::gamma_distribution<double> dist(alphaReal, betaReal);
+  std::gamma_distribution<double> dist2(alphaReal2, betaReal2);
+
+  std::uniform_real_distribution<double> lowProb(0, 0.02);
+  std::uniform_real_distribution<double> highProb(0.98, 1);
+
+  size_t N = 50000;
+  size_t d = 2;
+  arma::mat rdata(d, N);
+  arma::vec probabilities(N);
+
+  // draws points alternately from the two different distributions.
+ for(size_t j = 0; j < d; j++) + { + for(size_t i = 0; i < N; i++) + { + if(i % 2 == 0) + rdata(j, i) = dist(generator); + else + rdata(j, i) = dist2(generator); + } + } + + for(size_t i = 0; i Date: Fri, 23 Dec 2016 13:04:59 -0800 Subject: [PATCH 3/3] adds my name to core.hpp and COPYRIGHT.txt --- COPYRIGHT.txt | 1 + src/mlpack/core.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index d02ba901219..bd9e16bd976 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -63,6 +63,7 @@ Copyright: Copyright 2016, Anuraj Kanodia Copyright 2016, Ivan Georgiev Copyright 2016, Shikhar Bhardwaj + Copyright 2016, Yashu Seth License: BSD-3-clause All rights reserved. . diff --git a/src/mlpack/core.hpp b/src/mlpack/core.hpp index aa9242d1651..1409be57dfc 100644 --- a/src/mlpack/core.hpp +++ b/src/mlpack/core.hpp @@ -203,6 +203,7 @@ * - Anuraj Kanodia * - Ivan Georgiev * - Shikhar Bhardwaj + * - Yashu Seth */ // First, include all of the prerequisites.