diff --git a/src/shogun/mathematics/Statistics.cpp b/src/shogun/mathematics/Statistics.cpp index 6117c347377..4c3c5a60b90 100644 --- a/src/shogun/mathematics/Statistics.cpp +++ b/src/shogun/mathematics/Statistics.cpp @@ -571,12 +571,61 @@ float64_t CStatistics::gamma_incomplete_lower(float64_t s, float64_t z) return CMath::exp(s * CMath::log(z) - z - CStatistics::lgamma(s + 1.) + CMath::log(sum)); } -float64_t CStatistics::gamma_incomplete_upper(float64_t s, float64_t x) +float64_t CStatistics::lgamma_approx(float64_t z) { - REQUIRE(s>0, "Given s (%f) must be greater than 0.\n", s); - REQUIRE(x>=0, "Given x (%f) must be greater or equal to 0.\n", x); + float64_t x = 0; + x += 0.1659470187408462e-06 / (z+7); + x += 0.9934937113930748e-05 / (z+6); + x -= 0.1385710331296526 / (z+5); + x += 12.50734324009056 / (z+4); + x -= 176.6150291498386 / (z+3); + x += 771.3234287757674 / (z+2); + x -= 1259.139216722289 / (z+1); + x += 676.5203681218835 / z; + x += 0.9999999999995183; + return CMath::log(x) - 5.58106146679532777 - z + (z-0.5) * CMath::log(z+6.5); +} + +float64_t CStatistics::gamma_incomplete_upper(float64_t s, float64_t z) +{ + REQUIRE(s>0, "Given exponent (%f) must be greater than 0.\n", s); + REQUIRE(z>=0, "Given integral bound (%f) must be greater or equal to 0.\n", z); - return CMath::exp(lgammal(s))*(1-CStatistics::gamma_pdf(x,s,1)); + /* Taken inspiration from + * https://github.com/lh3/samtools/blob/master/bcftools/kfunc.c + * under MIT license + * The code states: + * The following computes regularized incomplete gamma functions. + * Formulas are taken from Wiki, with additional input from Numerical + * Recipes in C (for modified Lentz's algorithm) and AS245 + * (http://lib.stat.cmu.edu/apstat/245). + */ + + int32_t j; + float64_t C, D, f; + f = 1. + z - s; + C = f; + D = 0.; + // Modified Lentz's algorithm for computing continued fraction + // See Numerical Recipes in C, 2nd edition, section 5.2 + for (j = 1; j < 100; ++j) + { + float64_t a = j * (s - j), b = (j<<1) + 1 + z - s, d; + D = b + a * D; + // hard coded tiny numbers + if (D < 1e-290) + D = 1e-290; + C = b + a / C; + if (C < 1e-290) + C = 1e-290; + D = 1. / D; + d = C * D; + f *= d; + // hard coded epsilon + if (CMath::abs(d - 1.) < 1e-14) + break; + } + return CMath::exp(s * CMath::log(z) - z - CStatistics::lgamma_approx(s) - CMath::log(f)); } float64_t CStatistics::gamma_pdf(float64_t x, float64_t a, float64_t b) diff --git a/src/shogun/mathematics/Statistics.h b/src/shogun/mathematics/Statistics.h index 68b51d1b922..50111de2919 100644 --- a/src/shogun/mathematics/Statistics.h +++ b/src/shogun/mathematics/Statistics.h @@ -197,6 +197,9 @@ class CStatistics: public CSGObject return ::lgamma((double) x); } + /** TODO */ + static float64_t lgamma_approx(float64_t x); + /** @return natural logarithm of the gamma function of input for large * numbers */ static inline floatmax_t lgammal(floatmax_t x) @@ -227,11 +230,15 @@ class CStatistics: public CSGObject */ static float64_t gamma_pdf(float64_t x, float64_t a, float64_t b); - /** Compute the (incomplete) Gamma function. + /** Compute the (regularized incomplete) Gamma function. * - * TODO + * Formulas are taken from Wiki, with additional input from Numerical + * Recipes in C (for modified Lentz's algorithm). Uses a series expansion. * - * a>0, x>=0 + * TODO latex + * http://www.johndcook.com/blog/gamma_python/ + * + * Assumes a>0, x>=0 * * @param a TODO * @param x TODO @@ -239,6 +246,16 @@ class CStatistics: public CSGObject */ static float64_t gamma_incomplete_lower(float64_t a, float64_t x); + /** Cumputes the (regularized upper) Gamma function. + * + * Modified Lentz's algorithm for computing continued fraction. + * See Numerical Recipes in C, 2nd edition, section 5.2. + * + * TODO latex + * http://www.johndcook.com/blog/gamma_python/ + * + * Assumes a>0, x>=0 + */ static float64_t gamma_incomplete_upper(float64_t a, float64_t x); /** Evaluates the CDF of the gamma distribution with given parameters \f$a, b\f$ diff --git a/tests/unit/mathematics/Statistics_unittest.cc b/tests/unit/mathematics/Statistics_unittest.cc index 91e4e437620..b6a41739a79 100644 --- a/tests/unit/mathematics/Statistics_unittest.cc +++ b/tests/unit/mathematics/Statistics_unittest.cc @@ -460,6 +460,7 @@ TEST(Statistics, inverse_normal_cdf_with_mean_std_dev) TEST(Statistics, gamma_incomplete_lower) { + // tests against scipy.special.gammainc float64_t result; result=CStatistics::gamma_incomplete_lower(1, 1); @@ -501,11 +502,51 @@ TEST(Statistics, gamma_incomplete_lower) TEST(Statistics, gamma_incomplete_upper) { - EXPECT_NEAR(0, 1, 1e-15); + // tests against scipy.special.gammaincc + float64_t result; + + result=CStatistics::gamma_incomplete_upper(1, 1); + EXPECT_NEAR(result, 0.36787944117144233, 1e-15); + + result=CStatistics::gamma_incomplete_upper(2, 1); + EXPECT_NEAR(result, 0.73575888234288467, 1e-15); + + result=CStatistics::gamma_incomplete_upper(1, 2); + EXPECT_NEAR(result, 0.1353352832366127, 1e-15); + + result=CStatistics::gamma_incomplete_upper(0.0000001, 1); + EXPECT_NEAR(result, 2.193839568430253e-08, 1e-14); + + result=CStatistics::gamma_incomplete_upper(0.001, 1); + EXPECT_NEAR(result, 0.00021960835758555317, 1e-15); + + result=CStatistics::gamma_incomplete_upper(5, 1); + EXPECT_NEAR(result, 0.99634015317265634, 1e-15); + + result=CStatistics::gamma_incomplete_upper(10, 1); + EXPECT_NEAR(result, 0.99999988857452171, 1e-15); + + result=CStatistics::gamma_incomplete_upper(1, 0.0000001); + EXPECT_NEAR(result, 0.99999990000000505, 1e-9); + + result=CStatistics::gamma_incomplete_upper(1, 0.001); + EXPECT_NEAR(result, 0.99900049983337502, 1e-12); + + result=CStatistics::gamma_incomplete_upper(1, 5); + EXPECT_NEAR(result, 0.0067379469990854679, 1e-15); + + result=CStatistics::gamma_incomplete_upper(1, 10); + EXPECT_NEAR(result, 4.5399929762484861e-05, 1e-14); + + result=CStatistics::gamma_incomplete_upper(1, 20); + EXPECT_NEAR(result, 2.0611536224385566e-09, 1e-14); } TEST(Statistics, gamma_pdf) { + // tests against scipy.stats.gamma.pdf + // note that scipy.stats.gamma.pdf(2, a=2, scale=1./2) corresonds to + // CStatistics:.gamma_pdf(2,2,2) float64_t result; // three basic cases to get order of parameters