Skip to content

Commit

Permalink
ENH: Kstest exact performance improvements (scipy#13265)
Browse files Browse the repository at this point in the history
* ENH: Remove rounding from special.binom and raise error on warning to remove explicit checks
  • Loading branch information
Balthasar-eu committed Jul 31, 2022
1 parent dc45894 commit 6988f97
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 41 deletions.
39 changes: 39 additions & 0 deletions benchmarks/benchmarks/stats.py
Expand Up @@ -664,3 +664,42 @@ def setup(self, n_size):

def time_somersd(self, n_size):
res = stats.somersd(self.x, self.y)


class KolmogorovSmirnov(Benchmark):
param_names = ['alternative', 'mode', 'size']
# No auto since it defaults to exact for 20 samples
params = [
['two-sided', 'less', 'greater'],
['exact', 'approx', 'asymp'],
[19, 20, 21]
]

def setup(self, alternative, mode, size):
np.random.seed(12345678)
a = stats.norm.rvs(size=20)
self.a = a

def time_ks(self, alternative, mode, size):
stats.kstest(self.a, 'norm', alternative=alternative,
mode=mode, N=size)


class KolmogorovSmirnovTwoSamples(Benchmark):
param_names = ['alternative', 'mode', 'size']
# No auto since it defaults to exact for 20 samples
params = [
['two-sided', 'less', 'greater'],
['exact', 'asymp'],
[(21, 20), (20, 20)]
]

def setup(self, alternative, mode, size):
np.random.seed(12345678)
a = stats.norm.rvs(size=size[0])
b = stats.norm.rvs(size=size[1])
self.a = a
self.b = b

def time_ks2(self, alternative, mode, size):
stats.ks_2samp(self.a, self.b, alternative=alternative, mode=mode)
65 changes: 26 additions & 39 deletions scipy/stats/_stats_py.py
Expand Up @@ -7651,11 +7651,6 @@ def _count_paths_outside_method(m, n, g, h):
The number of paths that go low.
The calculation may overflow - check for a finite answer.
Raises
------
FloatingPointError: Raised if the intermediate computation goes outside
the range of a float.
Notes
-----
Count the integer lattice paths from (0, 0) to (m, n), which at some
Expand Down Expand Up @@ -7691,31 +7686,23 @@ def _count_paths_outside_method(m, n, g, h):
# B is an array just holding a few values of B(x,y), the ones needed.
# B[j] == B(x_j, j)
if lxj == 0:
return np.round(special.binom(m + n, n))
return special.binom(m + n, n)
B = np.zeros(lxj)
B[0] = 1
# Compute the B(x, y) terms
# The binomial coefficient is an integer, but special.binom()
# may return a float. Round it to the nearest integer.
for j in range(1, lxj):
Bj = np.round(special.binom(xj[j] + j, j))
if not np.isfinite(Bj):
raise FloatingPointError()
Bj = special.binom(xj[j] + j, j)
for i in range(j):
bin = np.round(special.binom(xj[j] - xj[i] + j - i, j-i))
bin = special.binom(xj[j] - xj[i] + j - i, j-i)
Bj -= bin * B[i]
B[j] = Bj
if not np.isfinite(Bj):
raise FloatingPointError()
# Compute the number of path extensions...
num_paths = 0
for j in range(lxj):
bin = np.round(special.binom((m-xj[j]) + (n - j), n-j))
bin = special.binom((m-xj[j]) + (n - j), n-j)
term = B[j] * bin
if not np.isfinite(term):
raise FloatingPointError()
num_paths += term
return np.round(num_paths)
return num_paths


def _attempt_exact_2kssamp(n1, n2, g, d, alternative):
Expand All @@ -7734,29 +7721,29 @@ def _attempt_exact_2kssamp(n1, n2, g, d, alternative):
return True, d, 1.0
saw_fp_error, prob = False, np.nan
try:
if alternative == 'two-sided':
if n1 == n2:
prob = _compute_prob_outside_square(n1, h)
else:
prob = _compute_outer_prob_inside_method(n1, n2, g, h)
else:
if n1 == n2:
# prob = binom(2n, n-h) / binom(2n, n)
# Evaluating in that form incurs roundoff errors
# from special.binom. Instead calculate directly
jrange = np.arange(h)
prob = np.prod((n1 - jrange) / (n1 + jrange + 1.0))
with np.errstate(invalid="raise", over="raise"):
if alternative == 'two-sided':
if n1 == n2:
prob = _compute_prob_outside_square(n1, h)
else:
prob = _compute_outer_prob_inside_method(n1, n2, g, h)
else:
with np.errstate(over='raise'):
num_paths = _count_paths_outside_method(n1, n2, g, h)
bin = special.binom(n1 + n2, n1)
if not np.isfinite(bin) or not np.isfinite(num_paths)\
or num_paths > bin:
saw_fp_error = True
if n1 == n2:
# prob = binom(2n, n-h) / binom(2n, n)
# Evaluating in that form incurs roundoff errors
# from special.binom. Instead calculate directly
jrange = np.arange(h)
prob = np.prod((n1 - jrange) / (n1 + jrange + 1.0))
else:
prob = num_paths / bin

except FloatingPointError:
with np.errstate(over='raise'):
num_paths = _count_paths_outside_method(n1, n2, g, h)
bin = special.binom(n1 + n2, n1)
if num_paths > bin or np.isinf(bin):
saw_fp_error = True
else:
prob = num_paths / bin

except (FloatingPointError, OverflowError):
saw_fp_error = True

if saw_fp_error:
Expand Down
7 changes: 5 additions & 2 deletions scipy/stats/tests/test_stats.py
Expand Up @@ -4117,8 +4117,11 @@ def test_some_code_paths(self):
_compute_outer_prob_inside_method(1, 1, 1, 1)
_count_paths_outside_method(1000, 1, 1, 1001)

assert_raises(FloatingPointError, _count_paths_outside_method, 1100, 1099, 1, 1)
assert_raises(FloatingPointError, _count_paths_outside_method, 2000, 1000, 1, 1)
with np.errstate(invalid='raise'):
assert_raises(FloatingPointError, _count_paths_outside_method,
1100, 1099, 1, 1)
assert_raises(FloatingPointError, _count_paths_outside_method,
2000, 1000, 1, 1)

def test_argument_checking(self):
# Check that an empty array causes a ValueError
Expand Down

0 comments on commit 6988f97

Please sign in to comment.