Permalink
Browse files

BUG: stats: improve how chi2_contingency handles degenerate cases.

  • Loading branch information...
1 parent 0bccc24 commit e90c7361878117171296c6e91dc63c8141b5ecc1 Warren Weckesser committed Sep 9, 2011
Showing with 21 additions and 8 deletions.
  1. +17 −7 scipy/stats/contingency.py
  2. +4 −1 scipy/stats/tests/test_contingency.py
View
24 scipy/stats/contingency.py
@@ -216,7 +216,9 @@ def chi2_contingency(observed, correction=True):
"""
observed = np.asarray(observed)
if np.any(observed < 0):
- raise ValueError("All values in `table` must be nonnegative.")
+ raise ValueError("All values in `observed` must be nonnegative.")
+ if observed.size == 0:
+ raise ValueError("No data; `observed` has size 0.")
expected = expected_freq(observed)
if np.any(expected == 0):
@@ -229,11 +231,19 @@ def chi2_contingency(observed, correction=True):
# The degrees of freedom
dof = expected.size - sum(expected.shape) + expected.ndim - 1
- if dof == 1 and correction:
- # Use Yates' correction for continuity.
- chi2 = ((np.abs(observed - expected) - 0.5) ** 2 / expected).sum()
+ if dof == 0:
+ # Degenerate case; this occurs when `observed` is 1D (or, more
+ # generally, when it has only one nontrivial dimension). In this
+ # case, we also have observed == expected, so chi2 is 0.
+ chi2 = 0.0
+ p = 1.0
else:
- # Regular chi-square--no correction.
- chi2 = ((observed - expected) ** 2 / expected).sum()
- p = special.chdtrc(dof, chi2)
+ if dof == 1 and correction:
+ # Use Yates' correction for continuity.
+ chi2 = ((np.abs(observed - expected) - 0.5) ** 2 / expected).sum()
+ else:
+ # Regular chi-square--no correction.
+ chi2 = ((observed - expected) ** 2 / expected).sum()
+ p = special.chdtrc(dof, chi2)
+
return chi2, p, dof, expected
View
5 scipy/stats/tests/test_contingency.py
@@ -50,7 +50,7 @@ def test_expected_freq():
assert_array_almost_equal(e, correct)
-def test_chi2_contingency_trival():
+def test_chi2_contingency_trivial():
"""Some very simple tests for chi2_contingency."""
# A trivial case
obs = np.array([[1, 2], [1, 2]])
@@ -172,6 +172,9 @@ def test_chi2_contingency_bad_args():
# of expected frequencies.
obs = np.array([[0, 1], [0, 1]])
assert_raises(ValueError, chi2_contingency, obs)
+ # A degenerate case: `observed` has size 0.
+ obs = np.empty((0, 8))
+ assert_raises(ValueError, chi2_contingency, obs)
if __name__ == "__main__":

0 comments on commit e90c736

Please sign in to comment.