Skip to content

Commit

Permalink
Merge 106f5df into b2c1259
Browse files Browse the repository at this point in the history
  • Loading branch information
josef-pkt committed Jul 19, 2014
2 parents b2c1259 + 106f5df commit 1daaaae
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 1 deletion.
2 changes: 1 addition & 1 deletion statsmodels/stats/anova.py
Expand Up @@ -353,7 +353,7 @@ def anova_lm(*args, **kwargs):

table["ssr"] = lmap(getattr, args, ["ssr"]*n_models)
table["df_resid"] = lmap(getattr, args, ["df_resid"]*n_models)
table.ix[1:]["df_diff"] = np.diff(lmap(getattr, args, ["df_model"]*n_models))
table.ix[1:]["df_diff"] = -np.diff(table["df_resid"].values)
table["ss_diff"] = -table["ssr"].diff()
if test == "F":
table["F"] = table["ss_diff"] / table["df_diff"] / scale
Expand Down
118 changes: 118 additions & 0 deletions statsmodels/stats/tests/test_anova.py
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-

from statsmodels.compat.python import StringIO

import numpy as np
Expand Down Expand Up @@ -92,6 +94,46 @@ def test_results(self):
np.testing.assert_almost_equal(results['F'].values, f_value, 4)
np.testing.assert_almost_equal(results['PR(>F)'].values, pr_f)


class TestAnovaLMNoconstant(object):
@classmethod
def setupClass(cls):
# kidney data taken from JT's course
# don't know the license
kidney_table.seek(0)
cls.data = read_table(kidney_table, sep="\s+")
cls.kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight) - 1',
data=cls.data).fit()

def test_results(self):
Df = np.array([2, 2, 2, 54])
sum_sq = np.array([158.6415227, 16.97129, 0.6356584, 28.9892])
mean_sq = np.array([79.3207613, 8.485645, 0.3178292, 0.536837])
f_value = np.array([147.7557648, 15.80674, 0.5920404, np.nan])
pr_f = np.array([1.262324e-22, 3.944502e-06, 0.5567479, np.nan])

results = anova_lm(self.kidney_lm)
np.testing.assert_equal(results['df'].values, Df)
np.testing.assert_almost_equal(results['sum_sq'].values, sum_sq, 4)
np.testing.assert_almost_equal(results['F'].values, f_value, 4)
np.testing.assert_almost_equal(results['PR(>F)'].values, pr_f)

# > sum2.lm = lm(logDays ~ Duration * Weight - 1, contrasts=list(Duration=contr.sum, Weight=contr.sum))
# > anova.lm.sum2 <- anova(sum2.lm)
# > anova.lm.sum2
# Analysis of Variance Table
#
# Response: logDays
# Df Sum Sq Mean Sq F value Pr(>F)
# Duration 2 158.642 79.321 147.756 < 2.2e-16 ***
# Weight 2 16.971 8.486 15.807 3.945e-06 ***
# Duration:Weight 2 0.636 0.318 0.592 0.5567
# Residuals 54 28.989 0.537
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1



class TestAnovaLMCompare(TestAnovaLM):
def test_results(self):
new_model = ols("np.log(Days+1) ~ C(Duration) + C(Weight)",
Expand Down Expand Up @@ -124,6 +166,40 @@ def test_results(self):
np.testing.assert_almost_equal(results["F"].values, F)
np.testing.assert_almost_equal(results["Pr(>F)"].values, PrF)


class TestAnovaLMCompareNoconstant(TestAnovaLM):
def test_results(self):
new_model = ols("np.log(Days+1) ~ C(Duration) + C(Weight) - 1",
self.data).fit()
results = anova_lm(new_model, self.kidney_lm)

Res_Df = np.array([
56, 54
])
RSS = np.array([
29.62486, 28.9892
])
Df = np.array([
0, 2
])
Sum_of_Sq = np.array([
np.nan, 0.6356584
])
F = np.array([
np.nan, 0.5920404
])
PrF = np.array([
np.nan, 0.5567479
])

np.testing.assert_equal(results["df_resid"].values, Res_Df)
np.testing.assert_almost_equal(results["ssr"].values, RSS, 4)
np.testing.assert_almost_equal(results["df_diff"].values, Df)
np.testing.assert_almost_equal(results["ss_diff"].values, Sum_of_Sq)
np.testing.assert_almost_equal(results["F"].values, F)
np.testing.assert_almost_equal(results["Pr(>F)"].values, PrF)


class TestAnova2(TestAnovaLM):
# drop some observations to make an unbalanced, disproportionate panel
# to make sure things are okay
Expand Down Expand Up @@ -151,6 +227,48 @@ def test_results(self):
np.testing.assert_almost_equal(results['F'].values, F_value, 4)
np.testing.assert_almost_equal(results['PR(>F)'].values, PrF)


class TestAnova2Noconstant(TestAnovaLM):
# drop some observations to make an unbalanced, disproportionate panel
# to make sure things are okay
def test_results(self):
data = self.data.drop([0,1,2])
anova_ii = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum) - 1",
data).fit()

Sum_Sq = np.array([
154.7131692, 13.27205, 0.1905093, 27.60181
])
Df = np.array([
2, 2, 2, 51
])
F_value = np.array([
142.9321191, 12.26141, 0.1760025, np.nan
])
PrF = np.array([
1.238624e-21, 4.487909e-05, 0.8391231, np.nan
])

results = anova_lm(anova_ii, typ="II")
np.testing.assert_equal(results['df'].values, Df)
np.testing.assert_almost_equal(results['sum_sq'].values, Sum_Sq, 4)
np.testing.assert_almost_equal(results['F'].values, F_value, 4)
np.testing.assert_almost_equal(results['PR(>F)'].values, PrF)

# > sum2.lm.dropped <- lm(logDays ~ Duration * Weight - 1, dta.dropped,
# contrasts=list(Duration=contr.sum, Weight=contr.sum))
# > anova.ii.dropped2 <- Anova(sum2.lm.dropped, type='II')
# > anova.ii.dropped2
# Anova Table (Type II tests)
#
# Response: logDays
# Sum Sq Df F value Pr(>F)
# Duration 154.713 2 142.932 < 2.2e-16 ***
# Weight 13.272 2 12.261 4.488e-05 ***
# Duration:Weight 0.191 2 0.176 0.8391
# Residuals 27.602 51


class TestAnova2HC0(TestAnovaLM):
#NOTE: R doesn't return SSq with robust covariance. Why?
# drop some observations to make an unbalanced, disproportionate panel
Expand Down

0 comments on commit 1daaaae

Please sign in to comment.