Skip to content

Commit

Permalink
Merge pull request #1867 from josef-pkt/REF_covtype_fit
Browse files Browse the repository at this point in the history
ENH/REF add covtype choice to fit method
  • Loading branch information
josef-pkt committed Aug 3, 2014
2 parents bdb15bb + b20d35a commit 8709f00
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 20 deletions.
12 changes: 8 additions & 4 deletions statsmodels/base/model.py
Expand Up @@ -1111,12 +1111,14 @@ def t_test(self, r_matrix, q_matrix=None, cov_p=None, scale=None,
_sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p))
_t = (_effect - q_matrix) * recipr(_sd)

df_resid = getattr(self, 'df_resid_inference', self.df_resid)

if use_t:
return ContrastResults(effect=_effect, t=_t, sd=_sd,
df_denom=self.df_resid)
df_denom=df_resid)
else:
return ContrastResults(effect=_effect, statistic=_t, sd=_sd,
df_denom=self.df_resid,
df_denom=df_resid,
distribution='norm')

def f_test(self, r_matrix, q_matrix=None, cov_p=None, scale=1.0,
Expand Down Expand Up @@ -1323,9 +1325,10 @@ def wald_test(self, r_matrix, q_matrix=None, cov_p=None, scale=1.0,
else:
F = np.dot(np.dot(Rbq.T, invcov), Rbq)

df_resid = getattr(self, 'df_resid_inference', self.df_resid)
if use_f:
F /= J
return ContrastResults(F=F, df_denom=self.df_resid,
return ContrastResults(F=F, df_denom=df_resid,
df_num=invcov.shape[0])
else:
return ContrastResults(chi2=F, df_denom=J, statistic=F,
Expand Down Expand Up @@ -1390,7 +1393,8 @@ def conf_int(self, alpha=.05, cols=None, method='default'):

if self.use_t:
dist = stats.t
q = dist.ppf(1 - alpha / 2, self.df_resid)
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
q = dist.ppf(1 - alpha / 2, df_resid)
else:
dist = stats.norm
q = dist.ppf(1 - alpha / 2)
Expand Down
26 changes: 24 additions & 2 deletions statsmodels/examples/ex_ols_robustcov.py
Expand Up @@ -38,5 +38,27 @@
print('\n\n')
print(tt.summary_frame())

print(vars(res_hac4.f_test(np.eye(len(res_hac4.params))[:-1], use_f=True)))
print(vars(res_hac4.f_test(np.eye(len(res_hac4.params))[:-1], use_f=False)))
print(vars(res_hac4.f_test(np.eye(len(res_hac4.params))[:-1])))

print(vars(res_hac4.wald_test(np.eye(len(res_hac4.params))[:-1], use_f=True)))
print(vars(res_hac4.wald_test(np.eye(len(res_hac4.params))[:-1], use_f=False)))

# new cov_type can be set in fit method of model

mod_olsg = OLS(g_inv, exogg)
res_hac4b = mod_olsg.fit(cov_type='HAC',
cov_kwds=dict(maxlags=4, use_correction=True))
print(res_hac4b.summary())

res_hc1b = mod_olsg.fit(cov_type='HC1')
print(res_hc1b.summary())

# force t-distribution
res_hc1c = mod_olsg.fit(cov_type='HC1', cov_kwds={'use_t':True})
print(res_hc1c.summary())

# force t-distribution
decade = (d2['year'][1:] // 10).astype(int) # just make up a group variable
res_clu = mod_olsg.fit(cov_type='cluster',
cov_kwds={'groups':decade, 'use_t':True})
print(res_clu.summary())
44 changes: 31 additions & 13 deletions statsmodels/regression/linear_model.py
Expand Up @@ -140,7 +140,7 @@ def df_resid(self, value):
def whiten(self, X):
raise NotImplementedError("Subclasses should implement.")

def fit(self, method="pinv", **kwargs):
def fit(self, method="pinv", cov_type='nonrobust', cov_kwds=None, **kwargs):
"""
Full fit of the model.
Expand Down Expand Up @@ -208,10 +208,12 @@ def fit(self, method="pinv", **kwargs):

if isinstance(self, OLS):
lfit = OLSResults(self, beta,
normalized_cov_params=self.normalized_cov_params)
normalized_cov_params=self.normalized_cov_params,
cov_type=cov_type, cov_kwds=cov_kwds)
else:
lfit = RegressionResults(self, beta,
normalized_cov_params=self.normalized_cov_params)
normalized_cov_params=self.normalized_cov_params,
cov_type=cov_type, cov_kwds=cov_kwds)
return RegressionResultsWrapper(lfit)

def fit_regularized(self, method="coord_descent", maxiter=1000,
Expand Down Expand Up @@ -1048,7 +1050,8 @@ class RegressionResults(base.LikelihoodModelResults):

_cache = {} # needs to be a class attribute for scale setter?

def __init__(self, model, params, normalized_cov_params=None, scale=1.):
def __init__(self, model, params, normalized_cov_params=None, scale=1.,
cov_type='nonrobust', cov_kwds=None):
super(RegressionResults, self).__init__(model, params,
normalized_cov_params,
scale)
Expand All @@ -1058,16 +1061,23 @@ def __init__(self, model, params, normalized_cov_params=None, scale=1.):
self._wexog_singular_values = model.wexog_singular_values
else:
self._wexog_singular_values = None
self.cov_type = 'nonrobust'
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
'covariance matrix of the errors is correctly ' +
'specified.'}

self.df_model = model.df_model
self.df_resid = model.df_resid

self.use_t = True # default for linear models

if cov_type == 'nonrobust':
self.cov_type = 'nonrobust'
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
'covariance matrix of the errors is correctly ' +
'specified.'}
else:
if cov_kwds is None:
cov_kwds = {}
self.get_robustcov_results(cov_type=cov_type, use_self=True,
**cov_kwds)


def __str__(self):
self.summary()
Expand Down Expand Up @@ -1718,12 +1728,20 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):

import statsmodels.stats.sandwich_covariance as sw

res = self.__class__(self.model, self.params,
# TODO: make separate function that returns a robust cov plus info
use_self = kwds.pop('use_self', False)
if use_self:
res = self
else:
res = self.__class__(self.model, self.params,
normalized_cov_params=self.normalized_cov_params,
scale=self.scale)

res.cov_type = cov_type = cov_type
res.cov_kwds = {'use_t':use_t}
res.cov_type = cov_type
# use_t might already be defined by the class, and already set
if use_t is None:
use_t = self.use_t
res.cov_kwds = {'use_t':use_t} # store for information
res.use_t = use_t

adjust_df = False
Expand Down Expand Up @@ -1841,8 +1859,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
'available options and spelling')

if adjust_df:
# Note: we leave model.df_resid unchanged at original
res.df_resid = n_groups - 1
# Note: df_resid is used for scale and others, add new attribute
res.df_resid_inference = n_groups - 1

return res

Expand Down
121 changes: 120 additions & 1 deletion statsmodels/regression/tests/test_robustcov.py
Expand Up @@ -201,6 +201,43 @@ def test_ttest(self):
assert_allclose(ci1, ci2, rtol=rtol)


def test_scale(self):
res1 = self.res1
res2 = self.res2
rtol = 1e-5
# Note we always use df_resid for scale
# Stata uses nobs or df_resid for rmse, not always available in Stata
#assert_allclose(res1.scale, res2.rmse**2 * res2.N / (res2.N - res2.df_m - 1), rtol=rtol)
skip = False
if hasattr(res2, 'rss'):
scale = res2.rss / (res2.N - res2.df_m - 1)
elif hasattr(res2, 'rmse'):
scale = res2.rmse**2
else:
skip = True

if isinstance(res1.model, WLS):
skip = True
# Stata uses different scaling and using unweighted resid for rmse

if not skip:
assert_allclose(res1.scale, scale, rtol=rtol)

if not res2.vcetype == 'Newey-West':
# no rsquared in Stata
r2 = res2.r2 if hasattr(res2, 'r2') else res2.r2c
assert_allclose(res1.rsquared, r2, rtol=rtol, err_msg=str(skip))


# consistency checks, not against Stata
df_resid = res1.nobs - res1.df_model - 1
assert_equal(res1.df_resid, df_resid)
# variance of resid_pearson is 1, with ddof, and loc=0
psum = (res1.resid_pearson**2).sum()
assert_allclose(psum, df_resid, rtol=1e-13)



def test_smoke(self):
self.res1.summary()

Expand Down Expand Up @@ -343,6 +380,32 @@ def setup(self):
self.rtolh = 1e-10


class TestOLSRobustCluster2Fit(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
# copy, past uses fit method
# compare with `reg cluster`

def setup(self):
res_ols = self.res1.model.fit(cov_type='cluster',
cov_kwds=dict(
groups=self.groups,
use_correction=True,
use_t=True))
self.res3 = self.res1
self.res1 = res_ols
self.bse_robust = res_ols.bse
self.cov_robust = res_ols.cov_params()
cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=True)
se1 = sw.se_cov(cov1)
self.bse_robust2 = se1
self.cov_robust2 = cov1
self.small = True
self.res2 = res2.results_cluster

self.rtol = 1e-6
self.rtolh = 1e-10



class TestOLSRobustCluster2Large(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
# compare with `reg cluster`

Expand All @@ -368,7 +431,38 @@ def setup(self):
self.rtolh = 1e-10

# skipping see https://github.com/statsmodels/statsmodels/pull/1189#issuecomment-29141741
def test_fvalue(self):
def test_f_value(self):
pass


class TestOLSRobustCluster2LargeFit(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
# compare with `reg cluster`

def setup(self):
model = OLS(self.res1.model.endog, self.res1.model.exog)
#res_ols = self.res1.model.fit(cov_type='cluster',
res_ols = model.fit(cov_type='cluster',
cov_kwds=dict(groups=self.groups,
use_correction=False,
use_t=False,
df_correction=True))
self.res3 = self.res1
self.res1 = res_ols
self.bse_robust = res_ols.bse
self.cov_robust = res_ols.cov_params()
cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False)
se1 = sw.se_cov(cov1)
self.bse_robust2 = se1
self.cov_robust2 = cov1
self.small = False
self.res2 = res2.results_cluster_large

self.skip_f = True
self.rtol = 1e-6
self.rtolh = 1e-10

# skipping see https://github.com/statsmodels/statsmodels/pull/1189#issuecomment-29141741
def t_est_fvalue(self):
pass


Expand Down Expand Up @@ -398,6 +492,31 @@ def setup(self):
self.rtolh = 1e-10


class TestOLSRobustClusterGSFit(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
# compare with `reg cluster`

def setup(self):
res_ols = self.res1.model.fit(cov_type='nw-groupsum',
cov_kwds=dict(time=self.time,
maxlags=4,
use_correction=False,
use_t=True))
self.res3 = self.res1
self.res1 = res_ols
self.bse_robust = res_ols.bse
self.cov_robust = res_ols.cov_params()
cov1 = sw.cov_nw_groupsum(self.res1, 4, self.time, use_correction=False)
se1 = sw.se_cov(cov1)
self.bse_robust2 = se1
self.cov_robust2 = cov1
self.small = True
self.res2 = res2.results_nw_groupsum4

self.skip_f = True
self.rtol = 1e-6
self.rtolh = 1e-10


class TestOLSRobustClusterNWP(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
# compare with `reg cluster`

Expand Down

0 comments on commit 8709f00

Please sign in to comment.