Skip to content

Commit

Permalink
Merge pull request #5308 from jbrockmendel/covtypes
Browse files Browse the repository at this point in the history
REF: Collect covtype descriptions, de-duplicate normalization func
  • Loading branch information
bashtage committed May 7, 2019
2 parents b0045ff + 4d7b38f commit 6453c81
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 74 deletions.
80 changes: 62 additions & 18 deletions statsmodels/base/covtype.py
Expand Up @@ -11,6 +11,58 @@

import numpy as np

# Human-readable descriptions of each covariance type, keyed by cov_type
# name.  Values containing ``{...}`` fields are templates meant to be filled
# in with ``str.format`` (``maxlags``/``correction`` or ``approx_type``).
descriptions = {
    hc_name: 'Standard Errors are heteroscedasticity robust (%s)' % hc_name
    for hc_name in ('HC0', 'HC1', 'HC2', 'HC3')
}
descriptions.update({
    'HAC': ('Standard Errors are heteroscedasticity and autocorrelation '
            'robust (HAC) using {maxlags} lags and {correction} small '
            'sample correction'),
    'fixed_scale': 'Standard Errors are based on fixed scale',
    'cluster': ('Standard Errors are robust to cluster correlation '
                '(cluster)'),
    'HAC-Panel': ('Standard Errors are robust to cluster correlation '
                  '(HAC-Panel)'),
    'HAC-Groupsum': ('Driscoll and Kraay Standard Errors are robust to '
                     'cluster correlation (HAC-Groupsum)'),
    'none': 'Covariance matrix not calculated.',
    'approx': ('Covariance matrix calculated using numerical '
               '({approx_type}) differentiation.'),
    'OPG': ('Covariance matrix calculated using the outer product of '
            'gradients ({approx_type}).'),
    'OIM': ('Covariance matrix calculated using the observed information '
            'matrix ({approx_type}) described in Harvey (1989).'),
    'robust': ('Quasi-maximum likelihood covariance matrix used for '
               'robustness to some misspecifications; calculated using '
               'numerical ({approx_type}) differentiation.'),
    'robust-OIM': ('Quasi-maximum likelihood covariance matrix used for '
                   'robustness to some misspecifications; calculated using '
                   'the observed information matrix ({approx_type}) '
                   'described in Harvey (1989).'),
})
# 'robust-approx' carries exactly the same text as 'robust'.
descriptions['robust-approx'] = descriptions['robust']


def normalize_cov_type(cov_type):
    """
    Normalize the cov_type string to a canonical version.

    The aliases ``'nw-panel'`` and ``'nw-groupsum'`` are mapped to
    ``'hac-panel'`` and ``'hac-groupsum'`` respectively.  Aliases are
    matched case-insensitively; any other value is returned unchanged.

    Parameters
    ----------
    cov_type : str
        Name of the covariance type as supplied by the caller.

    Returns
    -------
    normalized_cov_type : str
        The canonical (lower-case) name if ``cov_type`` is a known alias,
        otherwise ``cov_type`` exactly as it was passed in.
    """
    aliases = {
        'nw-panel': 'hac-panel',
        'nw-groupsum': 'hac-groupsum',
    }
    if not isinstance(cov_type, str):
        # Non-string input is passed through untouched so that the caller's
        # own validation decides how to report it.
        return cov_type
    # Match on the lower-cased name so that e.g. 'NW-Panel' is recognised,
    # but hand back the caller's original spelling for non-alias names.
    return aliases.get(cov_type.lower(), cov_type)


def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
"""create new results instance with robust covariance as default
Expand Down Expand Up @@ -134,11 +186,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):

import statsmodels.stats.sandwich_covariance as sw

#normalize names
if cov_type == 'nw-panel':
cov_type = 'hac-panel'
if cov_type == 'nw-groupsum':
cov_type = 'hac-groupsum'
cov_type = normalize_cov_type(cov_type)

if 'kernel' in kwds:
kwds['weights_func'] = kwds.pop('kernel')
if 'weights_func' in kwds and not callable(kwds['weights_func']):
Expand Down Expand Up @@ -180,10 +229,9 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
# TODO: make this DRYer; the code for checking kwds is repeated
if cov_type.upper() in ('HC0', 'HC1', 'HC2', 'HC3'):
if kwds:
raise ValueError('heteroscedasticity robust covarians ' +
raise ValueError('heteroscedasticity robust covariance '
'does not use keywords')
res.cov_kwds['description'] = ('Standard Errors are heteroscedasticity ' +
'robust ' + '(' + cov_type + ')')
res.cov_kwds['description'] = descriptions[cov_type.upper()]

res.cov_params_default = getattr(self, 'cov_' + cov_type.upper(), None)
if res.cov_params_default is None:
Expand All @@ -197,9 +245,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
res.cov_kwds['weights_func'] = weights_func
use_correction = kwds.get('use_correction', False)
res.cov_kwds['use_correction'] = use_correction
res.cov_kwds['description'] = ('Standard Errors are heteroscedasticity ' +
'and autocorrelation robust (HAC) using %d lags and %s small ' +
'sample correction') % (maxlags, ['without', 'with'][use_correction])
res.cov_kwds['description'] = descriptions['HAC'].format(
maxlags=maxlags, correction=['without', 'with'][use_correction])

res.cov_params_default = sw.cov_hac_simple(self, nlags=maxlags,
weights_func=weights_func,
Expand Down Expand Up @@ -241,8 +288,7 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
use_correction=use_correction)[0]
else:
raise ValueError('only two groups are supported')
res.cov_kwds['description'] = ('Standard Errors are robust to' +
'cluster correlation ' + '(' + cov_type + ')')
res.cov_kwds['description'] = descriptions['cluster']

elif cov_type.lower() == 'hac-panel':
#cluster robust standard errors
Expand Down Expand Up @@ -274,8 +320,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
res.cov_params_default = sw.cov_nw_panel(self, maxlags, groupidx,
weights_func=weights_func,
use_correction=use_correction)
res.cov_kwds['description'] = ('Standard Errors are robust to' +
'cluster correlation ' + '(' + cov_type + ')')
res.cov_kwds['description'] = descriptions['HAC-Panel']

elif cov_type.lower() == 'hac-groupsum':
# Driscoll-Kraay standard errors
res.cov_kwds['time'] = time = kwds['time']
Expand All @@ -295,9 +341,7 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
res.cov_params_default = sw.cov_nw_groupsum(self, maxlags, time,
weights_func=weights_func,
use_correction=use_correction)
res.cov_kwds['description'] = (
'Driscoll and Kraay Standard Errors are robust to ' +
'cluster correlation ' + '(' + cov_type + ')')
res.cov_kwds['description'] = descriptions['HAC-Groupsum']
else:
raise ValueError('cov_type not recognized. See docstring for ' +
'available options and spelling')
Expand Down
38 changes: 13 additions & 25 deletions statsmodels/regression/linear_model.py
Expand Up @@ -2252,12 +2252,10 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
except in the case of cov_type `HCx`
"""
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.base.covtype import normalize_cov_type, descriptions

cov_type = normalize_cov_type(cov_type)

# normalize names
if cov_type == 'nw-panel':
cov_type = 'hac-panel'
if cov_type == 'nw-groupsum':
cov_type = 'hac-groupsum'
if 'kernel' in kwds:
kwds['weights_func'] = kwds.pop('kernel')
if 'weights_func' in kwds and not callable(kwds['weights_func']):
Expand Down Expand Up @@ -2295,18 +2293,15 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
# other models
# TODO: make this DRYer; the code for checking kwds is repeated
if cov_type in ['fixed scale', 'fixed_scale']:
res.cov_kwds['description'] = ('Standard Errors are based on ' +
'fixed scale')
res.cov_kwds['description'] = descriptions['fixed_scale']

res.cov_kwds['scale'] = scale = kwds.get('scale', 1.)
res.cov_params_default = scale * res.normalized_cov_params
elif cov_type.upper() in ('HC0', 'HC1', 'HC2', 'HC3'):
if kwds:
raise ValueError('heteroscedasticity robust covarians ' +
raise ValueError('heteroscedasticity robust covariance '
'does not use keywords')
res.cov_kwds['description'] = (
'Standard Errors are heteroscedasticity ' +
'robust ' + '(' + cov_type + ')')
res.cov_kwds['description'] = descriptions[cov_type.upper()]
# TODO cannot access cov without calling se first
getattr(self, cov_type.upper() + '_se')
res.cov_params_default = getattr(self, 'cov_' + cov_type.upper())
Expand All @@ -2317,11 +2312,9 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
res.cov_kwds['weights_func'] = weights_func
use_correction = kwds.get('use_correction', False)
res.cov_kwds['use_correction'] = use_correction
res.cov_kwds['description'] = (
'Standard Errors are heteroscedasticity and ' +
'autocorrelation robust (HAC) using %d lags and %s small ' +
'sample correction') % (maxlags,
['without', 'with'][use_correction])
res.cov_kwds['description'] = descriptions['HAC'].format(
maxlags=maxlags,
correction=['without', 'with'][use_correction])

res.cov_params_default = sw.cov_hac_simple(
self, nlags=maxlags, weights_func=weights_func,
Expand Down Expand Up @@ -2363,9 +2356,7 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
self, groups, use_correction=use_correction)[0]
else:
raise ValueError('only two groups are supported')
res.cov_kwds['description'] = (
'Standard Errors are robust to' +
'cluster correlation ' + '(' + cov_type + ')')
res.cov_kwds['description'] = descriptions['cluster']

elif cov_type.lower() == 'hac-panel':
# cluster robust standard errors
Expand Down Expand Up @@ -2396,9 +2387,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
res.cov_params_default = sw.cov_nw_panel(self, maxlags, groupidx,
weights_func=weights_func,
use_correction=use_correction)
res.cov_kwds['description'] = (
'Standard Errors are robust to' +
'cluster correlation ' + '(' + cov_type + ')')
res.cov_kwds['description'] = descriptions['HAC-Panel']

elif cov_type.lower() == 'hac-groupsum':
# Driscoll-Kraay standard errors
res.cov_kwds['time'] = time = kwds['time']
Expand All @@ -2418,9 +2408,7 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
res.cov_params_default = sw.cov_nw_groupsum(
self, maxlags, time, weights_func=weights_func,
use_correction=use_correction)
res.cov_kwds['description'] = (
'Driscoll and Kraay Standard Errors are robust to ' +
'cluster correlation ' + '(' + cov_type + ')')
res.cov_kwds['description'] = descriptions['HAC-Groupsum']
else:
raise ValueError('cov_type not recognized. See docstring for ' +
'available options and spelling')
Expand Down
21 changes: 9 additions & 12 deletions statsmodels/tsa/regime_switching/markov_switching.py
Expand Up @@ -1934,6 +1934,8 @@ def __init__(self, model, params, results, cov_type='opg', cov_kwds=None,
self.smoothed_marginal_probabilities, index=index)

def _get_robustcov_results(self, cov_type='opg', **kwargs):
from statsmodels.base.covtype import descriptions

use_self = kwargs.pop('use_self', False)
if use_self:
res = self
Expand Down Expand Up @@ -1964,24 +1966,19 @@ def _get_robustcov_results(self, cov_type='opg', **kwargs):
elif cov_type == 'none':
res.cov_params_default = np.zeros((k_params, k_params)) * np.nan
res._rank = np.nan
res.cov_kwds['description'] = 'Covariance matrix not calculated.'
res.cov_kwds['description'] = descriptions['none']
elif self.cov_type == 'approx':
res.cov_params_default = res.cov_params_approx
res.cov_kwds['description'] = (
'Covariance matrix calculated using numerical (%s)'
' differentiation.' % approx_type_str)
res.cov_kwds['description'] = descriptions['approx'].format(
approx_type=approx_type_str)
elif self.cov_type == 'opg':
res.cov_params_default = res.cov_params_opg
res.cov_kwds['description'] = (
'Covariance matrix calculated using the outer product of'
' gradients (%s).' % approx_type_str
)
res.cov_kwds['description'] = descriptions['OPG'].format(
approx_type=approx_type_str)
elif self.cov_type == 'robust':
res.cov_params_default = res.cov_params_robust
res.cov_kwds['description'] = (
'Quasi-maximum likelihood covariance matrix used for'
' robustness to some misspecifications; calculated using'
' numerical (%s) differentiation.' % approx_type_str)
res.cov_kwds['description'] = descriptions['robust'].format(
approx_type=approx_type_str)
else:
raise NotImplementedError('Invalid covariance matrix type.')

Expand Down
32 changes: 13 additions & 19 deletions statsmodels/tsa/statespace/mlemodel.py
Expand Up @@ -1684,6 +1684,8 @@ def _get_robustcov_results(self, cov_type='opg', **kwargs):
intermediate calculations use the 'approx' method.
- 'none' for no covariance matrix calculation.
"""
from statsmodels.base.covtype import descriptions

use_self = kwargs.pop('use_self', False)
if use_self:
res = self
Expand Down Expand Up @@ -1720,35 +1722,27 @@ def _get_robustcov_results(self, cov_type='opg', **kwargs):
elif cov_type == 'none':
res.cov_params_default = np.zeros((k_params, k_params)) * np.nan
res._rank = np.nan
res.cov_kwds['description'] = 'Covariance matrix not calculated.'
res.cov_kwds['description'] = descriptions['none']
elif self.cov_type == 'approx':
res.cov_params_default = res.cov_params_approx
res.cov_kwds['description'] = (
'Covariance matrix calculated using numerical (%s)'
' differentiation.' % approx_type_str)
res.cov_kwds['description'] = descriptions['approx'].format(
approx_type=approx_type_str)
elif self.cov_type == 'oim':
res.cov_params_default = res.cov_params_oim
res.cov_kwds['description'] = (
'Covariance matrix calculated using the observed information'
' matrix (%s) described in Harvey (1989).' % approx_type_str)
res.cov_kwds['description'] = descriptions['OIM'].format(
approx_type=approx_type_str)
elif self.cov_type == 'opg':
res.cov_params_default = res.cov_params_opg
res.cov_kwds['description'] = (
'Covariance matrix calculated using the outer product of'
' gradients (%s).' % approx_type_str)
res.cov_kwds['description'] = descriptions['OPG'].format(
approx_type=approx_type_str)
elif self.cov_type == 'robust' or self.cov_type == 'robust_oim':
res.cov_params_default = res.cov_params_robust_oim
res.cov_kwds['description'] = (
'Quasi-maximum likelihood covariance matrix used for'
' robustness to some misspecifications; calculated using the'
' observed information matrix (%s) described in'
' Harvey (1989).' % approx_type_str)
res.cov_kwds['description'] = descriptions['robust-OIM'].format(
approx_type=approx_type_str)
elif self.cov_type == 'robust_approx':
res.cov_params_default = res.cov_params_robust_approx
res.cov_kwds['description'] = (
'Quasi-maximum likelihood covariance matrix used for'
' robustness to some misspecifications; calculated using'
' numerical (%s) differentiation.' % approx_type_str)
res.cov_kwds['description'] = descriptions['robust-approx'].format(
approx_type=approx_type_str)
else:
raise NotImplementedError('Invalid covariance matrix type.')

Expand Down

0 comments on commit 6453c81

Please sign in to comment.