Merge pull request #5308 from jbrockmendel/covtypes

REF: Collect covtype descriptions, de-duplicate normalization func
statsmodels · May 7, 2019 · 6453c81 · 6453c81
2 parents b0045ff + 4d7b38f
commit 6453c81
Show file tree

Hide file tree

Showing 4 changed files with 97 additions and 74 deletions.
diff --git a/statsmodels/base/covtype.py b/statsmodels/base/covtype.py
@@ -11,6 +11,58 @@
 
 import numpy as np
 
+descriptions = {  # covariance type -> text stored in cov_kwds['description']
+    'HC0': 'Standard Errors are heteroscedasticity robust (HC0)',
+    'HC1': 'Standard Errors are heteroscedasticity robust (HC1)',
+    'HC2': 'Standard Errors are heteroscedasticity robust (HC2)',
+    'HC3': 'Standard Errors are heteroscedasticity robust (HC3)',
+    'HAC': 'Standard Errors are heteroscedasticity and autocorrelation '
+           'robust (HAC) using {maxlags} lags and '
+           '{correction} small sample correction',  # filled via str.format
+    'fixed_scale': 'Standard Errors are based on fixed scale',
+    'cluster': 'Standard Errors are robust to cluster correlation (cluster)',
+    'HAC-Panel': 'Standard Errors are robust to '
+                 'cluster correlation (HAC-Panel)',
+    'HAC-Groupsum': 'Driscoll and Kraay Standard Errors are robust to '
+                    'cluster correlation (HAC-Groupsum)',
+    'none': 'Covariance matrix not calculated.',
+    'approx': 'Covariance matrix calculated using numerical ({approx_type}) '
+              'differentiation.',  # {approx_type} is a str.format field
+    'OPG': 'Covariance matrix calculated using the outer product of '
+           'gradients ({approx_type}).',
+    'OIM': 'Covariance matrix calculated using the observed information '
+           'matrix ({approx_type}) described in Harvey (1989).',
+    'robust': 'Quasi-maximum likelihood covariance matrix used for '
+              'robustness to some misspecifications; calculated using '
+              'numerical ({approx_type}) differentiation.',
+    'robust-OIM': 'Quasi-maximum likelihood covariance matrix used for '
+                  'robustness to some misspecifications; calculated using the '
+                  'observed information matrix ({approx_type}) described in '
+                  'Harvey (1989).',
+    'robust-approx': 'Quasi-maximum likelihood covariance matrix used for '
+                     'robustness to some misspecifications; calculated using '
+                     'numerical ({approx_type}) differentiation.',
+}
+
+
+def normalize_cov_type(cov_type):
+    """Normalize the cov_type string to a canonical version.
+
+    Parameters
+    ----------
+    cov_type : str
+        Covariance type name; only the exact lowercase aliases are mapped.
+
+    Returns
+    -------
+    normalized_cov_type : str
+    """
+    if cov_type == 'nw-panel':  # alias of 'hac-panel'
+        cov_type = 'hac-panel'
+    if cov_type == 'nw-groupsum':  # alias of 'hac-groupsum'
+        cov_type = 'hac-groupsum'
+    return cov_type  # any other value is passed through unchanged
+
 
 def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
     """create new results instance with robust covariance as default
@@ -134,11 +186,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
 
     import statsmodels.stats.sandwich_covariance as sw
 
-    #normalize names
-    if cov_type == 'nw-panel':
-        cov_type = 'hac-panel'
-    if cov_type == 'nw-groupsum':
-        cov_type = 'hac-groupsum'
+    cov_type = normalize_cov_type(cov_type)
+
     if 'kernel' in kwds:
         kwds['weights_func'] = kwds.pop('kernel')
     if 'weights_func' in kwds and not callable(kwds['weights_func']):
@@ -180,10 +229,9 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
     # TODO: make it DRYer   repeated code for checking kwds
     if cov_type.upper() in ('HC0', 'HC1', 'HC2', 'HC3'):
         if kwds:
-            raise ValueError('heteroscedasticity robust covarians ' +
+            raise ValueError('heteroscedasticity robust covariance '
                              'does not use keywords')
-        res.cov_kwds['description'] = ('Standard Errors are heteroscedasticity ' +
-                                       'robust ' + '(' + cov_type + ')')
+        res.cov_kwds['description'] = descriptions[cov_type.upper()]
 
         res.cov_params_default = getattr(self, 'cov_' + cov_type.upper(), None)
         if res.cov_params_default is None:
@@ -197,9 +245,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
         res.cov_kwds['weights_func'] = weights_func
         use_correction = kwds.get('use_correction', False)
         res.cov_kwds['use_correction'] = use_correction
-        res.cov_kwds['description'] = ('Standard Errors are heteroscedasticity ' +
-             'and autocorrelation robust (HAC) using %d lags and %s small ' +
-             'sample correction') % (maxlags, ['without', 'with'][use_correction])
+        res.cov_kwds['description'] =  descriptions['HAC'].format(
+            maxlags=maxlags, correction=['without', 'with'][use_correction])
 
         res.cov_params_default = sw.cov_hac_simple(self, nlags=maxlags,
                                              weights_func=weights_func,
@@ -241,8 +288,7 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
                                          use_correction=use_correction)[0]
         else:
             raise ValueError('only two groups are supported')
-        res.cov_kwds['description'] = ('Standard Errors are robust to' +
-                            'cluster correlation ' + '(' + cov_type + ')')
+        res.cov_kwds['description'] = descriptions['cluster']
 
     elif cov_type.lower() == 'hac-panel':
         #cluster robust standard errors
@@ -274,8 +320,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
         res.cov_params_default = sw.cov_nw_panel(self, maxlags, groupidx,
                                             weights_func=weights_func,
                                             use_correction=use_correction)
-        res.cov_kwds['description'] = ('Standard Errors are robust to' +
-                            'cluster correlation ' + '(' + cov_type + ')')
+        res.cov_kwds['description'] = descriptions['HAC-Panel']
+
     elif cov_type.lower() == 'hac-groupsum':
         # Driscoll-Kraay standard errors
         res.cov_kwds['time'] = time = kwds['time']
@@ -295,9 +341,7 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
         res.cov_params_default = sw.cov_nw_groupsum(self, maxlags, time,
                                         weights_func=weights_func,
                                         use_correction=use_correction)
-        res.cov_kwds['description'] = (
-                    'Driscoll and Kraay Standard Errors are robust to ' +
-                    'cluster correlation ' + '(' + cov_type + ')')
+        res.cov_kwds['description'] = descriptions['HAC-Groupsum']
     else:
         raise ValueError('cov_type not recognized. See docstring for ' +
                          'available options and spelling')

diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py
@@ -2252,12 +2252,10 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
         except in the case of cov_type `HCx`
         """
         import statsmodels.stats.sandwich_covariance as sw
+        from statsmodels.base.covtype import normalize_cov_type, descriptions
+
+        cov_type = normalize_cov_type(cov_type)
 
-        # normalize names
-        if cov_type == 'nw-panel':
-            cov_type = 'hac-panel'
-        if cov_type == 'nw-groupsum':
-            cov_type = 'hac-groupsum'
         if 'kernel' in kwds:
             kwds['weights_func'] = kwds.pop('kernel')
         if 'weights_func' in kwds and not callable(kwds['weights_func']):
@@ -2295,18 +2293,15 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
         #       other models
         # TODO: make it DRYer   repeated code for checking kwds
         if cov_type in ['fixed scale', 'fixed_scale']:
-            res.cov_kwds['description'] = ('Standard Errors are based on ' +
-                                           'fixed scale')
+            res.cov_kwds['description'] = descriptions['fixed_scale']
 
             res.cov_kwds['scale'] = scale = kwds.get('scale', 1.)
             res.cov_params_default = scale * res.normalized_cov_params
         elif cov_type.upper() in ('HC0', 'HC1', 'HC2', 'HC3'):
             if kwds:
-                raise ValueError('heteroscedasticity robust covarians ' +
+                raise ValueError('heteroscedasticity robust covariance '
                                  'does not use keywords')
-            res.cov_kwds['description'] = (
-                'Standard Errors are heteroscedasticity ' +
-                'robust ' + '(' + cov_type + ')')
+            res.cov_kwds['description'] = descriptions[cov_type.upper()]
             # TODO cannot access cov without calling se first
             getattr(self, cov_type.upper() + '_se')
             res.cov_params_default = getattr(self, 'cov_' + cov_type.upper())
@@ -2317,11 +2312,9 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
             res.cov_kwds['weights_func'] = weights_func
             use_correction = kwds.get('use_correction', False)
             res.cov_kwds['use_correction'] = use_correction
-            res.cov_kwds['description'] = (
-                'Standard Errors are heteroscedasticity and ' +
-                'autocorrelation robust (HAC) using %d lags and %s small ' +
-                'sample correction') % (maxlags,
-                                        ['without', 'with'][use_correction])
+            res.cov_kwds['description'] = descriptions['HAC'].format(
+                maxlags=maxlags,
+                correction=['without', 'with'][use_correction])
 
             res.cov_params_default = sw.cov_hac_simple(
                 self, nlags=maxlags, weights_func=weights_func,
@@ -2363,9 +2356,7 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
                     self, groups, use_correction=use_correction)[0]
             else:
                 raise ValueError('only two groups are supported')
-            res.cov_kwds['description'] = (
-                'Standard Errors are robust to' +
-                'cluster correlation ' + '(' + cov_type + ')')
+            res.cov_kwds['description'] = descriptions['cluster']
 
         elif cov_type.lower() == 'hac-panel':
             # cluster robust standard errors
@@ -2396,9 +2387,8 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
             res.cov_params_default = sw.cov_nw_panel(self, maxlags, groupidx,
                                                      weights_func=weights_func,
                                                      use_correction=use_correction)
-            res.cov_kwds['description'] = (
-                'Standard Errors are robust to' +
-                'cluster correlation ' + '(' + cov_type + ')')
+            res.cov_kwds['description'] = descriptions['HAC-Panel']
+
         elif cov_type.lower() == 'hac-groupsum':
             # Driscoll-Kraay standard errors
             res.cov_kwds['time'] = time = kwds['time']
@@ -2418,9 +2408,7 @@ def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
             res.cov_params_default = sw.cov_nw_groupsum(
                 self, maxlags, time, weights_func=weights_func,
                 use_correction=use_correction)
-            res.cov_kwds['description'] = (
-                        'Driscoll and Kraay Standard Errors are robust to ' +
-                        'cluster correlation ' + '(' + cov_type + ')')
+            res.cov_kwds['description'] = descriptions['HAC-Groupsum']
         else:
             raise ValueError('cov_type not recognized. See docstring for ' +
                              'available options and spelling')

diff --git a/statsmodels/tsa/regime_switching/markov_switching.py b/statsmodels/tsa/regime_switching/markov_switching.py
@@ -1934,6 +1934,8 @@ def __init__(self, model, params, results, cov_type='opg', cov_kwds=None,
                     self.smoothed_marginal_probabilities, index=index)
 
     def _get_robustcov_results(self, cov_type='opg', **kwargs):
+        from statsmodels.base.covtype import descriptions
+
         use_self = kwargs.pop('use_self', False)
         if use_self:
             res = self
@@ -1964,24 +1966,19 @@ def _get_robustcov_results(self, cov_type='opg', **kwargs):
         elif cov_type == 'none':
             res.cov_params_default = np.zeros((k_params, k_params)) * np.nan
             res._rank = np.nan
-            res.cov_kwds['description'] = 'Covariance matrix not calculated.'
+            res.cov_kwds['description'] = descriptions['none']
         elif self.cov_type == 'approx':
             res.cov_params_default = res.cov_params_approx
-            res.cov_kwds['description'] = (
-                'Covariance matrix calculated using numerical (%s)'
-                ' differentiation.' % approx_type_str)
+            res.cov_kwds['description'] = descriptions['approx'].format(
+                                                approx_type=approx_type_str)
         elif self.cov_type == 'opg':
             res.cov_params_default = res.cov_params_opg
-            res.cov_kwds['description'] = (
-                'Covariance matrix calculated using the outer product of'
-                ' gradients (%s).' % approx_type_str
-            )
+            res.cov_kwds['description'] = descriptions['OPG'].format(
+                                                approx_type=approx_type_str)
         elif self.cov_type == 'robust':
             res.cov_params_default = res.cov_params_robust
-            res.cov_kwds['description'] = (
-                'Quasi-maximum likelihood covariance matrix used for'
-                ' robustness to some misspecifications; calculated using'
-                ' numerical (%s) differentiation.' % approx_type_str)
+            res.cov_kwds['description'] = descriptions['robust'].format(
+                                                approx_type=approx_type_str)
         else:
             raise NotImplementedError('Invalid covariance matrix type.')
 

diff --git a/statsmodels/tsa/statespace/mlemodel.py b/statsmodels/tsa/statespace/mlemodel.py
@@ -1684,6 +1684,8 @@ def _get_robustcov_results(self, cov_type='opg', **kwargs):
           intermediate calculations use the 'approx' method.
         - 'none' for no covariance matrix calculation.
         """
+        from statsmodels.base.covtype import descriptions
+
         use_self = kwargs.pop('use_self', False)
         if use_self:
             res = self
@@ -1720,35 +1722,27 @@ def _get_robustcov_results(self, cov_type='opg', **kwargs):
         elif cov_type == 'none':
             res.cov_params_default = np.zeros((k_params, k_params)) * np.nan
             res._rank = np.nan
-            res.cov_kwds['description'] = 'Covariance matrix not calculated.'
+            res.cov_kwds['description'] = descriptions['none']
         elif self.cov_type == 'approx':
             res.cov_params_default = res.cov_params_approx
-            res.cov_kwds['description'] = (
-                'Covariance matrix calculated using numerical (%s)'
-                ' differentiation.' % approx_type_str)
+            res.cov_kwds['description'] = descriptions['approx'].format(
+                                                approx_type=approx_type_str)
         elif self.cov_type == 'oim':
             res.cov_params_default = res.cov_params_oim
-            res.cov_kwds['description'] = (
-                'Covariance matrix calculated using the observed information'
-                ' matrix (%s) described in Harvey (1989).' % approx_type_str)
+            res.cov_kwds['description'] = descriptions['OIM'].format(
+                                                approx_type=approx_type_str)
         elif self.cov_type == 'opg':
             res.cov_params_default = res.cov_params_opg
-            res.cov_kwds['description'] = (
-                'Covariance matrix calculated using the outer product of'
-                ' gradients (%s).' % approx_type_str)
+            res.cov_kwds['description'] = descriptions['OPG'].format(
+                                                approx_type=approx_type_str)
         elif self.cov_type == 'robust' or self.cov_type == 'robust_oim':
             res.cov_params_default = res.cov_params_robust_oim
-            res.cov_kwds['description'] = (
-                'Quasi-maximum likelihood covariance matrix used for'
-                ' robustness to some misspecifications; calculated using the'
-                ' observed information matrix (%s) described in'
-                ' Harvey (1989).' % approx_type_str)
+            res.cov_kwds['description'] = descriptions['robust-OIM'].format(
+                                                approx_type=approx_type_str)
         elif self.cov_type == 'robust_approx':
             res.cov_params_default = res.cov_params_robust_approx
-            res.cov_kwds['description'] = (
-                'Quasi-maximum likelihood covariance matrix used for'
-                ' robustness to some misspecifications; calculated using'
-                ' numerical (%s) differentiation.' % approx_type_str)
+            res.cov_kwds['description'] = descriptions['robust-approx'].format(
+                                                approx_type=approx_type_str)
         else:
             raise NotImplementedError('Invalid covariance matrix type.')