Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: add OLSVectorized, initial version
- Loading branch information
Showing
3 changed files
with
248 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Tue Nov 13 16:24:12 2018 | ||
Author: Josef Perktold | ||
""" | ||
|
||
import numpy as np | ||
from statsmodels.tools.decorators import (cache_readonly, | ||
cache_writable) | ||
|
||
from statsmodels.regression.linear_model import OLS, RegressionResults | ||
|
||
from numpy.testing import assert_allclose | ||
|
||
|
||
class OLSVectorizedResults(RegressionResults): | ||
r"""results class for vectorized OLS | ||
""" | ||
|
||
@cache_writable() | ||
def scale(self): | ||
wresid = self.wresid | ||
return (wresid * wresid).sum(0) / self.df_resid | ||
|
||
|
||
@cache_readonly | ||
def bse(self): | ||
return np.sqrt(self.scale * | ||
np.diag(self.normalized_cov_params)[:, None]) | ||
|
||
@cache_readonly | ||
def ssr(self): | ||
wresid = self.wresid | ||
return (wresid * wresid).sum(0) | ||
|
||
@cache_readonly | ||
def uncentered_tss(self): | ||
wendog = self.model.wendog | ||
return (wendog * wendog).sum(0) | ||
|
||
def conf_int(self, alpha=.05, cols=None): | ||
#print('using OLSVectorizedResults.conf_int') | ||
ci = super(OLSVectorizedResults, self).conf_int(alpha=alpha, | ||
cols=cols) | ||
|
||
return np.rollaxis(ci, -1) | ||
|
||
def t_test(self, r_matrix, cov_p=None, scale=None, use_t=None): | ||
""" | ||
Compute a t-test for a single linear hypothesis of the form Rb = q | ||
This is an adjusted version of the corresponding LikelihoodModelResults | ||
method adjusted to be vectorized across endog. | ||
Parameters | ||
---------- | ||
r_matrix : array-like, str, tuple | ||
- array : If an array is given, a p x k 2d array or length k 1d | ||
array specifying the linear restrictions. It is assumed | ||
that the linear combination is equal to zero. | ||
- str : The full hypotheses to test can be given as a string. | ||
See the examples. | ||
- tuple : A tuple of arrays in the form (R, q). If q is given, | ||
can be either a scalar or a length p row vector. | ||
cov_p : array-like, optional | ||
An alternative estimate for the parameter covariance matrix. | ||
If None is given, self.normalized_cov_params is used. | ||
scale : float, optional | ||
An optional `scale` to use. Default is the scale specified | ||
by the model fit. | ||
use_t : bool, optional | ||
If use_t is None, then the default of the model is used. | ||
If use_t is True, then the p-values are based on the t | ||
distribution. | ||
If use_t is False, then the p-values are based on the normal | ||
distribution. | ||
Returns | ||
------- | ||
res : ContrastResults instance | ||
The results for the test are attributes of this results instance. | ||
The available results have the same elements as the parameter table | ||
in `summary()`. | ||
Examples | ||
-------- | ||
>>> import numpy as np | ||
>>> import statsmodels.api as sm | ||
See Also | ||
--------- | ||
tvalues : individual t statistics | ||
f_test : for F tests | ||
patsy.DesignInfo.linear_constraint | ||
""" | ||
from statsmodels.tools.tools import recipr | ||
from statsmodels.stats.contrast import ContrastResults | ||
from patsy import DesignInfo | ||
names = self.model.data.param_names | ||
LC = DesignInfo(names).linear_constraint(r_matrix) | ||
r_matrix, q_matrix = LC.coefs, LC.constants | ||
num_ttests = r_matrix.shape[0] | ||
num_params = r_matrix.shape[1] | ||
|
||
if (cov_p is None and self.normalized_cov_params is None and | ||
not hasattr(self, 'cov_params_default')): | ||
raise ValueError('Need covariance of parameters for computing ' | ||
'T statistics') | ||
if num_params != self.params.shape[0]: | ||
raise ValueError('r_matrix and params are not aligned') | ||
if q_matrix is None: | ||
q_matrix = np.zeros(num_ttests) | ||
else: | ||
q_matrix = np.asarray(q_matrix) | ||
q_matrix = q_matrix.squeeze() | ||
if q_matrix.size > 1: | ||
if q_matrix.shape[0] != num_ttests: | ||
raise ValueError("r_matrix and q_matrix must have the same " | ||
"number of rows") | ||
|
||
if use_t is None: | ||
# switch to use_t false if undefined | ||
use_t = (hasattr(self, 'use_t') and self.use_t) | ||
|
||
_t = _sd = None | ||
|
||
_effect = np.dot(r_matrix, self.params) | ||
|
||
# vectorized OLS: we use initially scale=1 and then add the scale | ||
cov_p = self.normalized_cov_params | ||
# Perform the test | ||
if num_ttests > 1: | ||
_sd = np.sqrt(np.diag(self.cov_params( | ||
r_matrix=r_matrix, cov_p=cov_p))) | ||
else: | ||
_sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p)) | ||
|
||
_sd = np.squeeze(_sd) * np.sqrt(self.scale) | ||
_effect = np.squeeze(_effect) | ||
_t = (_effect - q_matrix) * recipr(_sd) | ||
|
||
df_resid = getattr(self, 'df_resid_inference', self.df_resid) | ||
|
||
if use_t: | ||
return ContrastResults(effect=_effect, t=_t, sd=_sd, | ||
df_denom=df_resid) | ||
else: | ||
return ContrastResults(effect=_effect, statistic=_t, sd=_sd, | ||
df_denom=df_resid, | ||
distribution='norm') | ||
|
||
def summary(self): | ||
from statsmodels.iolib.summary import summary_params_2dflat | ||
summ = summary_params_2dflat(self, | ||
endog_names=self.model.endog_names)[1] | ||
return summ | ||
|
||
|
||
class OLSVectorized(OLS): | ||
_results_class = OLSVectorizedResults |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Tue Nov 13 20:33:02 2018 | ||
Author: Josef Perktold | ||
""" | ||
|
||
import numpy as np | ||
from numpy.testing import assert_allclose | ||
from statsmodels.regression.linear_model import OLS | ||
from statsmodels.regression.special_linear_model import OLSVectorized | ||
|
||
|
||
class TestOLSVectorized(object): | ||
|
||
@classmethod | ||
def setup_class(cls): | ||
np.random.seed(3) | ||
sig_e = 0.1 | ||
|
||
exog = (np.repeat(np.arange(3), 20)[:,None] == | ||
np.arange(3)).astype(float) | ||
|
||
params_true = np.array([[1, 1, 1], [1, 1, 1], | ||
[1, 1.5, 2], [1, 1.5, 2]]).T | ||
|
||
y_true = exog.dot(params_true) | ||
endog = y_true + sig_e * np.random.randn(*y_true.shape) | ||
|
||
cls.res1 = OLSVectorized(endog, exog).fit() | ||
cls.res2_list = [OLS(endog[:, i], exog).fit() for i in range(4)] | ||
|
||
def test_attributes(self): | ||
res1 = self.res1 | ||
res2_list = self.res2_list | ||
|
||
|
||
attrs = ['params', 'scale', 'bse', 'tvalues', 'pvalues', 'ssr', 'centered_tss', | ||
'rsquared', 'llf', 'aic', 'bic', 'fvalue', 'f_pvalue'] | ||
for attr in attrs: | ||
value = getattr(res1, attr) | ||
res2_value = np.array([getattr(r, attr) for r in res2_list]).T | ||
assert_allclose(value, res2_value, rtol=1e-13) | ||
|
||
def test_methods(self): | ||
res1 = self.res1 | ||
res2_list = self.res2_list | ||
|
||
ci1 = res1.conf_int() | ||
ci2 = np.array([r.conf_int() for r in res2_list]) | ||
ci2 = np.rollaxis(ci2, 0, 3) | ||
assert_allclose(ci1, ci2, rtol=1e-13) | ||
|
||
# smoke test | ||
res1.summary() | ||
|
||
def test_wald(self): | ||
res1 = self.res1 | ||
res2_list = self.res2_list | ||
|
||
tt1 = res1.t_test('x1=x2') | ||
tt2_list = [r.t_test('x1=x2') for r in res2_list] | ||
attrs = ['effect', 'sd', 'tvalue', 'pvalue', 'df_denom', 'statistic'] | ||
for attr in attrs: | ||
value = getattr(tt1, attr) | ||
res2_value = np.array([getattr(t, attr) for t in tt2_list]).squeeze() | ||
assert_allclose(value, res2_value, rtol=1e-12, err_msg=attr) | ||
|
||
ci1 = tt1.conf_int() | ||
ci2 = np.array([t.conf_int() for t in tt2_list]).squeeze() | ||
assert_allclose(ci1, ci2, rtol=1e-12) | ||
|
||
# smoke test | ||
tt1.summary() |