/
special_linear_model.py
161 lines (131 loc) · 5.55 KB
/
special_linear_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 13 16:24:12 2018
Author: Josef Perktold
"""
import numpy as np
from statsmodels.tools.decorators import (cache_readonly,
cache_writable)
from statsmodels.regression.linear_model import OLS, RegressionResults
from numpy.testing import assert_allclose
class OLSVectorizedResults(RegressionResults):
r"""results class for vectorized OLS
"""
@cache_writable()
def scale(self):
wresid = self.wresid
return (wresid * wresid).sum(0) / self.df_resid
@cache_readonly
def bse(self):
return np.sqrt(self.scale *
np.diag(self.normalized_cov_params)[:, None])
@cache_readonly
def ssr(self):
wresid = self.wresid
return (wresid * wresid).sum(0)
@cache_readonly
def uncentered_tss(self):
wendog = self.model.wendog
return (wendog * wendog).sum(0)
def conf_int(self, alpha=.05, cols=None):
#print('using OLSVectorizedResults.conf_int')
ci = super(OLSVectorizedResults, self).conf_int(alpha=alpha,
cols=cols)
return np.rollaxis(ci, -1)
def t_test(self, r_matrix, cov_p=None, scale=None, use_t=None):
"""
Compute a t-test for a single linear hypothesis of the form Rb = q
This is an adjusted version of the corresponding LikelihoodModelResults
method adjusted to be vectorized across endog.
Parameters
----------
r_matrix : array-like, str, tuple
- array : If an array is given, a p x k 2d array or length k 1d
array specifying the linear restrictions. It is assumed
that the linear combination is equal to zero.
- str : The full hypotheses to test can be given as a string.
See the examples.
- tuple : A tuple of arrays in the form (R, q). If q is given,
can be either a scalar or a length p row vector.
cov_p : array-like, optional
An alternative estimate for the parameter covariance matrix.
If None is given, self.normalized_cov_params is used.
scale : float, optional
An optional `scale` to use. Default is the scale specified
by the model fit.
use_t : bool, optional
If use_t is None, then the default of the model is used.
If use_t is True, then the p-values are based on the t
distribution.
If use_t is False, then the p-values are based on the normal
distribution.
Returns
-------
res : ContrastResults instance
The results for the test are attributes of this results instance.
The available results have the same elements as the parameter table
in `summary()`.
Examples
--------
>>> import numpy as np
>>> import statsmodels.api as sm
See Also
---------
tvalues : individual t statistics
f_test : for F tests
patsy.DesignInfo.linear_constraint
"""
from statsmodels.tools.tools import recipr
from statsmodels.stats.contrast import ContrastResults
from patsy import DesignInfo
names = self.model.data.param_names
LC = DesignInfo(names).linear_constraint(r_matrix)
r_matrix, q_matrix = LC.coefs, LC.constants
num_ttests = r_matrix.shape[0]
num_params = r_matrix.shape[1]
if (cov_p is None and self.normalized_cov_params is None and
not hasattr(self, 'cov_params_default')):
raise ValueError('Need covariance of parameters for computing '
'T statistics')
if num_params != self.params.shape[0]:
raise ValueError('r_matrix and params are not aligned')
if q_matrix is None:
q_matrix = np.zeros(num_ttests)
else:
q_matrix = np.asarray(q_matrix)
q_matrix = q_matrix.squeeze()
if q_matrix.size > 1:
if q_matrix.shape[0] != num_ttests:
raise ValueError("r_matrix and q_matrix must have the same "
"number of rows")
if use_t is None:
# switch to use_t false if undefined
use_t = (hasattr(self, 'use_t') and self.use_t)
_t = _sd = None
_effect = np.dot(r_matrix, self.params)
# vectorized OLS: we use initially scale=1 and then add the scale
cov_p = self.normalized_cov_params
# Perform the test
if num_ttests > 1:
_sd = np.sqrt(np.diag(self.cov_params(
r_matrix=r_matrix, cov_p=cov_p)))
else:
_sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p))
_sd = np.squeeze(_sd) * np.sqrt(self.scale)
_effect = np.squeeze(_effect)
_t = (_effect - q_matrix) * recipr(_sd)
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
if use_t:
return ContrastResults(effect=_effect, t=_t, sd=_sd,
df_denom=df_resid)
else:
return ContrastResults(effect=_effect, statistic=_t, sd=_sd,
df_denom=df_resid,
distribution='norm')
def summary(self):
from statsmodels.iolib.summary import summary_params_2dflat
summ = summary_params_2dflat(self,
endog_names=self.model.endog_names)[1]
return summ
class OLSVectorized(OLS):
_results_class = OLSVectorizedResults