-
Notifications
You must be signed in to change notification settings - Fork 2.8k
/
test_ordinal_model.py
261 lines (211 loc) · 9.25 KB
/
test_ordinal_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
"""
Test for ordinal models
"""
import numpy as np
import scipy.stats as stats
from numpy.testing import assert_allclose, assert_equal
from .results.results_ordinal_model import data_store as ds
from statsmodels.miscmodels.ordinal_model import OrderedModel
class CheckOrdinalModelMixin(object):
def test_basic(self):
# checks basic results againt R MASS package
n_cat = ds.n_ordinal_cat
res1 = self.res1
res2 = self.res2
# coefficients values, standard errors, t & p values
assert_allclose(res1.params[:-n_cat + 1],
res2.coefficients_val, atol=2e-4)
assert_allclose(res1.bse[:-n_cat + 1],
res2.coefficients_stdE, rtol=0.003, atol=1e-5)
assert_allclose(res1.tvalues[:-n_cat + 1],
res2.coefficients_tval, rtol=0.003, atol=7e-4)
assert_allclose(res1.pvalues[:-n_cat + 1],
res2.coefficients_pval, rtol=0.009, atol=1e-5)
# thresholds are given with exponentiated increments
# from the first threshold
assert_allclose(
res1.model.transform_threshold_params(res1.params)[1:-1],
res2.thresholds, atol=4e-4)
# probabilities
assert_allclose(res1.predict()[:7, :],
res2.prob_pred, atol=5e-5)
def test_pandas(self):
# makes sure that the Pandas ecosystem is supported
res1 = self.res1
resp = self.resp
# converges slightly differently why?
assert_allclose(res1.params, resp.params, atol=1e-10)
assert_allclose(res1.bse, resp.bse, atol=1e-10)
assert_allclose(res1.model.endog, resp.model.endog, rtol=1e-10)
assert_allclose(res1.model.exog, resp.model.exog, rtol=1e-10)
def test_formula(self):
# makes sure the "R-way" of writing models is supported
res1 = self.res1
resf = self.resf
# converges slightly differently why? yet e-5 is ok
assert_allclose(res1.params, resf.params, atol=5e-5)
assert_allclose(res1.bse, resf.bse, atol=5e-5)
assert_allclose(res1.model.endog, resf.model.endog, rtol=1e-10)
assert_allclose(res1.model.exog, resf.model.exog, rtol=1e-10)
def test_unordered(self):
# makes sure that ordered = True is optional for the endog Serie
# et categories have to be set in the right order
res1 = self.res1
resf = self.resu
# converges slightly differently why?
assert_allclose(res1.params, resf.params, atol=1e-10)
assert_allclose(res1.bse, resf.bse, atol=1e-10)
assert_allclose(res1.model.endog, resf.model.endog, rtol=1e-10)
assert_allclose(res1.model.exog, resf.model.exog, rtol=1e-10)
def test_results_other(self):
res1 = self.res1 # numpy
resp = self.resp # pandas
param_names_np = ['x1', 'x2', 'x3', '0/1', '1/2']
param_names_pd = ['pared', 'public', 'gpa', 'unlikely/somewhat likely',
'somewhat likely/very likely']
assert res1.model.data.param_names == param_names_np
assert self.resp.model.data.param_names == param_names_pd
assert self.resp.model.endog_names == "apply"
# results
if hasattr(self, "pred_table"):
table = res1.pred_table()
assert_equal(table.values, self.pred_table)
# smoke test
res1.summary()
# inherited
tt = res1.t_test(np.eye(len(res1.params)))
assert_allclose(tt.pvalue, res1.pvalues, rtol=1e-13)
tt = resp.t_test(['pared', 'public', 'gpa']) # pandas names
assert_allclose(tt.pvalue, res1.pvalues[:3], rtol=1e-13)
pred = res1.predict(exog=res1.model.exog[-5:])
fitted = res1.predict()
assert_allclose(pred, fitted[-5:], rtol=1e-13)
class TestLogitModel(CheckOrdinalModelMixin):
@classmethod
def setup_class(cls):
data = ds.df
data_unordered = ds.df_unordered
# standard fit
mod = OrderedModel(data['apply'].values.codes,
np.asarray(data[['pared', 'public', 'gpa']], float),
distr='logit')
res = mod.fit(method='bfgs', disp=False)
# standard fit with pandas input
modp = OrderedModel(data['apply'],
data[['pared', 'public', 'gpa']],
distr='logit')
resp = modp.fit(method='bfgs', disp=False)
# fit with formula
modf = OrderedModel.from_formula(
"apply ~ pared + public + gpa - 1",
data={"apply": data['apply'].values.codes,
"pared": data['pared'],
"public": data['public'],
"gpa": data['gpa']},
distr='logit')
resf = modf.fit(method='bfgs', disp=False)
# fit on data with ordered=False
modu = OrderedModel(
data_unordered['apply'].values.codes,
np.asarray(data_unordered[['pared', 'public', 'gpa']], float),
distr='logit')
resu = modu.fit(method='bfgs', disp=False)
from .results.results_ordinal_model import res_ord_logit as res2
cls.res2 = res2
cls.res1 = res
cls.resp = resp
cls.resf = resf
cls.resu = resu
class TestProbitModel(CheckOrdinalModelMixin):
@classmethod
def setup_class(cls):
data = ds.df
data_unordered = ds.df_unordered
mod = OrderedModel(data['apply'].values.codes,
np.asarray(data[['pared', 'public', 'gpa']], float),
distr='probit')
res = mod.fit(method='bfgs', disp=False)
modp = OrderedModel(data['apply'],
data[['pared', 'public', 'gpa']],
distr='probit')
resp = modp.fit(method='bfgs', disp=False)
modf = OrderedModel.from_formula(
"apply ~ pared + public + gpa - 1",
data={"apply": data['apply'].values.codes,
"pared": data['pared'],
"public": data['public'],
"gpa": data['gpa']},
distr='probit')
resf = modf.fit(method='bfgs', disp=False)
modu = OrderedModel(
data_unordered['apply'].values.codes,
np.asarray(data_unordered[['pared', 'public', 'gpa']], float),
distr='probit')
resu = modu.fit(method='bfgs', disp=False)
from .results.results_ordinal_model import res_ord_probit as res2
cls.res2 = res2
cls.res1 = res
cls.resp = resp
cls.resf = resf
cls.resu = resu
# regression numbers
cls.pred_table = np.array([[202, 18, 0, 220],
[112, 28, 0, 140],
[ 27, 13, 0, 40], # noqa
[341, 59, 0, 400]], dtype=np.int64)
def test_loglikerelated(self):
res1 = self.res1
# res2 = self.res2
mod = res1.model
fact = 1.1 # evaluate away from optimum
score1 = mod.score(res1.params * fact)
score_obs_numdiff = mod.score_obs(res1.params * fact)
score_obs_exog = mod.score_obs_(res1.params * fact)
assert_allclose(score_obs_numdiff.sum(0), score1, atol=1e-7)
assert_allclose(score_obs_exog.sum(0), score1[:mod.k_vars], atol=1e-7)
# null model
mod_null = OrderedModel(mod.endog, None,
offset=np.zeros(mod.nobs),
distr=mod.distr)
null_params = mod.start_params
res_null = mod_null.fit(method='bfgs', disp=False)
assert_allclose(res_null.params, null_params[mod.k_vars:], rtol=1e-8)
class TestCLogLogModel(CheckOrdinalModelMixin):
@classmethod
def setup_class(cls):
data = ds.df
data_unordered = ds.df_unordered
# a Scipy distribution defined minimally
class CLogLog(stats.rv_continuous):
def _ppf(self, q):
return np.log(-np.log(1 - q))
def _cdf(self, x):
return 1 - np.exp(-np.exp(x))
cloglog = CLogLog()
mod = OrderedModel(data['apply'].values.codes,
np.asarray(data[['pared', 'public', 'gpa']], float),
distr=cloglog)
res = mod.fit(method='bfgs', disp=False)
modp = OrderedModel(data['apply'],
data[['pared', 'public', 'gpa']],
distr=cloglog)
resp = modp.fit(method='bfgs', disp=False)
modf = OrderedModel.from_formula(
"apply ~ pared + public + gpa - 1",
data={"apply": data['apply'].values.codes,
"pared": data['pared'],
"public": data['public'],
"gpa": data['gpa']},
distr=cloglog)
resf = modf.fit(method='bfgs', disp=False)
modu = OrderedModel(
data_unordered['apply'].values.codes,
np.asarray(data_unordered[['pared', 'public', 'gpa']], float),
distr=cloglog)
resu = modu.fit(method='bfgs', disp=False)
from .results.results_ordinal_model import res_ord_cloglog as res2
cls.res2 = res2
cls.res1 = res
cls.resp = resp
cls.resf = resf
cls.resu = resu