WIP: Summary2 #582

Closed · wants to merge 20 commits · +2,049 −1,843
@@ -515,7 +515,7 @@ def summary(self, alpha=.05):
('Method:', [method]),
('At:', [self.margeff_options['at']]),]
- from statsmodels.iolib.summary import (Summary, summary_params,
+ from statsmodels.iolib.summary_old import (Summary, summary_params,
table_extend)
exog_names = model.exog_names[:] # copy
smry = Summary()
@@ -30,6 +30,7 @@
import statsmodels.base.model as base
import statsmodels.regression.linear_model as lm
import statsmodels.base.wrapper as wrap
+from statsmodels.iolib.summary import Summary, summary_params, summary_model
from statsmodels.base.l1_slsqp import fit_l1_slsqp
try:
@@ -1761,6 +1762,7 @@ def fit(self, start_params=None, maxiter=35, method='bfgs', tol=1e-08):
### Results Class ###
class DiscreteResults(base.LikelihoodModelResults):
+
__doc__ = _discrete_results_docs % {"one_line_description" :
"A results class for the discrete dependent variable models.",
"extra_attr" : ""}
@@ -2056,21 +2058,23 @@ def margeff(self, at='overall', method='dydx', atexog=None, dummy=False,
return effects
- def summary(self, yname=None, xname=None, title=None, alpha=.05,
- yname_list=None):
+ def summary(self, title=None, xname=None, yname=None, alpha=.05,
+ float_format="%.4f"):
"""Summarize the Regression Results
Parameters
-----------
- yname : string, optional
- Default is `y`
- xname : list of strings, optional
- Default is `var_##` for ## in p the number of regressors
+ xname : list of strings, optional
+ Names of the independent variables; length must equal the number of parameters
+ yname : string, optional
+ Name of the dependent variable
title : string, optional
Title for the top table. If not None, then this replaces the
default title
alpha : float
significance level for the confidence intervals
+ float_format : string, optional
+ Print format for floats in the parameters summary
Returns
-------
@@ -2084,43 +2088,12 @@ def summary(self, yname=None, xname=None, title=None, alpha=.05,
results
"""
-
- top_left = [('Dep. Variable:', None),
- ('Model:', [self.model.__class__.__name__]),
- ('Method:', ['MLE']),
- ('Date:', None),
- ('Time:', None),
- #('No. iterations:', ["%d" % self.mle_retvals['iterations']]),
- ('converged:', ["%s" % self.mle_retvals['converged']])
- ]
-
- top_right = [('No. Observations:', None),
- ('Df Residuals:', None),
- ('Df Model:', None),
- ('Pseudo R-squ.:', ["%#6.4g" % self.prsquared]),
- ('Log-Likelihood:', None),
- ('LL-Null:', ["%#8.5g" % self.llnull]),
- ('LLR p-value:', ["%#6.4g" % self.llr_pvalue])
- ]
-
- if title is None:
- title = self.model.__class__.__name__ + ' ' + "Regression Results"
-
- #boiler plate
+ # Summary
from statsmodels.iolib.summary import Summary
smry = Summary()
- yname, yname_list = self._get_endog_name(yname, yname_list)
- # for top of table
- smry.add_table_2cols(self, gleft=top_left, gright=top_right, #[],
- yname=yname, xname=xname, title=title)
- # for parameters, etc
- smry.add_table_params(self, yname=yname_list, xname=xname, alpha=.05,
- use_t=False)
-
- #diagnostic table not used yet
- #smry.add_table_2cols(self, gleft=diagn_left, gright=diagn_right,
- # yname=yname, xname=xname,
- # title="")
+ smry.add_base(results=self, alpha=alpha, float_format=float_format,
+ xname=xname, yname=yname, title=title)
+
return smry
class CountResults(DiscreteResults):
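
A minimal usage sketch of the revised summary signature above; the dataset, fit options, and formatting choices here are illustrative only and not part of this diff:

    # Sketch: exercises the new title/alpha/float_format arguments on a fitted Logit
    import statsmodels.api as sm
    spector = sm.datasets.spector.load_pandas()
    exog = sm.add_constant(spector.exog, prepend=False)
    res = sm.Logit(spector.endog, exog).fit(disp=0)
    print(res.summary(title="Logit results", alpha=0.10, float_format="%.3f"))
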
@@ -2172,32 +2145,28 @@ def pred_table(self, threshold=.5):
return np.histogram2d(actual, pred, bins=2)[0]
def summary(self, yname=None, xname=None, title=None, alpha=.05,
- yname_list=None):
- smry = super(BinaryResults, self).summary(yname, xname, title, alpha,
- yname_list)
+ float_format="%.4f"):
+ smry = super(BinaryResults, self).summary(yname=yname, xname=xname,
+ title=title, alpha=alpha, float_format=float_format)
+
+ # Diagnostics (TODO: Improve diagnostics)
fittedvalues = self.model.cdf(self.fittedvalues)
absprederror = np.abs(self.model.endog - fittedvalues)
predclose_sum = (absprederror < 1e-4).sum()
predclose_frac = predclose_sum / len(fittedvalues)
- #add warnings/notes
- etext = []
- if predclose_sum == len(fittedvalues): #nobs?
- wstr = "Complete Separation: The results show that there is"
- wstr += "complete separation.\n"
- wstr += "In this case the Maximum Likelihood Estimator does "
- wstr += "not exist and the parameters\n"
- wstr += "are not identified."
- etext.append(wstr)
- elif predclose_frac > 0.1: #TODO: get better diagnosis
- wstr = "Possibly complete quasi-separation: A fraction "
- wstr += "%4.2f of observations can be\n" % predclose_frac
- wstr += "perfectly predicted. This might indicate that there "
- wstr += "is complete\nquasi-separation. In this case some "
- wstr += "parameters will not be identified."
- etext.append(wstr)
- if etext:
- smry.add_extra_txt(etext)
+ if predclose_sum == len(fittedvalues):  # TODO: nobs?
+     warn = ("Complete Separation: The results show that there is "
+             "complete separation. In this case the Maximum Likelihood "
+             "Estimator does not exist and the parameters are not "
+             "identified.")
+     smry.add_text(warn)
+ elif predclose_frac > 0.1:
+     warn = ("Possibly complete quasi-separation: A fraction %4.2f of "
+             "observations can be perfectly predicted. This might indicate "
+             "that there is complete quasi-separation. In this case some "
+             "parameters will not be identified." % predclose_frac)
+     smry.add_text(warn)
+
return smry
summary.__doc__ = DiscreteResults.summary.__doc__
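
The separation warnings above hinge on counting near-perfect predictions; a standalone sketch of that check with made-up numbers:

    # Illustration of the check in BinaryResults.summary (values are made up)
    import numpy as np
    fittedvalues = np.array([0.999999, 0.000001, 0.73, 0.20])   # cdf of the linear predictor
    endog = np.array([1.0, 0.0, 1.0, 0.0])
    absprederror = np.abs(endog - fittedvalues)
    predclose_frac = (absprederror < 1e-4).sum() / len(fittedvalues)
    print(predclose_frac)   # 0.5 > 0.1, so the quasi-separation note would be appended
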
@@ -2285,6 +2254,24 @@ def conf_int(self, alpha=.05, cols=None):
def margeff(self):
raise NotImplementedError("Use get_margeff instead")
+ def summary(self, alpha=0.05, float_format="%.4f"):
+ smry = Summary()
+ smry.add_dict(summary_model(self))
+ # One data frame per value of endog
+ eqn = self.params.shape[1]
+ confint = self.conf_int(alpha)
+ for i in range(eqn):
+ coefs = summary_params(self, alpha, self.params[:,i],
+ self.bse[:,i], self.tvalues[:,i], self.pvalues[:,i],
+ confint[i])
+ # Header must show value of endog
+ level_str = self.model.endog_names + ' = ' + str(i)
+ coefs[level_str] = coefs.index
+ coefs = coefs.ix[:,[-1,0,1,2,3,4,5]]
+ smry.add_df(coefs, index=False, header=True, float_format=float_format)
+ smry.add_title(results=self)
+ return smry
+
class L1MultinomialResults(MultinomialResults):
__doc__ = _discrete_results_docs % {"one_line_description" :
"A results class for multinomial data fit by l1 regularization",
@@ -1,49 +1,49 @@
-==============================================================================
- y=1 coef std err z P>|z| [95.0% Conf. Int.]
-------------------------------------------------------------------------------
-const -0.2794 0.612 -0.457 0.648 -1.479 0.920
-x1 -0.0119 0.034 -0.350 0.727 -0.079 0.055
-x2 0.2946 0.093 3.152 0.002 0.111 0.478
-x3 -0.0257 0.007 -3.873 0.000 -0.039 -0.013
-x4 0.0897 0.069 1.297 0.195 -0.046 0.225
-------------------------------------------------------------------------------
- y=2 coef std err z P>|z| [95.0% Conf. Int.]
-------------------------------------------------------------------------------
-const -1.7392 0.720 -2.414 0.016 -3.151 -0.327
-x1 -0.0941 0.039 -2.415 0.016 -0.170 -0.018
-x2 0.3838 0.108 3.559 0.000 0.172 0.595
-x3 -0.0230 0.008 -2.914 0.004 -0.038 -0.008
-x4 0.2451 0.080 3.053 0.002 0.088 0.402
-------------------------------------------------------------------------------
- y=3 coef std err z P>|z| [95.0% Conf. Int.]
-------------------------------------------------------------------------------
-const -3.0091 1.069 -2.815 0.005 -5.104 -0.914
-x1 -0.1115 0.057 -1.965 0.049 -0.223 -0.000
-x2 0.5641 0.159 3.557 0.000 0.253 0.875
-x3 -0.0156 0.011 -1.391 0.164 -0.038 0.006
-x4 0.0687 0.119 0.577 0.564 -0.165 0.302
-------------------------------------------------------------------------------
- y=4 coef std err z P>|z| [95.0% Conf. Int.]
-------------------------------------------------------------------------------
-const -6.5607 0.870 -7.540 0.000 -8.266 -4.855
-x1 -0.0949 0.043 -2.183 0.029 -0.180 -0.010
-x2 1.2668 0.128 9.897 0.000 1.016 1.518
-x3 -0.0109 0.008 -1.315 0.189 -0.027 0.005
-x4 0.3067 0.088 3.475 0.001 0.134 0.480
-------------------------------------------------------------------------------
- y=5 coef std err z P>|z| [95.0% Conf. Int.]
-------------------------------------------------------------------------------
-const -6.0760 0.774 -7.851 0.000 -7.593 -4.559
-x1 -0.0967 0.039 -2.479 0.013 -0.173 -0.020
-x2 1.3377 0.116 11.493 0.000 1.110 1.566
-x3 -0.0198 0.008 -2.631 0.009 -0.035 -0.005
-x4 0.3193 0.080 4.001 0.000 0.163 0.476
-------------------------------------------------------------------------------
- y=6 coef std err z P>|z| [95.0% Conf. Int.]
-------------------------------------------------------------------------------
-const -10.5973 0.957 -11.076 0.000 -12.473 -8.722
-x1 -0.1447 0.042 -3.475 0.001 -0.226 -0.063
-x2 2.0395 0.141 14.479 0.000 1.763 2.316
-x3 -0.0129 0.008 -1.612 0.107 -0.029 0.003
-x4 0.4576 0.085 5.352 0.000 0.290 0.625
-==============================================================================
+--------------------------------------------------------------
+y = 0 Coef. Std.Err. t P>|t| [0.025 0.975]
+--------------------------------------------------------------
+const -0.2794 0.6119 -0.4566 0.6480 -1.4787 0.9199
+ x1 -0.0119 0.0342 -0.3496 0.7266 -0.0789 0.0550
+ x2 0.2946 0.0935 3.1518 0.0016 0.1114 0.4778
+ x3 -0.0257 0.0066 -3.8728 0.0001 -0.0387 -0.0127
+ x4 0.0897 0.0691 1.2971 0.1946 -0.0458 0.2252
+--------------------------------------------------------------
+y = 1 Coef. Std.Err. t P>|t| [0.025 0.975]
+--------------------------------------------------------------
+const -1.7392 0.7205 -2.4141 0.0158 -3.1513 -0.3272
+ x1 -0.0941 0.0389 -2.4149 0.0157 -0.1704 -0.0177
+ x2 0.3838 0.1078 3.5592 0.0004 0.1724 0.5951
+ x3 -0.0230 0.0079 -2.9137 0.0036 -0.0384 -0.0075
+ x4 0.2451 0.0803 3.0532 0.0023 0.0878 0.4024
+--------------------------------------------------------------
+y = 2 Coef. Std.Err. t P>|t| [0.025 0.975]
+--------------------------------------------------------------
+const -3.0091 1.0690 -2.8149 0.0049 -5.1043 -0.9139
+ x1 -0.1115 0.0568 -1.9647 0.0494 -0.2228 -0.0003
+ x2 0.5641 0.1586 3.5570 0.0004 0.2533 0.8749
+ x3 -0.0156 0.0112 -1.3910 0.1642 -0.0375 0.0064
+ x4 0.0687 0.1191 0.5767 0.5642 -0.1648 0.3022
+--------------------------------------------------------------
+y = 3 Coef. Std.Err. t P>|t| [0.025 0.975]
+--------------------------------------------------------------
+const -6.5607 0.8702 -7.5397 0.0000 -8.2662 -4.8552
+ x1 -0.0949 0.0435 -2.1832 0.0290 -0.1800 -0.0097
+ x2 1.2668 0.1280 9.8972 0.0000 1.0159 1.5177
+ x3 -0.0109 0.0083 -1.3145 0.1887 -0.0271 0.0053
+ x4 0.3067 0.0882 3.4755 0.0005 0.1337 0.4796
+--------------------------------------------------------------
+y = 4 Coef. Std.Err. t P>|t| [0.025 0.975]
+--------------------------------------------------------------
+const -6.0760 0.7739 -7.8512 0.0000 -7.5928 -4.5592
+ x1 -0.0967 0.0390 -2.4786 0.0132 -0.1731 -0.0202
+ x2 1.3377 0.1164 11.4931 0.0000 1.1096 1.5658
+ x3 -0.0198 0.0075 -2.6306 0.0085 -0.0345 -0.0050
+ x4 0.3193 0.0798 4.0013 0.0001 0.1629 0.4757
+--------------------------------------------------------------
+y = 5 Coef. Std.Err. t P>|t| [0.025 0.975]
+--------------------------------------------------------------
+const -10.5973 0.9568 -11.0760 0.0000 -12.4725 -8.7220
+ x1 -0.1447 0.0416 -3.4755 0.0005 -0.2263 -0.0631
+ x2 2.0395 0.1409 14.4786 0.0000 1.7634 2.3156
+ x3 -0.0129 0.0080 -1.6119 0.1070 -0.0285 0.0028
+ x4 0.4576 0.0855 5.3523 0.0000 0.2900 0.6251
+==============================================================
@@ -937,7 +937,7 @@ def test_issue_339():
cur_dir = os.path.dirname(os.path.abspath(__file__))
test_case_file = os.path.join(cur_dir, 'results', 'mn_logit_summary.txt')
test_case = open(test_case_file, 'r').read()
- np.testing.assert_(smry == test_case[:-1])
+ np.testing.assert_(smry == test_case)
def test_issue_341():
data = sm.datasets.anes96.load()