From b551e45e2bd1f9d8a955da948c16f14928f6eae0 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Wed, 24 Sep 2014 09:31:14 -0400 Subject: [PATCH 01/15] FIXUP/WIP: Refactor to use generic data handling --- statsmodels/regression/mixed_linear_model.py | 35 +++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/statsmodels/regression/mixed_linear_model.py b/statsmodels/regression/mixed_linear_model.py index 9cd8d7ace97..4e9f1c7c926 100644 --- a/statsmodels/regression/mixed_linear_model.py +++ b/statsmodels/regression/mixed_linear_model.py @@ -462,7 +462,7 @@ def __init__(self, endog, exog, groups, exog_re=None, # Calling super creates self.endog, etc. as ndarrays and the # original exog, endog, etc. are self.data.endog, etc. super(MixedLM, self).__init__(endog, exog, groups=groups, - exog_re=exog_re, missing=missing) + exog_re=exog_re, missing=missing) if exog_re is None: # Default random effects structure (random intercepts). @@ -491,6 +491,8 @@ def __init__(self, endog, exog, groups, exog_re=None, # Override the default value self.nparams = self.k_fe + self.k_re2 + self.data.xnames = self._make_param_names() + # Convert the data to the internal representation, which is a # list of arrays, corresponding to the groups. group_labels = list(set(groups)) @@ -519,11 +521,23 @@ def __init__(self, endog, exog, groups, exog_re=None, range(self.exog.shape[1])] # Set the random effect parameter names - if isinstance(self.exog_re, pd.DataFrame): - self.exog_re_names = list(self.exog_re.columns) - else: - self.exog_re_names = ["Z%d" % (k+1) for k in - range(self.exog_re.shape[1])] + #if isinstance(self.exog_re, pd.DataFrame): + # self.exog_re_names = list(self.exog_re.columns) + #else: + # self.exog_re_names = ["Z%d" % (k+1) for k in + # range(self.exog_re.shape[1])] + + def _make_param_names(self): + names = list(self.exog_names) + jj = self.k_fe + for i in range(self.k_re): + for j in range(i + 1): + if i == j: + names.append(self.exog_re_names[i] + " RE") + else: + names.append(self.exog_re_names[j] + " x " + + self.exog_re_names[i] + " RE") + jj += 1 @classmethod def from_formula(cls, formula, data, re_formula=None, subset=None, @@ -756,7 +770,7 @@ def fit_regularized(self, start_params=None, method='l1', alpha=0, results.k_re = self.k_re results.k_re2 = self.k_re2 - return results + return MixedLMResultsWrapper(results) def _reparam(self): @@ -1685,7 +1699,7 @@ def fit(self, start_params=None, reml=True, niter_em=0, results.use_sqrt = self.use_sqrt results.freepat = self._freepat - return results + return MixedLMResultsWrapper(results) class MixedLMResults(base.LikelihoodModelResults): @@ -1905,6 +1919,7 @@ def summary(self, yname=None, xname_fe=None, xname_re=None, jj = self.k_fe for i in range(self.k_re): for j in range(i + 1): + import ipdb; ipdb.set_trace() if i == j: names.append(self.model.exog_re_names[i] + " RE") else: @@ -2013,3 +2028,7 @@ def profile_re(self, re_ix, num_low=5, dist_low=1., num_high=5, model.exog_re_li = exog_re_li_save return likev + + +class MixedLMResultsWrapper(base.LikelihoodResultsWrapper): + pass From e109a32c5f7d2bbe7f3d20356723baccd614f251 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 25 Sep 2014 12:45:56 -0400 Subject: [PATCH 02/15] ENH: Allow more general param_names in data handling --- statsmodels/base/data.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/statsmodels/base/data.py b/statsmodels/base/data.py index 729fe49bd8e..6efe6bebdab 100644 --- a/statsmodels/base/data.py +++ 
b/statsmodels/base/data.py @@ -52,6 +52,8 @@ class ModelData(object): Class responsible for handling input data and extracting metadata into the appropriate form """ + _param_names = None + def __init__(self, endog, exog=None, missing='none', hasconst=None, **kwargs): if missing != 'none': @@ -251,6 +253,15 @@ def xnames(self): return list(xnames) return None + @property + def param_names(self): + # for handling names of 'extra' parameters in summary, etc. + return self._param_names or self.xnames + + @param_names.setter + def param_names(self, values): + self._param_names = values + @cache_readonly def row_labels(self): exog = self.orig_exog @@ -383,9 +394,9 @@ def attach_columns(self, result): # don't squeeze because it might be a 2d row array # if it needs a squeeze, the bug is elsewhere if result.ndim <= 1: - return Series(result, index=self.xnames) + return Series(result, index=self.param_names) else: # for e.g., confidence intervals - return DataFrame(result, index=self.xnames) + return DataFrame(result, index=self.param_names) def attach_columns_eq(self, result): return DataFrame(result, index=self.xnames, columns=self.ynames) From da1ea47d8a0e59d847ed6fba1d95c78cc7394e7e Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 25 Sep 2014 13:35:30 -0400 Subject: [PATCH 03/15] ENH: Allow attachment of generic names. --- statsmodels/base/data.py | 12 +++++++++++- statsmodels/base/wrapper.py | 4 +++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/statsmodels/base/data.py b/statsmodels/base/data.py index 6efe6bebdab..5c43347f3f5 100644 --- a/statsmodels/base/data.py +++ b/statsmodels/base/data.py @@ -313,7 +313,7 @@ def _check_integrity(self): if len(self.exog) != len(self.endog): raise ValueError("endog and exog matrices are different sizes") - def wrap_output(self, obj, how='columns'): + def wrap_output(self, obj, how='columns', names=None): if how == 'columns': return self.attach_columns(obj) elif how == 'rows': @@ -326,6 +326,8 @@ def wrap_output(self, obj, how='columns'): return self.attach_columns_eq(obj) elif how == 'cov_eq': return self.attach_cov_eq(obj) + elif how == 'generic_columns': + return self.attach_generic_columns(obj, names) else: return obj @@ -347,6 +349,9 @@ def attach_rows(self, result): def attach_dates(self, result): return result + def attach_generic_columns(self, result, names): + return result + class PatsyData(ModelData): def _get_names(self, arr): @@ -389,6 +394,11 @@ def _get_row_labels(self, arr): # exog is not, so just return the row labels from endog return self.orig_endog.index + def attach_generic_columns(self, result, names): + # get the attribute to use + column_names = getattr(self, names, None) + return Series(result, index=column_names) + def attach_columns(self, result): # this can either be a 1d array or a scalar # don't squeeze because it might be a 2d row array diff --git a/statsmodels/base/wrapper.py b/statsmodels/base/wrapper.py index 90572d82a33..c10246df170 100644 --- a/statsmodels/base/wrapper.py +++ b/statsmodels/base/wrapper.py @@ -35,7 +35,9 @@ def __getattribute__(self, attr): obj = getattr(results, attr) data = results.model.data how = self._wrap_attrs.get(attr) - if how: + if how and isinstance(how, tuple): + obj = data.wrap_output(obj, how=how[0], names=how[1]) + elif how: obj = data.wrap_output(obj, how=how) return obj From 812d6c5420f063c11e9221935b987936aeb18359 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 25 Sep 2014 13:41:15 -0400 Subject: [PATCH 04/15] ENH: Refactor to allow wrappers to work. 
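With the data-handling hooks from the previous two commits in place, the
mixed model results can come back as labeled pandas objects: the new
MixedLMResultsWrapper maps each extra attribute to a (how, names_attribute)
tuple, which base/wrapper.py unpacks into data.wrap_output(obj, how=how[0],
names=how[1]) and PandasData.attach_generic_columns resolves against the
data instance.  A rough usage sketch, reusing the dietox example from the
release notes later in this series (illustrative only, not a test):

    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    data = sm.datasets.get_rdataset('dietox', 'geepack', cache=True).data
    md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"])
    mdf = md.fit()

    # bse_fe and bse_re are declared in MixedLMResultsWrapper._attrs as
    # ('generic_columns', 'xnames') and ('generic_columns', 'exog_re_names'),
    # so for pandas input they are returned as Series indexed by those name
    # lists instead of bare ndarrays.
    print(mdf.bse_fe)
    print(mdf.bse_re)
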
--- statsmodels/regression/mixed_linear_model.py | 79 +++++++++++--------- 1 file changed, 42 insertions(+), 37 deletions(-) diff --git a/statsmodels/regression/mixed_linear_model.py b/statsmodels/regression/mixed_linear_model.py index 4e9f1c7c926..b91fc26393e 100644 --- a/statsmodels/regression/mixed_linear_model.py +++ b/statsmodels/regression/mixed_linear_model.py @@ -99,6 +99,7 @@ import numpy as np import statsmodels.base.model as base from scipy.optimize import fmin_ncg, fmin_cg, fmin_bfgs, fmin +from statsmodels.tools.decorators import cache_readonly from scipy.stats.distributions import norm import pandas as pd import patsy @@ -108,6 +109,13 @@ from statsmodels.tools.sm_exceptions import ConvergenceWarning from statsmodels.base._penalties import Penalty + +def _get_exog_re_names(exog_re): + if isinstance(exog_re, (pd.Series, pd.DataFrame)): + return exog_re.columns.tolist() + return ["Z{0}".format(k + 1) for k in range(exog_re.shape[1])] + + class MixedLMParams(object): """ This class represents a parameter state for a mixed linear model. @@ -454,6 +462,7 @@ def __init__(self, endog, exog, groups, exog_re=None, # If there is one covariate, it may be passed in as a column # vector, convert these to 2d arrays. # TODO: Can this be moved up in the class hierarchy? + # yes, it should be done up the hierarchy if exog is not None and exog.ndim == 1: exog = exog[:,None] if exog_re is not None and exog_re.ndim == 1: @@ -464,35 +473,35 @@ def __init__(self, endog, exog, groups, exog_re=None, super(MixedLM, self).__init__(endog, exog, groups=groups, exog_re=exog_re, missing=missing) + self.k_fe = exog.shape[1] # Number of fixed effects parameters + if exog_re is None: # Default random effects structure (random intercepts). + self.k_re = 1 + self.k_re2 = 1 self.exog_re = np.ones((len(endog), 1), dtype=np.float64) self.data.exog_re = self.exog_re + self.data.param_names = self.exog_names + ['Intercept'] else: # Process exog_re the same way that exog is handled # upstream + # TODO: this is wrong and should be handled upstream wholly self.data.exog_re = exog_re self.exog_re = np.asarray(exog_re) - - # Model dimensions - self.k_fe = exog.shape[1] # Number of fixed effects parameters - if exog_re is not None: - + if not self.data._param_names: + # HACK: could've been set in from_formula already + # needs refactor + (self.data.param_names, + self.data.exog_re_names) = self._make_param_names(exog_re) + # Model dimensions # Number of random effect covariates self.k_re = exog_re.shape[1] - # Number of covariance parameters self.k_re2 = self.k_re * (self.k_re + 1) // 2 - else: - self.k_re = 1 # Default (random intercepts model) - self.k_re2 = 1 - # Override the default value self.nparams = self.k_fe + self.k_re2 - self.data.xnames = self._make_param_names() - # Convert the data to the internal representation, which is a # list of arrays, corresponding to the groups. 
group_labels = list(set(groups)) @@ -520,25 +529,23 @@ def __init__(self, endog, exog, groups, exog_re=None, self.exog_names = ["FE%d" % (k + 1) for k in range(self.exog.shape[1])] - # Set the random effect parameter names - #if isinstance(self.exog_re, pd.DataFrame): - # self.exog_re_names = list(self.exog_re.columns) - #else: - # self.exog_re_names = ["Z%d" % (k+1) for k in - # range(self.exog_re.shape[1])] + def _make_param_names(self, exog_re): + exog_names = list(self.exog_names) + exog_re_names = _get_exog_re_names(exog_re) + param_names = [] - def _make_param_names(self): - names = list(self.exog_names) jj = self.k_fe - for i in range(self.k_re): + for i in range(exog_re.shape[1]): for j in range(i + 1): if i == j: - names.append(self.exog_re_names[i] + " RE") + param_names.append(exog_re_names[i] + " RE") else: - names.append(self.exog_re_names[j] + " x " + - self.exog_re_names[i] + " RE") + param_names.append(exog_re_names[j] + " x " + + exog_re_names[i] + " RE") jj += 1 + return exog_names + exog_re_names, exog_re_names + @classmethod def from_formula(cls, formula, data, re_formula=None, subset=None, *args, **kwargs): @@ -598,14 +605,15 @@ def from_formula(cls, formula, data, re_formula=None, subset=None, else: exog_re = np.ones((data.shape[0], 1), dtype=np.float64) - exog_re_names = ["Intercept",] + exog_re_names = ["Intercept RE"] mod = super(MixedLM, cls).from_formula(formula, data, subset=None, exog_re=exog_re, *args, **kwargs) - mod.exog_re_names = exog_re_names + mod.data.param_names = mod.exog_names + exog_re_names + mod.data.exog_re_names = exog_re_names return mod @@ -1740,6 +1748,7 @@ def __init__(self, model, params, cov_params): super(MixedLMResults, self).__init__(model, params, normalized_cov_params=cov_params) + @cache_readonly def bse_fe(self): """ Returns the standard errors of the fixed effect regression @@ -1748,6 +1757,7 @@ def bse_fe(self): p = self.model.exog.shape[1] return np.sqrt(np.diag(self.cov_params())[0:p]) + @cache_readonly def bse_re(self): """ Returns the standard errors of the variance parameters. 
Note @@ -1759,7 +1769,6 @@ def bse_re(self): p = self.model.exog.shape[1] return np.sqrt(self.scale * np.diag(self.cov_params())[p:]) - def ranef(self): """ Returns the conditional means of all random effects given the @@ -1893,7 +1902,6 @@ def summary(self, yname=None, xname_fe=None, xname_re=None, float_fmt = "%.3f" - names = list(self.model.exog_names) sdf = np.nan * np.ones((self.k_fe + self.k_re2, 6), dtype=np.float64) @@ -1919,17 +1927,11 @@ def summary(self, yname=None, xname_fe=None, xname_re=None, jj = self.k_fe for i in range(self.k_re): for j in range(i + 1): - import ipdb; ipdb.set_trace() - if i == j: - names.append(self.model.exog_re_names[i] + " RE") - else: - names.append(self.model.exog_re_names[j] + " x " + - self.model.exog_re_names[i] + " RE") sdf[jj, 0] = self.cov_re[i, j] sdf[jj, 1] = np.sqrt(self.scale) * self.bse[jj] jj += 1 - sdf = pd.DataFrame(index=names, data=sdf) + sdf = pd.DataFrame(index=self.model.data.param_names, data=sdf) sdf.columns = ['Coef.', 'Std.Err.', 'z', 'P>|z|', '[' + str(alpha/2), str(1-alpha/2) + ']'] for col in sdf.columns: @@ -1940,7 +1942,6 @@ def summary(self, yname=None, xname_fe=None, xname_re=None, return smry - def profile_re(self, re_ix, num_low=5, dist_low=1., num_high=5, dist_high=1.): """ @@ -2031,4 +2032,8 @@ def profile_re(self, re_ix, num_low=5, dist_low=1., num_high=5, class MixedLMResultsWrapper(base.LikelihoodResultsWrapper): - pass + _attrs = {'bse_re': ('generic_columns', 'exog_re_names'), + 'bse_fe': ('generic_columns', 'xnames'), + } + _upstream_attrs = base.LikelihoodResultsWrapper._wrap_attrs + _wrap_attrs = base.wrap.union_dicts(_attrs, _upstream_attrs) From acfadf935f8030f8164168d1f48367e433952be3 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 25 Sep 2014 13:52:38 -0400 Subject: [PATCH 05/15] ENH: Handle the generic 2d case. 
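The 2d case mirrors the 1d one: the tuple stored in _wrap_attrs is now
unpacked as data.wrap_output(obj, how[0], *how[1:]), so a wrapper can name
both axes, and attach_cov switches from xnames to param_names so that
cov_params() is labeled with the full parameter list.  A minimal sketch of
the intended mapping (the cov_re entry itself is only added in the next
commit):

    # declared in a results wrapper
    _attrs = {'cov_re': ('generic_columns_2d', 'exog_re_names')}

    # PandasData.attach_generic_columns_2d looks the names up on the data
    # instance (colnames defaults to rownames) and returns
    #     DataFrame(cov_re, index=data.exog_re_names,
    #               columns=data.exog_re_names)
    # i.e. the random effects covariance matrix with labeled axes.
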
--- statsmodels/base/data.py | 16 ++++++++++++++-- statsmodels/base/wrapper.py | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/statsmodels/base/data.py b/statsmodels/base/data.py index 5c43347f3f5..3f4700dedf7 100644 --- a/statsmodels/base/data.py +++ b/statsmodels/base/data.py @@ -328,6 +328,8 @@ def wrap_output(self, obj, how='columns', names=None): return self.attach_cov_eq(obj) elif how == 'generic_columns': return self.attach_generic_columns(obj, names) + elif how == 'generic_columns_2d': + return self.attach_generic_columns_2d(obj, names) else: return obj @@ -349,7 +351,10 @@ def attach_rows(self, result): def attach_dates(self, result): return result - def attach_generic_columns(self, result, names): + def attach_generic_columns(self, result, *args, **kwargs): + return result + + def attach_generic_columns_2d(self, result, *args, **kwargs): return result @@ -399,6 +404,12 @@ def attach_generic_columns(self, result, names): column_names = getattr(self, names, None) return Series(result, index=column_names) + def attach_generic_columns_2d(self, result, rownames, colnames=None): + colnames = colnames or rownames + rownames = getattr(self, rownames, None) + colnames = getattr(self, colnames, None) + return DataFrame(result, index=rownames, columns=colnames) + def attach_columns(self, result): # this can either be a 1d array or a scalar # don't squeeze because it might be a 2d row array @@ -412,7 +423,8 @@ def attach_columns_eq(self, result): return DataFrame(result, index=self.xnames, columns=self.ynames) def attach_cov(self, result): - return DataFrame(result, index=self.xnames, columns=self.xnames) + return DataFrame(result, index=self.param_names, + columns=self.param_names) def attach_cov_eq(self, result): return DataFrame(result, index=self.ynames, columns=self.ynames) diff --git a/statsmodels/base/wrapper.py b/statsmodels/base/wrapper.py index c10246df170..ac25ee3f318 100644 --- a/statsmodels/base/wrapper.py +++ b/statsmodels/base/wrapper.py @@ -36,7 +36,7 @@ def __getattribute__(self, attr): data = results.model.data how = self._wrap_attrs.get(attr) if how and isinstance(how, tuple): - obj = data.wrap_output(obj, how=how[0], names=how[1]) + obj = data.wrap_output(obj, how[0], *how[1:]) elif how: obj = data.wrap_output(obj, how=how) From 609a17e3a1aa0f00d5e9e0fe617a143b86c4ea6f Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 25 Sep 2014 13:59:35 -0400 Subject: [PATCH 06/15] ENH: Wrap 2d attributes --- statsmodels/regression/mixed_linear_model.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/statsmodels/regression/mixed_linear_model.py b/statsmodels/regression/mixed_linear_model.py index b91fc26393e..46760366b13 100644 --- a/statsmodels/regression/mixed_linear_model.py +++ b/statsmodels/regression/mixed_linear_model.py @@ -1769,6 +1769,7 @@ def bse_re(self): p = self.model.exog.shape[1] return np.sqrt(self.scale * np.diag(self.cov_params())[p:]) + @cache_readonly def ranef(self): """ Returns the conditional means of all random effects given the @@ -1808,7 +1809,7 @@ def ranef(self): return ranef_dict - + @cache_readonly def ranef_cov(self): """ Returns the conditional covariance matrix of the random @@ -2034,6 +2035,11 @@ def profile_re(self, re_ix, num_low=5, dist_low=1., num_high=5, class MixedLMResultsWrapper(base.LikelihoodResultsWrapper): _attrs = {'bse_re': ('generic_columns', 'exog_re_names'), 'bse_fe': ('generic_columns', 'xnames'), + 'cov_re': ('generic_columns_2d', 'exog_re_names'), } _upstream_attrs = 
base.LikelihoodResultsWrapper._wrap_attrs _wrap_attrs = base.wrap.union_dicts(_attrs, _upstream_attrs) + + _methods = {} + _upstream_methods = base.LikelihoodResultsWrapper._wrap_methods + _wrap_methods = base.wrap.union_dicts(_methods, _upstream_methods) From bfa893f9f47984a5a3841af78b6007ca84139881 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 25 Sep 2014 14:24:57 -0400 Subject: [PATCH 07/15] REF: Return DataFrame instead of dict. --- statsmodels/regression/mixed_linear_model.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/statsmodels/regression/mixed_linear_model.py b/statsmodels/regression/mixed_linear_model.py index 46760366b13..eb96b6c08a4 100644 --- a/statsmodels/regression/mixed_linear_model.py +++ b/statsmodels/regression/mixed_linear_model.py @@ -109,6 +109,8 @@ from statsmodels.tools.sm_exceptions import ConvergenceWarning from statsmodels.base._penalties import Penalty +from pandas import DataFrame + def _get_exog_re_names(exog_re): if isinstance(exog_re, (pd.Series, pd.DataFrame)): @@ -1782,7 +1784,6 @@ def ranef(self): variable to the conditional means of the random effects given the data. """ - try: cov_re_inv = np.linalg.inv(self.cov_re) except np.linalg.LinAlgError: @@ -1807,7 +1808,10 @@ def ranef(self): ranef_dict[label] = np.dot(self.cov_re, np.dot(ex_r.T, vresid)) - return ranef_dict + column_names = dict(zip(range(self.k_re), + self.model.data.exog_re_names)) + df = DataFrame.from_dict(ranef_dict, orient='index') + return df.rename(columns=column_names).ix[self.model.group_labels] @cache_readonly def ranef_cov(self): @@ -1829,10 +1833,9 @@ def ranef_cov(self): cov_re_inv = None ranef_dict = {} + #columns = self.model.data.exog_re_names for k in range(self.model.n_groups): - endog = self.model.endog_li[k] - exog = self.model.exog_li[k] ex_r = self.model.exog_re_li[k] ex2_r = self.model.exog_re2_li[k] label = self.model.group_labels[k] @@ -1843,9 +1846,11 @@ def ranef_cov(self): mat2 = np.dot(mat1.T, mat2) ranef_dict[label] = self.cov_re - mat2 + #ranef_dict[label] = DataFrame(self.cov_re - mat2, + # index=columns, columns=columns) - return ranef_dict + return ranef_dict def summary(self, yname=None, xname_fe=None, xname_re=None, title=None, alpha=.05): From 1f17d211cc802bd3b7c5a0e78ec2407e840585ad Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 25 Sep 2014 14:34:47 -0400 Subject: [PATCH 08/15] TST: Fix tests for refactor --- statsmodels/regression/tests/test_lme.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statsmodels/regression/tests/test_lme.py b/statsmodels/regression/tests/test_lme.py index f955f78226c..cfdb19f4abd 100644 --- a/statsmodels/regression/tests/test_lme.py +++ b/statsmodels/regression/tests/test_lme.py @@ -298,9 +298,9 @@ def do1(self, reml, irf, ds_ix): # Not supported in R if not irf: - assert_almost_equal(mdf.ranef()[0], rslt.ranef_postmean, + assert_almost_equal(mdf.ranef.ix[0], rslt.ranef_postmean, decimal=3) - assert_almost_equal(mdf.ranef_cov()[0], + assert_almost_equal(mdf.ranef_cov[0], rslt.ranef_condvar, decimal=3) From 0520e8987985e26f45b9156b95d9b7508074cb60 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 25 Sep 2014 17:13:23 -0400 Subject: [PATCH 09/15] STY: Rename attributes --- statsmodels/regression/mixed_linear_model.py | 21 ++++++++++---------- statsmodels/regression/tests/test_lme.py | 4 ++-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/statsmodels/regression/mixed_linear_model.py 
b/statsmodels/regression/mixed_linear_model.py index eb96b6c08a4..9669b593201 100644 --- a/statsmodels/regression/mixed_linear_model.py +++ b/statsmodels/regression/mixed_linear_model.py @@ -501,8 +501,7 @@ def __init__(self, endog, exog, groups, exog_re=None, # Number of covariance parameters self.k_re2 = self.k_re * (self.k_re + 1) // 2 - # Override the default value - self.nparams = self.k_fe + self.k_re2 + self.k_params = self.k_fe + self.k_re2 # Convert the data to the internal representation, which is a # list of arrays, corresponding to the groups. @@ -1751,7 +1750,7 @@ def __init__(self, model, params, cov_params): normalized_cov_params=cov_params) @cache_readonly - def bse_fe(self): + def standard_errors_fe(self): """ Returns the standard errors of the fixed effect regression coefficients. @@ -1760,7 +1759,7 @@ def bse_fe(self): return np.sqrt(np.diag(self.cov_params())[0:p]) @cache_readonly - def bse_re(self): + def standard_errors_re(self): """ Returns the standard errors of the variance parameters. Note that the sampling distribution of variance parameters is @@ -1772,17 +1771,17 @@ def bse_re(self): return np.sqrt(self.scale * np.diag(self.cov_params())[p:]) @cache_readonly - def ranef(self): + def random_effects(self): """ Returns the conditional means of all random effects given the data. Returns ------- - ranef_dict : dict - A dictionary mapping the distinct values of the `group` - variable to the conditional means of the random effects - given the data. + random_effects : DataFrame + A DataFrame with the distinct `group` values as the index + and the conditional means of the random effects + in the columns. """ try: cov_re_inv = np.linalg.inv(self.cov_re) @@ -1814,14 +1813,14 @@ def ranef(self): return df.rename(columns=column_names).ix[self.model.group_labels] @cache_readonly - def ranef_cov(self): + def random_effects_cov(self): """ Returns the conditional covariance matrix of the random effects for each group given the data. Returns ------- - ranef_dict : dict + random_effects_cov : dict A dictionary mapping the distinct values of the `group` variable to the conditional covariance matrix of the random effects given the data. diff --git a/statsmodels/regression/tests/test_lme.py b/statsmodels/regression/tests/test_lme.py index cfdb19f4abd..679795a41fb 100644 --- a/statsmodels/regression/tests/test_lme.py +++ b/statsmodels/regression/tests/test_lme.py @@ -298,9 +298,9 @@ def do1(self, reml, irf, ds_ix): # Not supported in R if not irf: - assert_almost_equal(mdf.ranef.ix[0], rslt.ranef_postmean, + assert_almost_equal(mdf.random_effects.ix[0], rslt.ranef_postmean, decimal=3) - assert_almost_equal(mdf.ranef_cov[0], + assert_almost_equal(mdf.random_effects_cov[0], rslt.ranef_condvar, decimal=3) From cba04face574d5b95c3bd89d31ba83bcf7781fed Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 26 Sep 2014 00:36:23 -0400 Subject: [PATCH 10/15] DOC: Update release notes example --- docs/source/release/version0.6.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/source/release/version0.6.rst b/docs/source/release/version0.6.rst index 1e524f29a19..57c512f54fd 100644 --- a/docs/source/release/version0.6.rst +++ b/docs/source/release/version0.6.rst @@ -122,13 +122,14 @@ other types of random effects models can all be fit. 
Here is an example of fitting a random intercepts model to data from a longitudinal study: -data = pd.read_csv("http://vincentarelbundock.github.io/Rdatasets/csv/geepack/dietox.csv") -md = MixedLM.from_formula("Weight ~ Time", data, groups=data["Pig"]) -mdf = md.fit() -print mdf.summary() +.. ipython:: python -To extend this to a random slopes model, we would add the statement -`md.set_random("Time", data)` before calling the `fit` method. + import statsmodels.api as sm + import statsmodels.formula.api as smf + data = sm.datasets.get_rdataset('dietox', 'geepack', cache=True).data + md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"]) + mdf = md.fit() + print mdf.summary() The Statsmodels LME framework currently supports post-estimation inference via Wald tests and confidence intervals on the coefficients, From 29f0ecda16183ae5070feae985cc0cd0ea51a9e6 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 26 Sep 2014 09:20:25 -0400 Subject: [PATCH 11/15] ENH: Let t_test use param_names for better or worse. --- statsmodels/base/model.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py index ab14b6e9662..4a568f1b5a4 100644 --- a/statsmodels/base/model.py +++ b/statsmodels/base/model.py @@ -1136,7 +1136,8 @@ def t_test(self, r_matrix, q_matrix=None, cov_p=None, scale=None, "in 0.6.0. See the documentation for the new API", FutureWarning) r_matrix = (r_matrix, q_matrix) - LC = DesignInfo(self.model.exog_names).linear_constraint(r_matrix) + names = self.model.data.param_names + LC = DesignInfo(names).linear_constraint(r_matrix) r_matrix, q_matrix = LC.coefs, LC.constants num_ttests = r_matrix.shape[0] num_params = r_matrix.shape[1] @@ -1353,7 +1354,8 @@ def wald_test(self, r_matrix, q_matrix=None, cov_p=None, scale=1.0, "in 0.6.0. See the documentation for the new API", FutureWarning) r_matrix = (r_matrix, q_matrix) - LC = DesignInfo(self.model.exog_names).linear_constraint(r_matrix) + names = self.model.data.param_names + LC = DesignInfo(names).linear_constraint(r_matrix) r_matrix, q_matrix = LC.coefs, LC.constants if (self.normalized_cov_params is None and cov_p is None and From a883cac78ee6cb40d20053af34bb69318b3db2aa Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 26 Sep 2014 09:22:14 -0400 Subject: [PATCH 12/15] REF: Keep bse --- statsmodels/regression/mixed_linear_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statsmodels/regression/mixed_linear_model.py b/statsmodels/regression/mixed_linear_model.py index 9669b593201..5a4a2f193c0 100644 --- a/statsmodels/regression/mixed_linear_model.py +++ b/statsmodels/regression/mixed_linear_model.py @@ -1750,7 +1750,7 @@ def __init__(self, model, params, cov_params): normalized_cov_params=cov_params) @cache_readonly - def standard_errors_fe(self): + def bse_fe(self): """ Returns the standard errors of the fixed effect regression coefficients. @@ -1759,7 +1759,7 @@ def standard_errors_fe(self): return np.sqrt(np.diag(self.cov_params())[0:p]) @cache_readonly - def standard_errors_re(self): + def bse_re(self): """ Returns the standard errors of the variance parameters. 
Note that the sampling distribution of variance parameters is From c7bc0ff5c465298530c82011f2e3737a5a9147f5 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 26 Sep 2014 09:56:18 -0400 Subject: [PATCH 13/15] REF: likeval -> llf --- statsmodels/regression/mixed_linear_model.py | 6 ++---- statsmodels/regression/tests/test_lme.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/statsmodels/regression/mixed_linear_model.py b/statsmodels/regression/mixed_linear_model.py index 5a4a2f193c0..75e52fbdd73 100644 --- a/statsmodels/regression/mixed_linear_model.py +++ b/statsmodels/regression/mixed_linear_model.py @@ -774,7 +774,6 @@ def fit_regularized(self, start_params=None, method='l1', alpha=0, results.method = mdf.method results.converged = True results.cov_pen = self.cov_pen - results.likeval = self.loglike(params_prof) results.k_fe = self.k_fe results.k_re = self.k_re results.k_re2 = self.k_re2 @@ -1701,7 +1700,6 @@ def fit(self, start_params=None, reml=True, niter_em=0, results.hist = hist results.reml = self.reml results.cov_pen = self.cov_pen - results.likeval = self.loglike(params) results.k_fe = self.k_fe results.k_re = self.k_re results.k_re2 = self.k_re2 @@ -1900,7 +1898,7 @@ def summary(self, yname=None, xname_fe=None, xname_re=None, info["Dependent Variable:"] = yname info["Method:"] = self.method info["Scale:"] = self.scale - info["Likelihood:"] = self.likeval + info["Likelihood:"] = self.llf info["Converged:"] = "Yes" if self.converged else "No" smry.add_dict(info) smry.add_title("Mixed Linear Model Regression Results") @@ -2027,7 +2025,7 @@ def profile_re(self, re_ix, num_low=5, dist_low=1., num_high=5, params.set_cov_re(cov_re) rslt = model.fit(start_params=params, free=free, reml=self.reml, cov_pen=self.cov_pen) - likev.append([rslt.cov_re[0, 0], rslt.likeval]) + likev.append([rslt.cov_re[0, 0], rslt.llf]) likev = np.asarray(likev) # Restore the original exog diff --git a/statsmodels/regression/tests/test_lme.py b/statsmodels/regression/tests/test_lme.py index 679795a41fb..298fb857192 100644 --- a/statsmodels/regression/tests/test_lme.py +++ b/statsmodels/regression/tests/test_lme.py @@ -294,7 +294,7 @@ def do1(self, reml, irf, ds_ix): assert_almost_equal(rslt.vcov_r, mdf.cov_params()[0:pf,0:pf], decimal=3) - assert_almost_equal(mdf.likeval, rslt.loglike[0], decimal=2) + assert_almost_equal(mdf.llf, rslt.loglike[0], decimal=2) # Not supported in R if not irf: From 76ba5ed4f4fe59eb873669c7af9d45416a7fb6d1 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 26 Sep 2014 10:17:34 -0400 Subject: [PATCH 14/15] ENH: Define df_resid so t_test works. --- statsmodels/regression/mixed_linear_model.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/statsmodels/regression/mixed_linear_model.py b/statsmodels/regression/mixed_linear_model.py index 75e52fbdd73..554814d0c1f 100644 --- a/statsmodels/regression/mixed_linear_model.py +++ b/statsmodels/regression/mixed_linear_model.py @@ -108,6 +108,7 @@ import warnings from statsmodels.tools.sm_exceptions import ConvergenceWarning from statsmodels.base._penalties import Penalty +from statsmodels.compat.numpy import np_matrix_rank from pandas import DataFrame @@ -524,6 +525,8 @@ def __init__(self, endog, exog, groups, exog_re=None, # The total number of observations, summed over all groups self.n_totobs = sum([len(y) for y in self.endog_li]) + # why do it like the above? 
+ self.nobs = len(self.endog) # Set the fixed effects parameter names if self.exog_names is None: @@ -1745,7 +1748,9 @@ class MixedLMResults(base.LikelihoodModelResults): def __init__(self, model, params, cov_params): super(MixedLMResults, self).__init__(model, params, - normalized_cov_params=cov_params) + normalized_cov_params=cov_params) + self.nobs = self.model.nobs + self.df_resid = self.nobs - np_matrix_rank(self.model.exog) @cache_readonly def bse_fe(self): From 4c7dc569a5e2eae28b1e2fec3aae2deed5113fff Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 26 Sep 2014 10:49:53 -0400 Subject: [PATCH 15/15] TST: Mark slow test. --- statsmodels/regression/tests/test_lme.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/statsmodels/regression/tests/test_lme.py b/statsmodels/regression/tests/test_lme.py index 298fb857192..9e7f39c790d 100644 --- a/statsmodels/regression/tests/test_lme.py +++ b/statsmodels/regression/tests/test_lme.py @@ -1,7 +1,8 @@ import numpy as np import pandas as pd from statsmodels.regression.mixed_linear_model import MixedLM, MixedLMParams -from numpy.testing import assert_almost_equal, assert_equal, assert_allclose +from numpy.testing import (assert_almost_equal, assert_equal, assert_allclose, + dec) from . import lme_r_results from statsmodels.base import _penalties as penalties import os @@ -69,6 +70,7 @@ class TestMixedLM(object): # Test analytic scores using numeric differentiation # TODO: better checks on Hessian + @dec.slow def test_compare_numdiff(self): import statsmodels.tools.numdiff as nd