Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
kshedden committed Sep 23, 2020
1 parent 2322d35 commit b93ac98
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 18 deletions.
87 changes: 71 additions & 16 deletions statsmodels/regression/mixed_linear_model.py
Expand Up @@ -156,6 +156,8 @@
from statsmodels.tools import data as data_tools
from statsmodels.tools.sm_exceptions import ConvergenceWarning

_warn_cov_sing = "The random effects covariance matrix is singular."


def _dot(x, y):
"""
Expand Down Expand Up @@ -413,7 +415,10 @@ def get_packed(self, use_sqrt, has_fe=False):

if self.k_re > 0:
if use_sqrt:
L = np.linalg.cholesky(self.cov_re)
try:
L = np.linalg.cholesky(self.cov_re)
except np.linalg.LinAlgError:
L = np.diag(np.sqrt(np.diag(self.cov_re)))
cpa = L[self._ix]
else:
cpa = self.cov_re[self._ix]
Expand Down Expand Up @@ -1258,27 +1263,49 @@ def fit_regularized(self, start_params=None, method='l1', alpha=0,

return MixedLMResultsWrapper(results)

def get_fe_params(self, cov_re, vcomp):
def get_fe_params(self, cov_re, vcomp, tol=1e-10):
"""
Use GLS to update the fixed effects parameter estimates.
Parameters
----------
cov_re : array_like
cov_re : array_like (2d)
The covariance matrix of the random effects.
vcomp : array_like (1d)
The variance components.
tol : float
A tolerance parameter to determine when covariances
are singular.
Returns
-------
The GLS estimates of the fixed effects parameters.
params : ndarray
The GLS estimates of the fixed effects parameters.
singular : bool
True if the covariance is singular
"""

if self.k_fe == 0:
return np.array([])
return np.array([]), False

sing = False

if self.k_re == 0:
cov_re_inv = np.empty((0, 0))
else:
cov_re_inv = np.linalg.inv(cov_re)
w, v = np.linalg.eigh(cov_re)
if w.min() < tol:
# Singular, use pseudo-inverse
sing = True
ii = np.flatnonzero(w >= tol)
if len(ii) == 0:
cov_re_inv = np.zeros_like(cov_re)
else:
vi = v[:, ii]
wi = w[ii]
cov_re_inv = np.dot(vi / wi, vi.T)
else:
cov_re_inv = np.linalg.inv(cov_re)

# Cache these quantities that do not change.
if not hasattr(self, "_endex_li"):
Expand All @@ -1292,15 +1319,29 @@ def get_fe_params(self, cov_re, vcomp):
xtxy = 0.
for group_ix, group in enumerate(self.group_labels):
vc_var = self._expand_vcomp(vcomp, group_ix)
if vc_var.size > 0:
if vc_var.min() < tol:
# Pseudo-inverse
sing = True
ii = np.flatnonzero(vc_var >= tol)
vc_vari = np.zeros_like(vc_var)
vc_vari[ii] = 1 / vc_var[ii]
else:
vc_vari = 1 / vc_var
else:
vc_vari = np.empty(0)
exog = self.exog_li[group_ix]
ex_r, ex2_r = self._aex_r[group_ix], self._aex_r2[group_ix]
solver = _smw_solver(1., ex_r, ex2_r, cov_re_inv, 1 / vc_var)
solver = _smw_solver(1., ex_r, ex2_r, cov_re_inv, vc_vari)
u = solver(self._endex_li[group_ix])
xtxy += np.dot(exog.T, u)

fe_params = np.linalg.solve(xtxy[:, 0:-1], xtxy[:, -1])
if sing:
fe_params = np.dot(np.linalg.pinv(xtxy[:, 0:-1]), xtxy[:, -1])
else:
fe_params = np.linalg.solve(xtxy[:, 0:-1], xtxy[:, -1])

return fe_params
return fe_params, sing

def _reparam(self):
"""
Expand Down Expand Up @@ -1451,15 +1492,18 @@ def loglike(self, params, profile_fe=True):

# Move to the profile set
if profile_fe:
fe_params = self.get_fe_params(cov_re, vcomp)
fe_params, sing = self.get_fe_params(cov_re, vcomp)
if sing:
warnings.warn(_warn_cov_sing)
else:
fe_params = params.fe_params

if self.k_re > 0:
try:
cov_re_inv = np.linalg.inv(cov_re)
except np.linalg.LinAlgError:
cov_re_inv = None
cov_re_inv = np.linalg.pinv(cov_re)
warnings.warn(_warn_cov_sing)
_, cov_re_logdet = np.linalg.slogdet(cov_re)
else:
cov_re_inv = np.zeros((0, 0))
Expand Down Expand Up @@ -1579,7 +1623,12 @@ def score(self, params, profile_fe=True):
has_fe=False)

if profile_fe:
params.fe_params = self.get_fe_params(params.cov_re, params.vcomp)
params.fe_params, sing = \
self.get_fe_params(params.cov_re, params.vcomp)

if sing:
msg = "Random effects covariance is singular"
warnings.warn(msg)

if self.use_sqrt:
score_fe, score_re, score_vc = self.score_sqrt(
Expand Down Expand Up @@ -1644,7 +1693,8 @@ def score_full(self, params, calc_fe):
try:
cov_re_inv = np.linalg.inv(cov_re)
except np.linalg.LinAlgError:
cov_re_inv = None
cov_re_inv = np.linalg.pinv(cov_re)
warnings.warn(_warn_cov_sing)

score_fe = np.zeros(self.k_fe)
score_re = np.zeros(self.k_re2)
Expand Down Expand Up @@ -1829,7 +1879,11 @@ def hessian(self, params):
vcomp = params.vcomp
cov_re = params.cov_re
if self.k_re > 0:
cov_re_inv = np.linalg.inv(cov_re)
try:
cov_re_inv = np.linalg.inv(cov_re)
except np.linalg.LinAlgError:
cov_re_inv = np.linalg.pinv(cov_re)
warnings.warn(_warn_cov_sing)
else:
cov_re_inv = np.empty((0, 0))

Expand Down Expand Up @@ -1982,7 +2036,8 @@ def get_scale(self, fe_params, cov_re, vcomp):
try:
cov_re_inv = np.linalg.inv(cov_re)
except np.linalg.LinAlgError:
cov_re_inv = None
cov_re_inv = np.linalg.pinv(cov_re)
warnings.warn(_warn_cov_sing)

qf = 0.
for group_ix, group in enumerate(self.group_labels):
Expand Down Expand Up @@ -2155,7 +2210,7 @@ def fit(self, start_params=None, reml=True, niter_sa=0,
params, self.k_fe, self.k_re, use_sqrt=self.use_sqrt, has_fe=False)
cov_re_unscaled = params.cov_re
vcomp_unscaled = params.vcomp
fe_params = self.get_fe_params(cov_re_unscaled, vcomp_unscaled)
fe_params, sing = self.get_fe_params(cov_re_unscaled, vcomp_unscaled)
params.fe_params = fe_params
scale = self.get_scale(fe_params, cov_re_unscaled, vcomp_unscaled)
cov_re = scale * cov_re_unscaled
Expand Down
22 changes: 20 additions & 2 deletions statsmodels/regression/tests/test_lme.py
Expand Up @@ -469,7 +469,7 @@ def test_dietox_slopes(self):
data = pd.read_csv(fname)
model = MixedLM.from_formula(
"Weight ~ Time", groups="Pig", re_formula="1 + Time", data=data)
result = model.fit(method='powell')
result = model.fit(method="cg")

# fixef(r)
assert_allclose(
Expand All @@ -496,7 +496,7 @@ def test_dietox_slopes(self):
data = pd.read_csv(fname)
model = MixedLM.from_formula(
"Weight ~ Time", groups="Pig", re_formula="1 + Time", data=data)
result = model.fit(method='powell', reml=False)
result = model.fit(method='cg', reml=False)

# fixef(r)
assert_allclose(result.fe_params, np.r_[15.73863, 6.93902], rtol=1e-5)
Expand Down Expand Up @@ -1235,6 +1235,24 @@ def test_smw_logdet(self, p, q, r, s):
check_smw_logdet(p, q, r, s)


def test_singular():
#7051

np.random.seed(3423)
n = 100

data = np.random.randn(n, 2)
df = pd.DataFrame(data, columns=['Y', 'X'])
df['class'] = pd.Series([i % 3 for i in df.index], index=df.index)

with pytest.warns(Warning) as wrn:
md = MixedLM.from_formula("Y ~ X", df, groups=df['class'])
mdf = md.fit()
mdf.summary()
if not wrn:
pytest.fail("warning expected")


def test_get_distribution():

np.random.seed(234)
Expand Down

0 comments on commit b93ac98

Please sign in to comment.