Skip to content

Commit

Permalink
REF/BUG: fix CovDetMCD
Browse files Browse the repository at this point in the history
  • Loading branch information
josef-pkt committed May 12, 2024
1 parent 133de40 commit 1adf4c5
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 12 deletions.
33 changes: 22 additions & 11 deletions statsmodels/robust/covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -1298,6 +1298,7 @@ def _get_detcov_startidx(z, h, options_start=None, methods_cov="all"):
nobs = z.shape[0]
idx_sel = np.argpartition(np.abs(z), h)[:h]
idx_all = [(idx_sel, "abs-resid")]

Check warning on line 1300 in statsmodels/robust/covariance.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/covariance.py#L1297-L1300

Added lines #L1297 - L1300 were not covered by tests
# next uses symmetric equal-tail trimming
idx_sorted = np.argsort(z)
h_tail = (nobs - h) // 2
idx_all.append((idx_sorted[h_tail : h_tail + h], "trimmed-tail"))
Expand All @@ -1306,6 +1307,7 @@ def _get_detcov_startidx(z, h, options_start=None, methods_cov="all"):
# continue if more than 1 random variable
cov_all = _cov_starting(z, standardize=False, quantile=0.5)

# orthogonalization step
idx_all = []
for c in cov_all:
if not hasattr(c, "method"):
Expand All @@ -1324,10 +1326,14 @@ class CovM:
"""

def __init__(self, data, norm_mean=None, norm_scatter=None,
scale_bias=None): #, method="S"):
scale_bias=None, method="S"):
# todo: method defines how norm_mean and norm_scatter are linked
# currently I try for S-estimator

if method.lower() not in ["s"]:
msg = f"method {method} option not recognize or implemented"
raise ValueError(msg)

Check warning on line 1335 in statsmodels/robust/covariance.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/covariance.py#L1334-L1335

Added lines #L1334 - L1335 were not covered by tests

self.data = np.asarray(data)
self.k_vars = k_vars = self.data.shape[1]

Expand Down Expand Up @@ -1453,11 +1459,11 @@ class CovDetMCD:
optimization change.
"""

def __init__(self):
def __init__(self, data):
# no options yet, methods were written as functions
pass
self.data = np.asarray(data)

def cstep_mcd(self, x, mean, cov, h, maxiter=2, tol=1e-8):
def _cstep(self, x, mean, cov, h, maxiter=2, tol=1e-8):
"""C-step for mcd iteration
x is data, perc is percentile h / nobs, don't need perc when we
Expand All @@ -1468,7 +1474,7 @@ def cstep_mcd(self, x, mean, cov, h, maxiter=2, tol=1e-8):
converged = False

for i in range(maxiter):
d = mahalanobis(x, mean, cov)
d = mahalanobis(x - mean, cov)
idx_sel = np.argpartition(d, h)[:h]
x_sel = x[idx_sel]
mean = x_sel.mean(0)
Expand All @@ -1479,9 +1485,11 @@ def cstep_mcd(self, x, mean, cov, h, maxiter=2, tol=1e-8):
converged = True

Check notice

Code scanning / CodeQL

Unused local variable Note

Variable converged is not used.
break

cov = cov_new

return mean, cov

def _mcd_one(self, x, idx, h, maxiter=2, mean=None, cov=None):
def _fit_one(self, x, idx, h, maxiter=2, mean=None, cov=None):
"""Compute mcd for one starting set of observations.
Parameters
Expand Down Expand Up @@ -1509,19 +1517,22 @@ def _mcd_one(self, x, idx, h, maxiter=2, mean=None, cov=None):
This does not do any preprocessing of the data and returns the empirical mean
and covariance of evaluation set of the data ``x``.
"""
x_sel = x[idx]
if idx is not None:
x_sel = x[idx]
else:
x_sel = x

Check warning on line 1523 in statsmodels/robust/covariance.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/covariance.py#L1523

Added line #L1523 was not covered by tests
if mean is None:
mean = x_sel.mean(0)
if cov is None:
cov = np.cov(x_sel.T, ddof=1)

# updated with c-step
mean, cov = self.cstep_mcd(x, mean, cov, h, maxiter=maxiter)
mean, cov = self._cstep(x, mean, cov, h, maxiter=maxiter)
det = np.linalg.det(cov)

return mean, cov, det

def mcd_det(self, x, h, *, h_start=None, mean_func=None, scale_func=None,
def fit(self, h, *, h_start=None, mean_func=None, scale_func=None,
maxiter=100, options_start=None, reweight=True):
"""
Compute minimum covariance determinant estimate of mean and covariance.
Expand All @@ -1546,7 +1557,7 @@ def mcd_det(self, x, h, *, h_start=None, mean_func=None, scale_func=None,
Holder instance with results
"""

x = np.asarray(x)
x = self.data
nobs, k_vars = x.shape

if h is None:
Expand All @@ -1573,7 +1584,7 @@ def mcd_det(self, x, h, *, h_start=None, mean_func=None, scale_func=None,
res = {}
for ii, ini in enumerate(starts):
idx_sel, method = ini
mean, cov, det = self._mcd_one(x, idx_sel, h, maxiter=maxiter)
mean, cov, det = self._fit_one(x, idx_sel, h, maxiter=maxiter)
res[ii] = Holder(
mean=mean,
cov=cov * fac_trunc,
Expand Down
25 changes: 24 additions & 1 deletion statsmodels/robust/tests/test_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,11 @@ def test_tyler():


def test_cov_ms():
# use CovM as local Cov@
# use CovM as local CovS

# > CovSest(x) using package rrcov
# same result with > CovSest(x, method="sdet")
# but scale difers with method="biweight"
mean_r = np.array([1.53420879676, 1.82865741024, 1.65565146981])
cov_r = np.array([
[1.8090846049573, 0.0479283121828, 0.2446369025717],
Expand Down Expand Up @@ -210,6 +212,27 @@ def test_cov_ms():
assert_allclose(res.scale, scale_r, rtol=1e-5)


def test_covdetmcd():

# results from rrcov
# > cdet = CovMcd(x = hbk, raw.only = TRUE, nsamp = "deterministic",
# use.correction=FALSE)
cov_dmcd_r = np.array("""
2.2059619213639 0.0223939863695 0.7898958050933 0.4060613360808
0.0223939863695 1.1384166802155 0.4315534571891 -0.2344041030201
0.7898958050933 0.4315534571891 1.8930117467493 -0.3292893001459
0.4060613360808 -0.2344041030201 -0.3292893001459 0.6179686100845
""".split(),
float).reshape(4,4)

mean_dmcd_r = np.array([1.7725, 2.2050, 1.5375, -0.0575])

mod = robcov.CovDetMCD(dta_hbk)
# with default start, default start could get wrong local optimum
res = mod.fit(40)
assert_allclose(res.mean, mean_dmcd_r, rtol=1e-5)
assert_allclose(res.cov, cov_dmcd_r, rtol=1e-5)

def test_robcov_SMOKE():
# currently only smoke test or very loose comparisons to dgp
nobs, k_vars = 100, 3
Expand Down

0 comments on commit 1adf4c5

Please sign in to comment.