From 2300ba124b5cc4775900d2add9a7ba7474690ba7 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Fri, 3 Jun 2022 10:57:05 +0200 Subject: [PATCH 01/24] init commit --- skglm/solvers/cd_solver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skglm/solvers/cd_solver.py b/skglm/solvers/cd_solver.py index 2ab16cfdb..83584ee1e 100644 --- a/skglm/solvers/cd_solver.py +++ b/skglm/solvers/cd_solver.py @@ -92,7 +92,7 @@ def cd_solver_path(X, y, datafit, penalty, alphas=None, # X_dense, X_data, X_indices, X_indptr = _sparse_and_dense(X) if alphas is None: - raise ValueError('alphas should be passed explicitely') + raise ValueError('alphas should be passed explicitly') # if hasattr(penalty, "alpha_max"): # if sparse.issparse(X): # grad0 = construct_grad_sparse( From 5ddb3a52d120b4690bac2aaa9fe9e2c8983f317d Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Tue, 7 Jun 2022 10:28:25 +0200 Subject: [PATCH 02/24] implement ``is_penalized`` & gsupp --- skglm/penalties/block_separable.py | 21 +++++++++++++++++++++ skglm/tests/test_group.py | 23 +++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py index 0a9bd2743..8c6230c08 100644 --- a/skglm/penalties/block_separable.py +++ b/skglm/penalties/block_separable.py @@ -232,3 +232,24 @@ def subdiff_distance(self, w, grad, ws): scores[idx] = norm(grad_g - subdiff) return scores + + def is_penalized(self, n_groups): + return np.ones(n_groups, dtype=np.bool_) + + def generalized_support(self, w): + grp_indices, grp_ptr = self.grp_indices, self.grp_ptr + n_groups = len(grp_ptr) - 1 + is_not_penalized = ~self.is_penalized(n_groups) + + gsupp = np.zeros(n_groups, dtype=np.bool_) + for g in range(n_groups): + if is_not_penalized[g]: + gsupp[g] = True + continue + + grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] + w_g = w[grp_g_indices] + if np.any(w_g): + gsupp[g] = True + + return gsupp diff --git a/skglm/tests/test_group.py b/skglm/tests/test_group.py index 794b05cad..a3030e0df 100644 --- a/skglm/tests/test_group.py +++ b/skglm/tests/test_group.py @@ -112,5 +112,28 @@ def test_vs_celer_grouplasso(n_groups, n_features, shuffle): np.testing.assert_allclose(model.coef_, w, atol=1e-5) +def test_gsupp(): + n_groups, n_features, shuffle = 5, 50, False + grp_indices, grp_ptr, _ = _generate_random_grp(n_groups, n_features, shuffle) + + grp_penalty = WeightedGroupL2( + alpha=1., grp_ptr=grp_ptr, + grp_indices=grp_indices, weights=np.ones(n_groups)) + + assert np.all(grp_penalty.is_penalized(n_groups)) + + w = np.zeros(n_features) + in_gsupp_grps = np.random.choice(n_groups, size=2, replace=False) + out_gsupp_grps = np.setdiff1d(np.arange(n_groups), in_gsupp_grps) + for g in in_gsupp_grps: + grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] + w[grp_g_indices] = 1. + + gsupp = grp_penalty.generalized_support(w) + assert np.all(gsupp[in_gsupp_grps]) + assert np.any(gsupp[out_gsupp_grps]) == False + + if __name__ == '__main__': + test_gsupp() pass From 9eb5bdcff28167d7453d6e70ee26803a26db6d44 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Tue, 7 Jun 2022 16:38:01 +0200 Subject: [PATCH 03/24] implement ws strategy --- skglm/penalties/block_separable.py | 7 +++- skglm/solvers/group_bcd_solver.py | 67 ++++++++++++++++++++++++------ skglm/tests/test_group.py | 23 ---------- 3 files changed, 60 insertions(+), 37 deletions(-) diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py index 8c6230c08..4161be682 100644 --- a/skglm/penalties/block_separable.py +++ b/skglm/penalties/block_separable.py @@ -218,10 +218,13 @@ def subdiff_distance(self, w, grad, ws): grp_ptr, grp_indices = self.grp_ptr, self.grp_indices scores = np.zeros(len(ws)) + grad_ptr = 0 for idx, g in enumerate(ws): - grad_g = grad[idx] - grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] + + grad_g = grad[grad_ptr: grad_ptr + len(grp_g_indices)] + grad_ptr += len(grp_g_indices) + w_g = w[grp_g_indices] norm_w_g = norm(w_g) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 7c6a6a5e6..027ee0cba 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -1,8 +1,11 @@ import numpy as np from numba import njit +from skglm.datafits.group import QuadraticGroup +from skglm.penalties.block_separable import WeightedGroupL2 -def bcd_solver(X, y, datafit, penalty, w_init=None, + +def bcd_solver(X, y, datafit, penalty: WeightedGroupL2, w_init=None, p0=2, max_iter=1000, max_epochs=100, tol=1e-7, verbose=False): """Run a group BCD solver. @@ -24,6 +27,9 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, Initial value of coefficients. If set to None, a zero vector is used instead. + p0 : int, default 2 + Min number of groups to be included in the working set. + max_iter : int, default 1000 Maximum number of iterations. @@ -58,41 +64,54 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p_objs_out = np.zeros(max_iter) for t in range(max_iter): - if t == 0: # avoid computing p_obj twice - prev_p_obj = datafit.value(y, w, Xw) + penalty.value(w) + if t == 0: # avoid computing grad and opt twice + grad = _construct_grad(X, y, w, Xw, datafit, all_groups) + opt = penalty.subdiff_distance(w, grad, all_groups) + stop_crit = np.max(opt) + + if stop_crit <= tol: + print("Outer solver: Early exit") + break + + gsupp_size = penalty.generalized_support(w).sum() + ws_size = max(min(p0, n_features), + min(n_features, 2 * gsupp_size)) + ws = np.argpartition(opt, -ws_size)[-ws_size:] for epoch in range(max_epochs): - _bcd_epoch(X, y, w, Xw, datafit, penalty, all_groups) + _bcd_epoch(X, y, w, Xw, datafit, penalty, ws) if epoch % 10 == 0: - current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) - stop_crit_in = prev_p_obj - current_p_obj + grad_ws = _construct_grad(X, y, w, Xw, datafit, ws) + opt_in = penalty.subdiff_distance(w, grad_ws, ws) + stop_crit_in = np.max(opt_in) if max(verbose - 1, 0): + current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) print( f"Epoch {epoch+1}: {current_p_obj:.10f} " f"obj. variation: {stop_crit_in:.2e}" ) - if stop_crit_in <= tol: + if stop_crit_in <= 0.3 * stop_crit: print("Early exit") break - prev_p_obj = current_p_obj current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) - stop_crit = prev_p_obj - current_p_obj + grad = _construct_grad(X, y, w, Xw, datafit, all_groups) + opt = penalty.subdiff_distance(w, grad, all_groups) + stop_crit = np.max(opt) if max(verbose, 0): print( f"Iteration {t+1}: {current_p_obj:.10f}, " - f"stopping crit: {stop_crit:.2f}" + f"stopping crit: {stop_crit:.2e}" ) if stop_crit <= tol: print("Outer solver: Early exit") break - prev_p_obj = current_p_obj p_objs_out[t] = current_p_obj return w, p_objs_out, stop_crit @@ -100,7 +119,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, @njit def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): - """Perform a single BCD epoch on groups in ws.""" + """Perform a single BCD epoch on groups in ``ws``.""" grp_ptr, grp_indices = penalty.grp_ptr, penalty.grp_indices for g in ws: @@ -119,3 +138,27 @@ def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): if old_w_g[idx] != w[j]: Xw += (w[j] - old_w_g[idx]) * X[:, j] return + + +def _construct_grad(X, y, w, Xw, datafit: QuadraticGroup, ws): + """Compute the -gradient according to each group in ``ws``. + + Note: -gradients are stacked in a 1d array (e.g. [-grad_g1, -grad_g2, ...]). + """ + grads = np.array([]) + for g in ws: + grad_g = -datafit.gradient_g(X, y, w, Xw, g) + grads = np.concatenate((grads, grad_g)) + return grads + + +def _construct_ws(w, opt, penalty: WeightedGroupL2, p0): + """Construct working set.""" + n_features = len(w) + gsupp_size = penalty.generalized_support(w).sum() + + ws_size = max(min(p0, n_features), + min(n_features, 2 * gsupp_size)) + + ws = np.argpartition(opt, -ws_size)[-ws_size:] + return ws diff --git a/skglm/tests/test_group.py b/skglm/tests/test_group.py index a3030e0df..794b05cad 100644 --- a/skglm/tests/test_group.py +++ b/skglm/tests/test_group.py @@ -112,28 +112,5 @@ def test_vs_celer_grouplasso(n_groups, n_features, shuffle): np.testing.assert_allclose(model.coef_, w, atol=1e-5) -def test_gsupp(): - n_groups, n_features, shuffle = 5, 50, False - grp_indices, grp_ptr, _ = _generate_random_grp(n_groups, n_features, shuffle) - - grp_penalty = WeightedGroupL2( - alpha=1., grp_ptr=grp_ptr, - grp_indices=grp_indices, weights=np.ones(n_groups)) - - assert np.all(grp_penalty.is_penalized(n_groups)) - - w = np.zeros(n_features) - in_gsupp_grps = np.random.choice(n_groups, size=2, replace=False) - out_gsupp_grps = np.setdiff1d(np.arange(n_groups), in_gsupp_grps) - for g in in_gsupp_grps: - grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] - w[grp_g_indices] = 1. - - gsupp = grp_penalty.generalized_support(w) - assert np.all(gsupp[in_gsupp_grps]) - assert np.any(gsupp[out_gsupp_grps]) == False - - if __name__ == '__main__': - test_gsupp() pass From d3972fbdd09bf700fb6e948f7acf945724becb50 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Tue, 7 Jun 2022 16:51:00 +0200 Subject: [PATCH 04/24] fix bug ``argpartition`` --- skglm/solvers/group_bcd_solver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 027ee0cba..fe6e5de71 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -74,8 +74,8 @@ def bcd_solver(X, y, datafit, penalty: WeightedGroupL2, w_init=None, p0=2, break gsupp_size = penalty.generalized_support(w).sum() - ws_size = max(min(p0, n_features), - min(n_features, 2 * gsupp_size)) + ws_size = max(min(p0, n_groups), + min(n_groups, 2 * gsupp_size)) ws = np.argpartition(opt, -ws_size)[-ws_size:] for epoch in range(max_epochs): From b937aa5b6bba975881bdd5e4982c9909d113578d Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Tue, 7 Jun 2022 16:53:35 +0200 Subject: [PATCH 05/24] add informative comment --- skglm/solvers/group_bcd_solver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index fe6e5de71..2a0f3b1cf 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -76,7 +76,7 @@ def bcd_solver(X, y, datafit, penalty: WeightedGroupL2, w_init=None, p0=2, gsupp_size = penalty.generalized_support(w).sum() ws_size = max(min(p0, n_groups), min(n_groups, 2 * gsupp_size)) - ws = np.argpartition(opt, -ws_size)[-ws_size:] + ws = np.argpartition(opt, -ws_size)[-ws_size:] # k-largest items [no sort] for epoch in range(max_epochs): _bcd_epoch(X, y, w, Xw, datafit, penalty, ws) From 18d8107894e54d19ec60e2ded0883552bb087bdf Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Tue, 7 Jun 2022 17:02:32 +0200 Subject: [PATCH 06/24] inplace construction of ws --- skglm/solvers/group_bcd_solver.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 2a0f3b1cf..c5d686734 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -150,15 +150,3 @@ def _construct_grad(X, y, w, Xw, datafit: QuadraticGroup, ws): grad_g = -datafit.gradient_g(X, y, w, Xw, g) grads = np.concatenate((grads, grad_g)) return grads - - -def _construct_ws(w, opt, penalty: WeightedGroupL2, p0): - """Construct working set.""" - n_features = len(w) - gsupp_size = penalty.generalized_support(w).sum() - - ws_size = max(min(p0, n_features), - min(n_features, 2 * gsupp_size)) - - ws = np.argpartition(opt, -ws_size)[-ws_size:] - return ws From 9422e69701f7e63934014a45485a30feec43132f Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Wed, 8 Jun 2022 16:01:57 +0200 Subject: [PATCH 07/24] handle case ``max_iter=0`` --- skglm/solvers/group_bcd_solver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index c5d686734..93172ebea 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -62,6 +62,7 @@ def bcd_solver(X, y, datafit, penalty: WeightedGroupL2, w_init=None, p0=2, datafit.initialize(X, y) all_groups = np.arange(n_groups) p_objs_out = np.zeros(max_iter) + stop_crit = 0 # prevent ref before assignment when max_iter == 0 for t in range(max_iter): if t == 0: # avoid computing grad and opt twice From 033755f8b19251b368b4a409a6400d8bf076a79f Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Wed, 8 Jun 2022 16:08:11 +0200 Subject: [PATCH 08/24] remove type annotation --- skglm/solvers/group_bcd_solver.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 93172ebea..a9dd53544 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -1,11 +1,8 @@ import numpy as np from numba import njit -from skglm.datafits.group import QuadraticGroup -from skglm.penalties.block_separable import WeightedGroupL2 - -def bcd_solver(X, y, datafit, penalty: WeightedGroupL2, w_init=None, p0=2, +def bcd_solver(X, y, datafit, penalty, w_init=None, p0=2, max_iter=1000, max_epochs=100, tol=1e-7, verbose=False): """Run a group BCD solver. @@ -141,7 +138,7 @@ def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): return -def _construct_grad(X, y, w, Xw, datafit: QuadraticGroup, ws): +def _construct_grad(X, y, w, Xw, datafit, ws): """Compute the -gradient according to each group in ``ws``. Note: -gradients are stacked in a 1d array (e.g. [-grad_g1, -grad_g2, ...]). From b013dca559a02c330e06ddbba46ca8308f8fd76c Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Wed, 8 Jun 2022 16:21:42 +0200 Subject: [PATCH 09/24] do not negate ``is_penalized`` --- skglm/penalties/block_separable.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py index 4161be682..76721f5ea 100644 --- a/skglm/penalties/block_separable.py +++ b/skglm/penalties/block_separable.py @@ -242,11 +242,11 @@ def is_penalized(self, n_groups): def generalized_support(self, w): grp_indices, grp_ptr = self.grp_indices, self.grp_ptr n_groups = len(grp_ptr) - 1 - is_not_penalized = ~self.is_penalized(n_groups) + is_penalized = self.is_penalized(n_groups) gsupp = np.zeros(n_groups, dtype=np.bool_) for g in range(n_groups): - if is_not_penalized[g]: + if not is_penalized[g]: gsupp[g] = True continue From 1be93902ee7caf8fae2a4f44d7b6c4b4d5c6f9f7 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Wed, 8 Jun 2022 17:55:40 +0200 Subject: [PATCH 10/24] ``_compute_grad`` with exact size of ``grads`` --- skglm/solvers/group_bcd_solver.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index a9dd53544..909f15b2f 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -138,13 +138,19 @@ def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): return +@njit def _construct_grad(X, y, w, Xw, datafit, ws): """Compute the -gradient according to each group in ``ws``. Note: -gradients are stacked in a 1d array (e.g. [-grad_g1, -grad_g2, ...]). """ - grads = np.array([]) + grp_ptr = datafit.grp_ptr + n_features_ws = sum([grp_ptr[g+1] - grp_ptr[g] for g in ws]) + + grads = np.zeros(n_features_ws) + grad_ptr = 0 for g in ws: - grad_g = -datafit.gradient_g(X, y, w, Xw, g) - grads = np.concatenate((grads, grad_g)) + grad_g = datafit.gradient_g(X, y, w, Xw, g) + grads[grad_ptr: grad_ptr+len(grad_g)] = -grad_g + grad_ptr += len(grad_g) return grads From 213f5fac55d04067d3f7463fb3d9797eabed9c5f Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Wed, 8 Jun 2022 20:28:58 +0200 Subject: [PATCH 11/24] cosmetics & align with existing code --- skglm/solvers/group_bcd_solver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 909f15b2f..5ca297c5b 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -2,7 +2,7 @@ from numba import njit -def bcd_solver(X, y, datafit, penalty, w_init=None, p0=2, +def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, max_iter=1000, max_epochs=100, tol=1e-7, verbose=False): """Run a group BCD solver. @@ -25,7 +25,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=2, If set to None, a zero vector is used instead. p0 : int, default 2 - Min number of groups to be included in the working set. + Minimum number of groups to be included in the working set. max_iter : int, default 1000 Maximum number of iterations. From edf231c8f9141d1386b35a3b6dd90193a15bcf9a Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Wed, 8 Jun 2022 20:35:32 +0200 Subject: [PATCH 12/24] cosmetics again --- skglm/solvers/group_bcd_solver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 5ca297c5b..6c15ae3e9 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -59,7 +59,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, datafit.initialize(X, y) all_groups = np.arange(n_groups) p_objs_out = np.zeros(max_iter) - stop_crit = 0 # prevent ref before assignment when max_iter == 0 + stop_crit = 0. # prevent ref before assign when max_iter == 0 for t in range(max_iter): if t == 0: # avoid computing grad and opt twice From beabd58de1e1d2beb3e732988df185c19a9723be Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Wed, 8 Jun 2022 21:59:08 +0200 Subject: [PATCH 13/24] ``PAB`` remarks --- skglm/solvers/group_bcd_solver.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 6c15ae3e9..c375f45e2 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -3,7 +3,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, - max_iter=1000, max_epochs=100, tol=1e-7, verbose=False): + max_iter=1000, max_epochs=100, tol=1e-6, verbose=False): """Run a group BCD solver. Parameters @@ -24,7 +24,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, Initial value of coefficients. If set to None, a zero vector is used instead. - p0 : int, default 2 + p0 : int, default 10 Minimum number of groups to be included in the working set. max_iter : int, default 1000 @@ -117,7 +117,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, @njit def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): - """Perform a single BCD epoch on groups in ``ws``.""" + # perform a single BCD epoch on groups in ``ws`` grp_ptr, grp_indices = penalty.grp_ptr, penalty.grp_indices for g in ws: @@ -140,10 +140,8 @@ def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): @njit def _construct_grad(X, y, w, Xw, datafit, ws): - """Compute the -gradient according to each group in ``ws``. - - Note: -gradients are stacked in a 1d array (e.g. [-grad_g1, -grad_g2, ...]). - """ + # compute the -gradient according to each group in ``ws`` + # note: -gradients are stacked in a 1d array (e.g. [-grad_g1, -grad_g2, ...]) grp_ptr = datafit.grp_ptr n_features_ws = sum([grp_ptr[g+1] - grp_ptr[g] for g in ws]) From fc9e4df9da1fee0463761220ceb42bb978add40f Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 10:45:54 +0200 Subject: [PATCH 14/24] ``MM`` remarks --- skglm/penalties/block_separable.py | 11 ++++++++--- skglm/solvers/group_bcd_solver.py | 19 ++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py index 76721f5ea..de52e8444 100644 --- a/skglm/penalties/block_separable.py +++ b/skglm/penalties/block_separable.py @@ -212,8 +212,13 @@ def prox_1group(self, value, stepsize, g): """Compute the proximal operator of group ``g``.""" return BST(value, self.alpha * stepsize * self.weights[g]) - def subdiff_distance(self, w, grad, ws): - """Compute distance of negative gradient to the subdifferential at ``w``.""" + def subdiff_distance(self, w, grad_ws, ws): + """Compute distance of negative gradient restricted to groups in ``ws`` + to the subdifferential at ``w``. + + Note: ``grad_ws`` is a stacked array of ``-``gradients. + ([-grad_ws_1, -grad_ws_2, ...]) + """ alpha, weights = self.alpha, self.weights grp_ptr, grp_indices = self.grp_ptr, self.grp_indices @@ -222,7 +227,7 @@ def subdiff_distance(self, w, grad, ws): for idx, g in enumerate(ws): grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] - grad_g = grad[grad_ptr: grad_ptr + len(grp_g_indices)] + grad_g = grad_ws[grad_ptr: grad_ptr + len(grp_g_indices)] grad_ptr += len(grp_g_indices) w_g = w[grp_g_indices] diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index c375f45e2..5c142ac09 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -3,7 +3,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, - max_iter=1000, max_epochs=100, tol=1e-6, verbose=False): + max_iter=1000, max_epochs=100, tol=1e-4, verbose=False): """Run a group BCD solver. Parameters @@ -33,7 +33,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, max_epochs : int, default 100 Maximum number of epochs. - tol : float, default 1e-6 + tol : float, default 1e-4 Tolerance for convergence. verbose : bool, default False @@ -68,7 +68,6 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, stop_crit = np.max(opt) if stop_crit <= tol: - print("Outer solver: Early exit") break gsupp_size = penalty.generalized_support(w).sum() @@ -85,32 +84,30 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, stop_crit_in = np.max(opt_in) if max(verbose - 1, 0): - current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) + p_obj = datafit.value(y, w, Xw) + penalty.value(w) print( - f"Epoch {epoch+1}: {current_p_obj:.10f} " + f"Epoch {epoch+1}: {p_obj:.10f} " f"obj. variation: {stop_crit_in:.2e}" ) if stop_crit_in <= 0.3 * stop_crit: - print("Early exit") break - current_p_obj = datafit.value(y, w, Xw) + penalty.value(w) + p_obj = datafit.value(y, w, Xw) + penalty.value(w) grad = _construct_grad(X, y, w, Xw, datafit, all_groups) opt = penalty.subdiff_distance(w, grad, all_groups) stop_crit = np.max(opt) if max(verbose, 0): print( - f"Iteration {t+1}: {current_p_obj:.10f}, " + f"Iteration {t+1}: {p_obj:.10f}, " f"stopping crit: {stop_crit:.2e}" ) if stop_crit <= tol: - print("Outer solver: Early exit") break - p_objs_out[t] = current_p_obj + p_objs_out[t] = p_obj return w, p_objs_out, stop_crit @@ -141,7 +138,7 @@ def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): @njit def _construct_grad(X, y, w, Xw, datafit, ws): # compute the -gradient according to each group in ``ws`` - # note: -gradients are stacked in a 1d array (e.g. [-grad_g1, -grad_g2, ...]) + # note: -gradients are stacked in a 1d array ([-grad_ws_1, -grad_ws_2, ...]) grp_ptr = datafit.grp_ptr n_features_ws = sum([grp_ptr[g+1] - grp_ptr[g] for g in ws]) From 6226f711bbbb0de55036232952cd6b1176f7d3ec Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 10:49:19 +0200 Subject: [PATCH 15/24] linter happy --- skglm/penalties/block_separable.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py index de52e8444..e0940d3e1 100644 --- a/skglm/penalties/block_separable.py +++ b/skglm/penalties/block_separable.py @@ -213,8 +213,9 @@ def prox_1group(self, value, stepsize, g): return BST(value, self.alpha * stepsize * self.weights[g]) def subdiff_distance(self, w, grad_ws, ws): - """Compute distance of negative gradient restricted to groups in ``ws`` - to the subdifferential at ``w``. + """ + Compute distance to the subdifferential at ``w`` of + negative gradient restricted to groups in ``ws``. Note: ``grad_ws`` is a stacked array of ``-``gradients. ([-grad_ws_1, -grad_ws_2, ...]) From 89ec65b4ec18292db6050c66dabb000fade64e72 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 10:51:35 +0200 Subject: [PATCH 16/24] linter be happy --- skglm/penalties/block_separable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py index e0940d3e1..dcf047d8b 100644 --- a/skglm/penalties/block_separable.py +++ b/skglm/penalties/block_separable.py @@ -214,7 +214,7 @@ def prox_1group(self, value, stepsize, g): def subdiff_distance(self, w, grad_ws, ws): """ - Compute distance to the subdifferential at ``w`` of + Compute distance to the subdifferential at ``w`` of negative gradient restricted to groups in ``ws``. Note: ``grad_ws`` is a stacked array of ``-``gradients. From b07dcb5a4f6af5993715febad4d8ba1e4f41e360 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 10:55:51 +0200 Subject: [PATCH 17/24] stupide linter be happy --- skglm/penalties/block_separable.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py index dcf047d8b..24074860a 100644 --- a/skglm/penalties/block_separable.py +++ b/skglm/penalties/block_separable.py @@ -213,9 +213,7 @@ def prox_1group(self, value, stepsize, g): return BST(value, self.alpha * stepsize * self.weights[g]) def subdiff_distance(self, w, grad_ws, ws): - """ - Compute distance to the subdifferential at ``w`` of - negative gradient restricted to groups in ``ws``. + """Compute distance to the subdifferential at ``w`` of negative gradient. Note: ``grad_ws`` is a stacked array of ``-``gradients. ([-grad_ws_1, -grad_ws_2, ...]) From 3a683792cad6c4a3153a014159320aa652ca5cb5 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 11:02:10 +0200 Subject: [PATCH 18/24] explicit declaration for var used once --- skglm/penalties/block_separable.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skglm/penalties/block_separable.py b/skglm/penalties/block_separable.py index 24074860a..dc9029147 100644 --- a/skglm/penalties/block_separable.py +++ b/skglm/penalties/block_separable.py @@ -255,8 +255,7 @@ def generalized_support(self, w): continue grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] - w_g = w[grp_g_indices] - if np.any(w_g): + if np.any(w[grp_g_indices]): gsupp[g] = True return gsupp From 0105d83e46ef357f654a2d86567ca209c290b58b Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 11:24:03 +0200 Subject: [PATCH 19/24] Implement ``_check_group_compatible`` --- skglm/solvers/group_bcd_solver.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 5c142ac09..2a80462b3 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -50,6 +50,9 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, stop_crit: float The value of the stop criterion. """ + _check_group_compatible(datafit) + _check_group_compatible(penalty) + n_features = X.shape[1] n_groups = len(penalty.grp_ptr) - 1 @@ -149,3 +152,14 @@ def _construct_grad(X, y, w, Xw, datafit, ws): grads[grad_ptr: grad_ptr+len(grad_g)] = -grad_g grad_ptr += len(grad_g) return grads + + +def _check_group_compatible(obj): + obj_name = obj.__class__.__name__ + group_attrs = ['grp_ptr', 'grp_indices'] + + for attr in group_attrs: + if not hasattr(obj, attr): + raise Exception( + f"Missing {attr} attribute from {obj_name}." + ) From 7abe91a24d2565f410d3ac756fe87acdca566b59 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 11:47:02 +0200 Subject: [PATCH 20/24] add unit testing --- skglm/tests/test_group.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/skglm/tests/test_group.py b/skglm/tests/test_group.py index 794b05cad..81ec1c2e3 100644 --- a/skglm/tests/test_group.py +++ b/skglm/tests/test_group.py @@ -2,6 +2,8 @@ import numpy as np from numpy.linalg import norm +from skglm.penalties import L1 +from skglm.datafits import Quadratic from skglm.penalties.block_separable import WeightedGroupL2 from skglm.datafits.group import QuadraticGroup from skglm.solvers.group_bcd_solver import bcd_solver @@ -26,6 +28,15 @@ def _generate_random_grp(n_groups, n_features, shuffle=True): return grp_indices, splits, groups +def test_check_group_compatible(): + l1_penalty = L1(1e-3) + quad_datafit = Quadratic() + X, y = np.random.randn(5, 5), np.random.randn(5) + + with np.testing.assert_raises(Exception): + bcd_solver(X, y, quad_datafit, l1_penalty) + + @pytest.mark.parametrize("n_groups, n_features, shuffle", [[10, 50, True], [10, 50, False], [17, 53, False]]) def test_alpha_max(n_groups, n_features, shuffle): From 139bc96698ad048468a054cc43645e94d0c10bb6 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 14:05:38 +0200 Subject: [PATCH 21/24] more clean ups --- skglm/solvers/group_bcd_solver.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 2a80462b3..b647e48db 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -76,7 +76,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, gsupp_size = penalty.generalized_support(w).sum() ws_size = max(min(p0, n_groups), min(n_groups, 2 * gsupp_size)) - ws = np.argpartition(opt, -ws_size)[-ws_size:] # k-largest items [no sort] + ws = np.argpartition(opt, -ws_size)[-ws_size:] # k-largest items (no sort) for epoch in range(max_epochs): _bcd_epoch(X, y, w, Xw, datafit, penalty, ws) @@ -101,7 +101,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, opt = penalty.subdiff_distance(w, grad, all_groups) stop_crit = np.max(opt) - if max(verbose, 0): + if verbose: print( f"Iteration {t+1}: {p_obj:.10f}, " f"stopping crit: {stop_crit:.2e}" @@ -117,7 +117,7 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, @njit def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): - # perform a single BCD epoch on groups in ``ws`` + # perform a single BCD epoch on groups in ws grp_ptr, grp_indices = penalty.grp_ptr, penalty.grp_indices for g in ws: @@ -140,7 +140,7 @@ def _bcd_epoch(X, y, w, Xw, datafit, penalty, ws): @njit def _construct_grad(X, y, w, Xw, datafit, ws): - # compute the -gradient according to each group in ``ws`` + # compute the -gradient according to each group in ws # note: -gradients are stacked in a 1d array ([-grad_ws_1, -grad_ws_2, ...]) grp_ptr = datafit.grp_ptr n_features_ws = sum([grp_ptr[g+1] - grp_ptr[g] for g in ws]) From 35820d3af5792e8059e41c75ac3184ca915c2a31 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Thu, 9 Jun 2022 14:44:34 +0200 Subject: [PATCH 22/24] more clear exception message --- skglm/solvers/group_bcd_solver.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index b647e48db..72cd7dc52 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -156,10 +156,12 @@ def _construct_grad(X, y, w, Xw, datafit, ws): def _check_group_compatible(obj): obj_name = obj.__class__.__name__ - group_attrs = ['grp_ptr', 'grp_indices'] + group_attrs = ('grp_ptr', 'grp_indices') for attr in group_attrs: if not hasattr(obj, attr): raise Exception( - f"Missing {attr} attribute from {obj_name}." + f"datafit and penalty must be compatible with bcd_solver.\n" + f"'{obj_name}' is not block-separable. " + f"Missing '{attr}' attribute." ) From d5d5a03472005faa14c8a2fca529044ecc5f6024 Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Fri, 10 Jun 2022 13:16:06 +0200 Subject: [PATCH 23/24] move ``check_group_compatible`` to utils --- skglm/solvers/group_bcd_solver.py | 19 ++++--------------- skglm/utils.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/skglm/solvers/group_bcd_solver.py b/skglm/solvers/group_bcd_solver.py index 72cd7dc52..941a60b21 100644 --- a/skglm/solvers/group_bcd_solver.py +++ b/skglm/solvers/group_bcd_solver.py @@ -1,6 +1,8 @@ import numpy as np from numba import njit +from skglm.utils import check_group_compatible + def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, max_iter=1000, max_epochs=100, tol=1e-4, verbose=False): @@ -50,8 +52,8 @@ def bcd_solver(X, y, datafit, penalty, w_init=None, p0=10, stop_crit: float The value of the stop criterion. """ - _check_group_compatible(datafit) - _check_group_compatible(penalty) + check_group_compatible(datafit) + check_group_compatible(penalty) n_features = X.shape[1] n_groups = len(penalty.grp_ptr) - 1 @@ -152,16 +154,3 @@ def _construct_grad(X, y, w, Xw, datafit, ws): grads[grad_ptr: grad_ptr+len(grad_g)] = -grad_g grad_ptr += len(grad_g) return grads - - -def _check_group_compatible(obj): - obj_name = obj.__class__.__name__ - group_attrs = ('grp_ptr', 'grp_indices') - - for attr in group_attrs: - if not hasattr(obj, attr): - raise Exception( - f"datafit and penalty must be compatible with bcd_solver.\n" - f"'{obj_name}' is not block-separable. " - f"Missing '{attr}' attribute." - ) diff --git a/skglm/utils.py b/skglm/utils.py index b4751d637..9a8aa4c4e 100644 --- a/skglm/utils.py +++ b/skglm/utils.py @@ -234,3 +234,23 @@ def grp_converter(groups, n_features): else: raise ValueError("Unsupported group format.") return grp_indices.astype(np.int32), grp_ptr.astype(np.int32) + + +def check_group_compatible(obj): + """Check whether obj is compatible with ``bcd_solver``. + + Parameters + ---------- + obj : instance of BaseDatafit or BasePenalty + Object to check. + """ + obj_name = obj.__class__.__name__ + group_attrs = ('grp_ptr', 'grp_indices') + + for attr in group_attrs: + if not hasattr(obj, attr): + raise Exception( + f"datafit and penalty must be compatible with 'bcd_solver'.\n" + f"'{obj_name}' is not block-separable. " + f"Missing '{attr}' attribute." + ) From eb262ac9a5686d083d2a2ae320b7a32a8067bbea Mon Sep 17 00:00:00 2001 From: Badr MOUFAD Date: Fri, 10 Jun 2022 13:21:37 +0200 Subject: [PATCH 24/24] some other cosmetics --- skglm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skglm/utils.py b/skglm/utils.py index 9a8aa4c4e..d9ff10207 100644 --- a/skglm/utils.py +++ b/skglm/utils.py @@ -237,7 +237,7 @@ def grp_converter(groups, n_features): def check_group_compatible(obj): - """Check whether obj is compatible with ``bcd_solver``. + """Check whether ``obj`` is compatible with ``bcd_solver``. Parameters ----------