diff --git a/doc/api.rst b/doc/api.rst index d55c84867..f550b9cdd 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -70,6 +70,7 @@ Datafits Logistic LogisticGroup Poisson + PoissonGroup Quadratic QuadraticGroup QuadraticHessian diff --git a/doc/changes/0.5.rst b/doc/changes/0.5.rst index 1c6b6d820..af0150df4 100644 --- a/doc/changes/0.5.rst +++ b/doc/changes/0.5.rst @@ -5,3 +5,4 @@ Version 0.5 (in progress) - Add support for fitting an intercept in :ref:`SqrtLasso ` (PR: :gh:`298`) - Add experimental :ref:`QuantileHuber ` and :ref:`SmoothQuantileRegressor ` for quantile regression, and an example script (PR: :gh:`312`). - Add :ref:`GeneralizedLinearEstimatorCV ` for cross-validation with automatic parameter selection for L1 and elastic-net penalties (PR: :gh:`299`) +- Add :class:`skglm.datafits.group.PoissonGroup` datafit for group-structured Poisson regression. (PR: :gh:`317`) diff --git a/skglm/datafits/__init__.py b/skglm/datafits/__init__.py index 74e0e5d75..0c6a5973e 100644 --- a/skglm/datafits/__init__.py +++ b/skglm/datafits/__init__.py @@ -2,13 +2,13 @@ from .single_task import (Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, Cox, WeightedQuadratic, QuadraticHessian,) from .multi_task import QuadraticMultiTask -from .group import QuadraticGroup, LogisticGroup +from .group import QuadraticGroup, LogisticGroup, PoissonGroup __all__ = [ BaseDatafit, BaseMultitaskDatafit, Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, Cox, QuadraticMultiTask, - QuadraticGroup, LogisticGroup, WeightedQuadratic, + QuadraticGroup, LogisticGroup, PoissonGroup, WeightedQuadratic, QuadraticHessian ] diff --git a/skglm/datafits/group.py b/skglm/datafits/group.py index 487df4a30..264fc0f38 100644 --- a/skglm/datafits/group.py +++ b/skglm/datafits/group.py @@ -3,7 +3,7 @@ from numba import int32, float64 from skglm.datafits.base import BaseDatafit -from skglm.datafits.single_task import Logistic +from skglm.datafits.single_task import Logistic, Poisson from 
skglm.utils.sparse_ops import spectral_norm, sparse_columns_slice @@ -161,3 +161,52 @@ def gradient_g(self, X, y, w, Xw, g): grad_g[idx] = X[:, j] @ raw_grad_val return grad_g + + +class PoissonGroup(Poisson): + r"""Poisson datafit used with group penalties. + + The datafit reads: + + .. math:: 1 / n_"samples" \sum_{i=1}^{n_"samples"} (\exp((Xw)_i) - y_i (Xw)_i) + + Attributes + ---------- + grp_indices : array, shape (n_features,) + The group indices stacked contiguously + ``[grp1_indices, grp2_indices, ...]``. + + grp_ptr : array, shape (n_groups + 1,) + The group pointers such that two consecutive elements delimit + the indices of a group in ``grp_indices``. + """ + + def __init__(self, grp_ptr, grp_indices): + self.grp_ptr, self.grp_indices = grp_ptr, grp_indices + + def get_spec(self): + return ( + ('grp_ptr', int32[:]), + ('grp_indices', int32[:]), + ) + + def params_to_dict(self): + return dict(grp_ptr=self.grp_ptr, grp_indices=self.grp_indices) + + def gradient_g(self, X, y, w, Xw, g): + grp_ptr, grp_indices = self.grp_ptr, self.grp_indices + grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] + raw_grad_val = self.raw_grad(y, Xw) + grad_g = np.zeros(len(grp_g_indices)) + for idx, j in enumerate(grp_g_indices): + grad_g[idx] = X[:, j] @ raw_grad_val + return grad_g + + def gradient_g_sparse(self, X_data, X_indptr, X_indices, y, w, Xw, g): + grp_ptr, grp_indices = self.grp_ptr, self.grp_indices + grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]] + grad_g = np.zeros(len(grp_g_indices)) + for idx, j in enumerate(grp_g_indices): + grad_g[idx] = self.gradient_scalar_sparse( + X_data, X_indptr, X_indices, y, Xw, j) + return grad_g diff --git a/skglm/tests/test_group.py b/skglm/tests/test_group.py index 4b052ab81..eeb785677 100644 --- a/skglm/tests/test_group.py +++ b/skglm/tests/test_group.py @@ -4,14 +4,14 @@ import numpy as np from numpy.linalg import norm -from skglm.penalties import L1 -from skglm.datafits import Quadratic +from skglm.penalties import 
L1, L2 +from skglm.datafits import Quadratic, Poisson from skglm import GeneralizedLinearEstimator from skglm.penalties.block_separable import ( WeightedL1GroupL2, WeightedGroupL2 ) -from skglm.datafits.group import QuadraticGroup, LogisticGroup -from skglm.solvers import GroupBCD, GroupProxNewton +from skglm.datafits.group import QuadraticGroup, LogisticGroup, PoissonGroup +from skglm.solvers import GroupBCD, GroupProxNewton, LBFGS from skglm.utils.anderson import AndersonAcceleration from skglm.utils.data import (make_correlated_data, grp_converter, @@ -19,6 +19,7 @@ from celer import GroupLasso, Lasso from sklearn.linear_model import LogisticRegression +from scipy import sparse def _generate_random_grp(n_groups, n_features, shuffle=True): @@ -312,5 +313,82 @@ def test_anderson_acceleration(): np.testing.assert_array_equal(n_iter, 99) +def test_poisson_group_gradient(): + """Test gradient computation for PoissonGroup and compare sparse vs dense.""" + n_samples, n_features = 15, 6 + n_groups = 2 + + np.random.seed(0) + X = np.random.randn(n_samples, n_features) + X[X < 0] = 0 + X_sparse = sparse.csc_matrix(X) + y = np.random.poisson(1.0, n_samples) + w = np.random.randn(n_features) * 0.1 + Xw = X @ w + + grp_indices, grp_ptr = grp_converter(n_groups, n_features) + poisson_group = PoissonGroup(grp_ptr=grp_ptr, grp_indices=grp_indices) + + for group_id in range(n_groups): + # Test dense gradient against expected + raw_grad = poisson_group.raw_grad(y, Xw) + group_idx = grp_indices[grp_ptr[group_id]:grp_ptr[group_id+1]] + expected = X[:, group_idx].T @ raw_grad + grad = poisson_group.gradient_g(X, y, w, Xw, group_id) + np.testing.assert_allclose(grad, expected, rtol=1e-10) + + # Test sparse matches dense + grad_dense = poisson_group.gradient_g(X, y, w, Xw, group_id) + grad_sparse = poisson_group.gradient_g_sparse( + X_sparse.data, X_sparse.indptr, X_sparse.indices, y, w, Xw, group_id + ) + np.testing.assert_allclose(grad_sparse, grad_dense, rtol=1e-8) + + +def 
test_poisson_group_solver():
+    """Test solver convergence, solution quality."""
+    n_samples, n_features = 30, 9
+    n_groups = 3
+    alpha = 0.1
+
+    np.random.seed(0)
+    X = np.random.randn(n_samples, n_features)
+    y = np.random.poisson(np.exp(alpha * X.sum(axis=1)))
+
+    grp_indices, grp_ptr = grp_converter(n_groups, n_features)
+    datafit = PoissonGroup(grp_ptr=grp_ptr, grp_indices=grp_indices)
+    weights = np.array([1.0, 0.5, 2.0])
+    penalty = WeightedGroupL2(alpha=alpha, grp_ptr=grp_ptr,
+                              grp_indices=grp_indices, weights=weights)
+
+    w, _, stop_crit = GroupProxNewton(fit_intercept=False, tol=1e-8).solve(
+        X, y, datafit, penalty)
+
+    assert stop_crit < 1e-8 and np.all(np.isfinite(w))
+
+
+def test_poisson_vs_poisson_group_equivalence():
+    """Test that Poisson and PoissonGroup give same results when group size is 1."""
+    n_samples = 20
+    n_features = 8
+    alpha = 0.05
+
+    np.random.seed(42)
+    X = np.random.randn(n_samples, n_features)
+    y = np.random.poisson(np.exp(0.1 * X.sum(axis=1)))
+
+    # Poisson with L2 penalty
+    w_poisson, _, _ = LBFGS(tol=1e-10
+                            ).solve(X, y, Poisson(), L2(alpha=alpha))
+
+    # PoissonGroup with group size = 1, other settings same as Poisson
+    grp_indices, grp_ptr = grp_converter(n_features, n_features)
+    w_group, _, _ = LBFGS(tol=1e-10).solve(
+        X, y, PoissonGroup(grp_ptr=grp_ptr, grp_indices=grp_indices),
+        L2(alpha=alpha))
+
+    np.testing.assert_allclose(w_poisson, w_group, rtol=1e-8)
+
+
 if __name__ == "__main__":
     pass