diff --git a/doc/changes/0.4.rst b/doc/changes/0.4.rst
index 0e7ac4883..e649956a9 100644
--- a/doc/changes/0.4.rst
+++ b/doc/changes/0.4.rst
@@ -5,4 +5,5 @@ Version 0.4 (in progress)
 - Add support for weights and positive coefficients to :ref:`MCPRegression Estimator ` (PR: :gh:`184`)
 - Move solver specific computations from ``Datafit.initialize()`` to separate ``Datafit`` methods to ease ``Solver`` - ``Datafit`` compatibility check (PR: :gh:`192`)
 - Add :ref:`LogSumPenalty ` (PR: :gh:`#127`)
+- Remove abstract methods in ``BaseDatafit`` and ``BasePenalty`` to make solver/penalty/datafit compatibility checks easier (PR: :gh:`#205`)
 - Add fixed-point distance to build working sets in :ref:`ProxNewton ` solver (:gh:`138`)
diff --git a/doc/tutorials/add_datafit.rst b/doc/tutorials/add_datafit.rst
index 9aa60633b..2258bba38 100644
--- a/doc/tutorials/add_datafit.rst
+++ b/doc/tutorials/add_datafit.rst
@@ -30,16 +30,17 @@ They can then be passed to a :class:`~skglm.GeneralizedLinearEstimator`.
 )
 
 
-A ``Datafit`` is a jitclass which must inherit from the ``BaseDatafit`` class:
+A ``Datafit`` is a jitclass that must inherit from the ``BaseDatafit`` class:
 
-.. literalinclude:: ../skglm/datafits/base.py
+.. literalinclude:: ../../skglm/datafits/base.py
    :pyobject: BaseDatafit
 
-To define a custom datafit, you need to implement the methods declared in the ``BaseDatafit`` class.
-One needs to overload at least the ``value`` and ``gradient`` methods for skglm to support the datafit.
+To define a custom datafit, you need to inherit from the ``BaseDatafit`` class and implement the methods required by the targeted solver.
+These methods can be found in the solver documentation.
 Optionally, overloading the methods with the suffix ``_sparse`` adds support for sparse datasets (CSC matrix).
 
-As an example, we show how to implement the Poisson datafit in skglm.
+
+This tutorial shows how to implement the :ref:`Poisson ` datafit, to be fitted with the :ref:`ProxNewton ` solver.
 
 
 A case in point: defining Poisson datafit
@@ -104,18 +105,16 @@
 For the Poisson datafit, this yields
 
 .. math::
     \frac{\partial F(\beta)}{\partial \beta_j} = \frac{1}{n} \sum_{i=1}^n X_{i,j} \left(
-        \exp([X\beta]_i) - y
+        \exp([X\beta]_i) - y_i
     \right) \ .
 
 When implementing these quantities in the ``Poisson`` datafit class, this gives:
 
-.. literalinclude:: ../skglm/datafits/single_task.py
+.. literalinclude:: ../../skglm/datafits/single_task.py
    :pyobject: Poisson
 
 Note that we have not initialized any quantities in the ``initialize`` method.
-Usually it serves to compute a Lipschitz constant of the datafit, whose inverse is used by the solver as a step size.
-However, in this example, the Poisson datafit has no Lipschitz constant since the eigenvalues of the Hessian matrix are unbounded.
-This implies that a step size is not known in advance and a line search has to be performed at every epoch by the solver.
+Usually, it serves to compute datafit attributes specific to the dataset ``X, y`` for computational efficiency, for example the computation of ``X.T @ y`` in the :ref:`Quadratic ` datafit.
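With the abstract methods gone, a datafit only has to supply the methods its target solver actually calls. To illustrate the reworked `add_datafit.rst` wording, here is a rough sketch of a custom least-squares datafit aimed at the ProxNewton solver. It assumes ProxNewton goes through `value`, `raw_grad` and `raw_hessian` (the methods the built-in Poisson datafit provides, if I read `single_task.py` correctly); the class name is made up for the example:

```python
import numpy as np

from skglm.datafits.base import BaseDatafit


class HalfSquaredLoss(BaseDatafit):
    """Sketch of a custom datafit: F(w) = ||y - Xw||^2 / (2 * n_samples)."""

    def get_spec(self):
        # no attribute to compile: empty numba spec
        return ()

    def params_to_dict(self):
        # no hyperparameter to pass back to __init__ either
        return dict()

    def initialize(self, X, y):
        # nothing to pre-compute for this datafit
        pass

    def value(self, y, w, Xw):
        return np.sum((y - Xw) ** 2) / (2 * len(y))

    def raw_grad(self, y, Xw):
        # gradient of the datafit with respect to Xw
        return (Xw - y) / len(y)

    def raw_hessian(self, y, Xw):
        # diagonal of the Hessian of the datafit with respect to Xw
        return np.ones(len(Xw)) / len(y)
```

Since the base class no longer forces `gradient_scalar` and friends, this compiles and runs without dead stub methods; skglm builds the jitclass from the (here empty) spec returned by `get_spec`.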
diff --git a/doc/tutorials/add_penalty.rst b/doc/tutorials/add_penalty.rst
index 0cb39bacd..62a2a3bba 100644
--- a/doc/tutorials/add_penalty.rst
+++ b/doc/tutorials/add_penalty.rst
@@ -10,10 +10,12 @@ skglm supports any arbitrary proximable penalty.
 It is implemented as a jitclass which must inherit from the ``BasePenalty`` class:
 
-.. literalinclude:: ../skglm/penalties/base.py
+.. literalinclude:: ../../skglm/penalties/base.py
    :pyobject: BasePenalty
 
-To implement your own penalty, you only need to define a new jitclass, inheriting from ``BasePenalty`` and define how its value, proximal operator, distance to subdifferential (for KKT violation) and penalized features are computed.
+To implement your own penalty, you only need to define a new jitclass that inherits from ``BasePenalty`` and implements the methods required by the targeted solver.
+These methods can be found in the solver documentation.
+
 
 
 A case in point: defining L1 penalty
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -66,6 +68,6 @@ Note that since ``lambda`` is a reserved keyword in Python, ``alpha`` in skglm c
 
 When putting all together, this gives the implementation of the ``L1`` penalty:
 
-.. literalinclude:: ../skglm/penalties/separable.py
+.. literalinclude:: ../../skglm/penalties/separable.py
    :pyobject: L1
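Same check on the penalty side: a minimal sketch of an L1-style penalty implementing, besides `get_spec`/`params_to_dict`/`value`, the methods a working-set coordinate descent solver typically asks for. `prox_1d` and `subdiff_distance` are the names this PR drops from `BasePenalty`; `is_penalized` and `generalized_support` are still listed there. The class name is illustrative:

```python
import numpy as np
from numba import float64

from skglm.penalties.base import BasePenalty


class ScaledL1(BasePenalty):
    """Sketch of a custom penalty: alpha * ||w||_1."""

    def __init__(self, alpha):
        self.alpha = alpha

    def get_spec(self):
        # numba types of the attributes, consumed when compiling the jitclass
        return (('alpha', float64),)

    def params_to_dict(self):
        return dict(alpha=self.alpha)

    def value(self, w):
        return self.alpha * np.sum(np.abs(w))

    def prox_1d(self, value, stepsize, j):
        # soft-thresholding of the scalar `value`
        shift = self.alpha * stepsize
        return np.sign(value) * max(abs(value) - shift, 0.)

    def subdiff_distance(self, w, grad, ws):
        # distance of -grad_j to the subdifferential of alpha * |w_j|
        dist = np.zeros_like(grad)
        for idx, j in enumerate(ws):
            if w[j] == 0.:
                # at 0 the subdifferential is the interval [-alpha, alpha]
                dist[idx] = max(abs(grad[idx]) - self.alpha, 0.)
            else:
                # elsewhere it is the singleton {alpha * sign(w_j)}
                dist[idx] = abs(grad[idx] + np.sign(w[j]) * self.alpha)
        return dist

    def is_penalized(self, n_features):
        # every feature is penalized
        return np.ones(n_features, dtype=np.bool_)

    def generalized_support(self, w):
        return w != 0.
```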
- """ - -class BaseMultitaskDatafit(): +class BaseMultitaskDatafit: """Base class for multitask datafits.""" - @abstractmethod def get_spec(self): """Specify the numba types of the class attributes. @@ -149,7 +83,6 @@ def get_spec(self): spec to be passed to Numba jitclass to compile the class. """ - @abstractmethod def params_to_dict(self): """Get the parameters to initialize an instance of the class. @@ -159,7 +92,6 @@ def params_to_dict(self): The parameters to instantiate an object of the class. """ - @abstractmethod def initialize(self, X, Y): """Store useful values before fitting on X and Y. @@ -172,7 +104,6 @@ def initialize(self, X, Y): Multitask target. """ - @abstractmethod def initialize_sparse(self, X_data, X_indptr, X_indices, Y): """Store useful values before fitting on X and Y, when X is sparse. @@ -191,7 +122,6 @@ def initialize_sparse(self, X_data, X_indptr, X_indices, Y): Target matrix. """ - @abstractmethod def value(self, Y, W, XW): """Value of datafit at matrix W. @@ -211,60 +141,3 @@ def value(self, Y, W, XW): value : float The datafit value evaluated at matrix W. """ - - @abstractmethod - def gradient_j(self, X, Y, W, XW, j): - """Gradient with respect to j-th coordinate of W. - - Parameters - ---------- - X : array, shape (n_samples, n_features) - Design matrix. - - Y : array, shape (n_samples, n_tasks) - Target matrix. - - W : array, shape (n_features, n_tasks) - Coefficient matrix. - - XW : array, shape (n_samples, n_tasks) - Model fit. - - j : int - The coordinate along which the gradient is evaluated. - - Returns - ------- - gradient : array, shape (n_tasks,) - The gradient of the datafit with respect to the j-th coordinate of W. - """ - - @abstractmethod - def gradient_j_sparse(self, X_data, X_indptr, X_indices, Y, XW, j): - """Gradient with respect to j-th coordinate of W when X is sparse. - - Parameters - ---------- - X_data : array-like - `data` attribute of the sparse CSC matrix X. - - X_indptr : array-like - `indptr` attribute of the sparse CSC matrix X. - - X_indices : array-like - `indices` attribute of the sparse CSC matrix X. - - Y : array, shape (n_samples, n_tasks) - Target matrix. - - XW : array, shape (n_samples, n_tasks) - Model fit. - - j : int - The coordinate along which the gradient is evaluated. - - Returns - ------- - gradient : array, shape (n_tasks,) - The gradient of the datafit with respect to the j-th coordinate of W. - """ diff --git a/skglm/penalties/base.py b/skglm/penalties/base.py index b45254b71..dc8bce200 100644 --- a/skglm/penalties/base.py +++ b/skglm/penalties/base.py @@ -1,10 +1,7 @@ -from abc import abstractmethod - -class BasePenalty(): +class BasePenalty: """Base class for penalty subclasses.""" - @abstractmethod def get_spec(self): """Specify the numba types of the class attributes. @@ -14,7 +11,6 @@ def get_spec(self): spec to be passed to Numba jitclass to compile the class. """ - @abstractmethod def params_to_dict(self): """Get the parameters to initialize an instance of the class. @@ -24,39 +20,11 @@ def params_to_dict(self): The parameters to instantiate an object of the class. """ - @abstractmethod def value(self, w): """Value of penalty at vector w.""" - @abstractmethod - def prox_1d(self, value, stepsize, j): - """Proximal operator of penalty for feature j.""" - - @abstractmethod - def subdiff_distance(self, w, grad, ws): - """Distance of negative gradient to subdifferential at w for features in `ws`. - - Parameters - ---------- - w: array, shape (n_features,) - Coefficient vector. 
-
-        grad: array, shape (ws.shape[0],)
-            Gradient of the datafit at w, restricted to features in `ws`.
-
-        ws: array, shape (ws_size,)
-            Indices of features in the working set.
-
-        Returns
-        -------
-        distances: array, shape (ws.shape[0],)
-            The distances to the subdifferential.
-        """
-
-    @abstractmethod
     def is_penalized(self, n_features):
         """Return a binary mask with the penalized features."""
 
-    @abstractmethod
     def generalized_support(self, w):
-        r"""Return a mask which is True for coefficients in the generalized support."""
+        """Return a mask which is True for coefficients in the generalized support."""
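Finally, an end-to-end smoke test of what the updated tutorials build towards: wiring a Poisson datafit and an L1 penalty into a `GeneralizedLinearEstimator` solved by ProxNewton. Import paths are my best reading of the current package layout, and the data and value of `alpha` are arbitrary:

```python
import numpy as np

from skglm import GeneralizedLinearEstimator
from skglm.datafits import Poisson
from skglm.penalties import L1
from skglm.solvers import ProxNewton

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 40))
# strictly positive counts, to stay on the safe side of
# the Poisson datafit's input check on y
y = rng.poisson(1.0, size=100) + 1.0

model = GeneralizedLinearEstimator(
    datafit=Poisson(),
    penalty=L1(alpha=0.01),
    solver=ProxNewton(),
)
model.fit(X, y)
print(model.coef_)
```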