Skip to content

Commit

Permalink
ENH add ElasticNet (#230)
Browse files Browse the repository at this point in the history
Co-authored-by: mathurinm <mathurin.massias@gmail.com>
  • Loading branch information
Badr-MOUFAD and mathurinm committed May 13, 2022
1 parent a4263f3 commit 4230db1
Show file tree
Hide file tree
Showing 13 changed files with 657 additions and 106 deletions.
2 changes: 2 additions & 0 deletions README.rst
Expand Up @@ -9,6 +9,8 @@ Currently, the package handles the following problems:

- Lasso
- Weighted Lasso
- ElasticNet
- Weighted ElasticNet
- Sparse Logistic regression
- Weighted Group Lasso
- Multitask Lasso
Expand Down
42 changes: 25 additions & 17 deletions celer/PN_logreg.pyx
Expand Up @@ -13,8 +13,8 @@ from libc.math cimport fabs, sqrt, exp
from sklearn.exceptions import ConvergenceWarning

from .cython_utils cimport fdot, faxpy, fcopy, fposv, fscal, fnrm2
from .cython_utils cimport (primal, dual, create_dual_pt, create_accel_pt,
sigmoid, ST, LOGREG, dnorm_l1,
from .cython_utils cimport (primal, dual, create_dual_pt,
sigmoid, ST, LOGREG, dnorm_enet,
compute_Xw, compute_norms_X_col, set_prios)

cdef:
Expand All @@ -35,6 +35,10 @@ def newton_celer(
else:
dtype = np.float32

# Enet not supported for Logreg
cdef floating l1_ratio = 1.0
cdef floating norm_w2 = 0.

cdef int verbose_in = max(0, verbose - 1)
cdef int n_samples = y.shape[0]
cdef int n_features = w.shape[0]
Expand Down Expand Up @@ -102,19 +106,19 @@ def newton_celer(
cdef bint positive = 0

for t in range(max_iter):
p_obj = primal(LOGREG, alpha, Xw, y, w, weights_pen)
p_obj = primal(LOGREG, alpha, l1_ratio, Xw, y, w, weights_pen)

# theta = y * sigmoid(-y * Xw)
create_dual_pt(LOGREG, n_samples, &theta[0], &Xw[0], &y[0])
norm_Xtheta = dnorm_l1(
is_sparse, theta, X, X_data, X_indices, X_indptr,
screened, X_mean, weights_pen, center, positive)
norm_Xtheta = dnorm_enet(
is_sparse, theta, w, X, X_data, X_indices, X_indptr,
screened, X_mean, weights_pen, center, positive, alpha, l1_ratio)

if norm_Xtheta > alpha:
tmp = alpha / norm_Xtheta
fscal(&n_samples, &tmp, &theta[0], &inc)

d_obj = dual(LOGREG, n_samples, 0., &theta[0], &y[0])
d_obj = dual(LOGREG, n_samples, alpha, l1_ratio, 0., norm_w2, &theta[0], &y[0])
gap = p_obj - d_obj

if t != 0 and use_accel:
Expand Down Expand Up @@ -165,15 +169,15 @@ def newton_celer(
for i in range(n_samples):
exp_Xw[i] = exp(Xw[i])

norm_Xtheta_acc = dnorm_l1(
is_sparse, theta_acc, X, X_data, X_indices, X_indptr,
screened, X_mean, weights_pen, center, positive)
norm_Xtheta_acc = dnorm_enet(
is_sparse, theta_acc, w, X, X_data, X_indices, X_indptr,
screened, X_mean, weights_pen, center, positive, alpha, l1_ratio)

if norm_Xtheta_acc > alpha:
tmp = alpha / norm_Xtheta_acc
fscal(&n_samples, &tmp, &theta_acc[0], &inc)

d_obj_acc = dual(LOGREG, n_samples, 0., &theta_acc[0], &y[0])
d_obj_acc = dual(LOGREG, n_samples, alpha, l1_ratio, 0., norm_w2, &theta_acc[0], &y[0])
if d_obj_acc > d_obj:
fcopy(&n_samples, &theta_acc[0], &inc, &theta[0], &inc)
gap = p_obj - d_obj_acc
Expand All @@ -188,7 +192,7 @@ def newton_celer(
break


set_prios(is_sparse, theta, alpha, X, X_data, X_indices, X_indptr,
set_prios(is_sparse, theta, w, alpha, l1_ratio, X, X_data, X_indices, X_indptr,
norms_X_col, weights_pen, prios, screened, radius,
&n_screened, 0)

Expand Down Expand Up @@ -249,6 +253,10 @@ cpdef int PN_logreg(
cdef int ws_size = WS.shape[0]
cdef int n_features = w.shape[0]

# Enet not supported for Logreg
cdef floating l1_ratio = 1.0
cdef floating norm_w2 = 0.

if floating is double:
dtype = np.float64
else:
Expand Down Expand Up @@ -369,15 +377,15 @@ cpdef int PN_logreg(

else:
# rescale aux to create dual point
norm_Xaux = dnorm_l1(
is_sparse, aux, X, X_data, X_indices, X_indptr,
notin_WS, X_mean, weights_pen, center, 0)
norm_Xaux = dnorm_enet(
is_sparse, aux, w, X, X_data, X_indices, X_indptr,
notin_WS, X_mean, weights_pen, center, 0, alpha, l1_ratio)

for i in range(n_samples):
aux[i] /= max(1, norm_Xaux / alpha)

d_obj = dual(LOGREG, n_samples, 0, &aux[0], &y[0])
p_obj = primal(LOGREG, alpha, Xw, y, w, weights_pen)
d_obj = dual(LOGREG, n_samples, alpha, l1_ratio, 0., norm_w2, &aux[0], &y[0])
p_obj = primal(LOGREG, alpha, l1_ratio, Xw, y, w, weights_pen)

gap = p_obj - d_obj
if verbose_in:
Expand Down
6 changes: 4 additions & 2 deletions celer/__init__.py
@@ -1,8 +1,10 @@
"""Celer algorithm to solve L1-type regularized problems."""

from .homotopy import celer_path
from .dropin_sklearn import (Lasso, LassoCV, LogisticRegression, GroupLasso,
GroupLassoCV, MultiTaskLasso, MultiTaskLassoCV)
from .dropin_sklearn import (ElasticNet, ElasticNetCV,
GroupLasso, GroupLassoCV,
Lasso, LassoCV, LogisticRegression,
MultiTaskLasso, MultiTaskLassoCV)


__version__ = '0.7dev'
16 changes: 9 additions & 7 deletions celer/cython_utils.pxd
Expand Up @@ -8,8 +8,10 @@ cdef int LOGREG

cdef floating ST(floating, floating) nogil

cdef floating dual(int, int, floating, floating *, floating *) nogil
cdef floating primal(int, floating, floating[:], floating [:],
cdef floating fweighted_norm_w2(floating[:], floating[:]) nogil

cdef floating dual(int, int, floating, floating, floating, floating, floating *, floating *) nogil
cdef floating primal(int, floating, floating, floating[:], floating [:],
floating [:], floating[:]) nogil
cdef void create_dual_pt(int, int, floating *, floating *, floating *) nogil

Expand All @@ -27,7 +29,7 @@ cdef void fposv(char *, int *, int *, floating *,
int *, floating *, int *, int *) nogil

cdef int create_accel_pt(
int, int, int, int, floating, floating *, floating *,
int, int, int, int, floating *, floating *,
floating *, floating[:, :], floating[:, :], floating[:], floating[:])


Expand All @@ -42,11 +44,11 @@ cpdef void compute_norms_X_col(
floating[:], int[:], int[:], floating[:])


cpdef floating dnorm_l1(
bint, floating[:], floating[::1, :], floating[:],
int[:], int[:], int[:], floating[:], floating[:], bint, bint) nogil
cpdef floating dnorm_enet(
bint, floating[:], floating[:], floating[::1, :], floating[:],
int[:], int[:], int[:], floating[:], floating[:], bint, bint, floating, floating) nogil


cdef void set_prios(
bint, floating[:], floating, floating[::1, :], floating[:], int[:],
bint, floating[:], floating[:], floating, floating, floating[::1, :], floating[:], int[:],
int[:], floating[:], floating[:], floating[:], int[:], floating, int *, bint) nogil
62 changes: 45 additions & 17 deletions celer/cython_utils.pyx
Expand Up @@ -109,6 +109,20 @@ cdef inline floating Nh(floating x) nogil:
return INFINITY # not - INFINITY


@cython.boundscheck(False)
@cython.wraparound(False)
cdef floating fweighted_norm_w2(floating[:] w, floating[:] weights) nogil:
cdef floating weighted_norm = 0.
cdef int n_features = w.shape[0]
cdef int j

for j in range(n_features):
if weights[j] == INFINITY:
continue
weighted_norm += weights[j] * w[j] ** 2
return weighted_norm


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
Expand Down Expand Up @@ -141,7 +155,7 @@ cdef floating primal_logreg(
@cython.wraparound(False)
@cython.cdivision(True)
cdef floating primal_lasso(
floating alpha, floating[:] R, floating[:] w,
floating alpha, floating l1_ratio, floating[:] R, floating[:] w,
floating[:] weights) nogil:
cdef int n_samples = R.shape[0]
cdef int n_features = w.shape[0]
Expand All @@ -152,24 +166,27 @@ cdef floating primal_lasso(
for j in range(n_features):
# avoid nan when weights[j] is INFINITY
if w[j]:
p_obj += alpha * weights[j] * fabs(w[j])
p_obj += alpha * weights[j] * (
l1_ratio * fabs(w[j]) +
0.5 * (1. - l1_ratio) * w[j] ** 2)
return p_obj


cdef floating primal(
int pb, floating alpha, floating[:] R, floating[:] y,
int pb, floating alpha, floating l1_ratio, floating[:] R, floating[:] y,
floating[:] w, floating[:] weights) nogil:
if pb == LASSO:
return primal_lasso(alpha, R, w, weights)
return primal_lasso(alpha, l1_ratio, R, w, weights)
else:
return primal_logreg(alpha, R, y, w, weights)


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef floating dual_lasso(int n_samples, floating norm_y2,
floating * theta, floating * y) nogil:
cdef floating dual_enet(int n_samples, floating alpha, floating l1_ratio,
floating norm_y2, floating norm_w2, floating * theta,
floating * y) nogil:
"""Theta must be feasible"""
cdef int i
cdef floating d_obj = 0.
Expand All @@ -178,6 +195,8 @@ cdef floating dual_lasso(int n_samples, floating norm_y2,
d_obj -= (y[i] - n_samples * theta[i]) ** 2
d_obj *= 0.5 / n_samples
d_obj += norm_y2 / (2. * n_samples)
if l1_ratio != 1.0:
d_obj -= 0.5 * alpha * (1 - l1_ratio) * norm_w2
return d_obj


Expand All @@ -195,11 +214,10 @@ cdef floating dual_logreg(int n_samples, floating * theta,
return d_obj


cdef floating dual(int pb, int n_samples, floating norm_y2,
floating * theta, floating * y) nogil:

cdef floating dual(int pb, int n_samples, floating alpha, floating l1_ratio,
floating norm_y2, floating norm_w2, floating * theta, floating * y) nogil:
if pb == LASSO:
return dual_lasso(n_samples, norm_y2, &theta[0], &y[0])
return dual_enet(n_samples, alpha, l1_ratio, norm_y2, norm_w2, &theta[0], &y[0])
else:
return dual_logreg(n_samples, &theta[0], &y[0])

Expand All @@ -226,7 +244,6 @@ cdef void create_dual_pt(
@cython.cdivision(True)
cdef int create_accel_pt(
int pb, int n_samples, int epoch, int gap_freq,
floating alpha,
floating * R, floating * out, floating * last_K_R, floating[:, :] U,
floating[:, :] UtU, floating[:] onesK, floating[:] y):

Expand Down Expand Up @@ -365,11 +382,11 @@ cpdef void compute_Xw(
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef floating dnorm_l1(
bint is_sparse, floating[:] theta, floating[::1, :] X,
cpdef floating dnorm_enet(
bint is_sparse, floating[:] theta, floating[:] w, floating[::1, :] X,
floating[:] X_data, int[:] X_indices, int[:] X_indptr, int[:] skip,
floating[:] X_mean, floating[:] weights, bint center,
bint positive) nogil:
bint positive, floating alpha, floating l1_ratio) nogil:
"""compute norm(X[:, ~skip].T.dot(theta), ord=inf)"""
cdef int n_samples = theta.shape[0]
cdef int n_features = skip.shape[0]
Expand Down Expand Up @@ -399,6 +416,10 @@ cpdef floating dnorm_l1(
else:
Xj_theta = fdot(&n_samples, &theta[0], &inc, &X[0, j], &inc)

# minus sign to consider the choice theta = y - Xw and not theta = Xw -y
if l1_ratio != 1:
Xj_theta -= alpha * (1 - l1_ratio) * weights[j] * w[j]

if not positive:
Xj_theta = fabs(Xj_theta)
scal = max(scal, Xj_theta / weights[j])
Expand All @@ -409,14 +430,15 @@ cpdef floating dnorm_l1(
@cython.wraparound(False)
@cython.cdivision(True)
cdef void set_prios(
bint is_sparse, floating[:] theta, floating alpha,
bint is_sparse, floating[:] theta, floating[:] w, floating alpha, floating l1_ratio,
floating[::1, :] X, floating[:] X_data, int[:] X_indices, int[:] X_indptr,
floating[:] norms_X_col, floating[:] weights, floating[:] prios,
int[:] screened, floating radius, int * n_screened, bint positive) nogil:
cdef int i, j, startptr, endptr
cdef floating Xj_theta
cdef int n_samples = theta.shape[0]
cdef int n_features = prios.shape[0]
cdef floating norms_X_col_j = 0.

# TODO we do not substract theta_sum, which seems to indicate that theta
# is always centered...
Expand All @@ -433,11 +455,17 @@ cdef void set_prios(
else:
Xj_theta = fdot(&n_samples, &theta[0], &inc, &X[0, j], &inc)

norms_X_col_j = norms_X_col[j]
if l1_ratio != 1:
Xj_theta -= alpha * (1 - l1_ratio) * weights[j] * w[j]

norms_X_col_j = norms_X_col_j ** 2
norms_X_col_j += sqrt(norms_X_col_j + alpha * (1 - l1_ratio) * weights[j])

if positive:
prios[j] = fabs(Xj_theta - alpha * weights[j]) / norms_X_col[j]
prios[j] = fabs(Xj_theta - alpha * l1_ratio * weights[j]) / norms_X_col_j
else:
prios[j] = (alpha * weights[j] - fabs(Xj_theta)) / norms_X_col[j]
prios[j] = (alpha * l1_ratio * weights[j] - fabs(Xj_theta)) / norms_X_col_j

if prios[j] > radius:
screened[j] = True
Expand Down

0 comments on commit 4230db1

Please sign in to comment.