5 changes: 2 additions & 3 deletions README.rst
@@ -14,9 +14,8 @@ Project implementing some optimization routines in Python.
Philosophy
==========

* No compiled code. State of the art performance.
* Automate as much as possible (line-search whenever possible).
* Optimization-centric API.
* Ø compiled code. State of the art performance.
* Modular, extensible, general-purpose optimization library.


License
2 changes: 1 addition & 1 deletion copt/__init__.py
@@ -2,7 +2,7 @@

from .gradient_descent import minimize_PGD, minimize_DavisYin, minimize_APGD
from .primal_dual import fmin_CondatVu
from .stochastic import minimize_SAGA
from .stochastic import minimize_SAGA, minimize_BCD
from .utils import LogisticLoss, SquaredLoss, L1Norm, TotalVariation2D
from . import datasets
from . import tv_prox
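For context, a minimal sketch of what this export change means at the package level: the new block coordinate descent solver becomes importable next to the existing ones (the snippet assumes the package is installed under its usual name, copt).

```python
import copt as cp

# minimize_BCD is now part of the public API, alongside minimize_SAGA
print(cp.minimize_SAGA.__name__, cp.minimize_BCD.__name__)
```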
4 changes: 2 additions & 2 deletions copt/gradient_descent.py
@@ -1,7 +1,8 @@
import warnings
import numpy as np
from scipy import optimize, linalg
from scipy import optimize, linalg, sparse
from datetime import datetime
from numba import njit

# .. local imports ..
from .utils import DummyProx
@@ -285,7 +286,6 @@ def minimize_APGD(
trace_time=trace_time)



def minimize_DavisYin(
fun, fun_deriv, g_prox, h_prox, y0, alpha=1.0, beta=1.0, tol=1e-6, max_iter=1000,
g_prox_args=(), h_prox_args=(),
108 changes: 101 additions & 7 deletions copt/stochastic.py
@@ -98,7 +98,6 @@ def minimize_SAGA(
trace_func = []
trace_time = []

# .. iterate on epochs ..
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for job_id in range(n_jobs):
@@ -193,7 +192,7 @@ def _factory_sparse_SAGA(f, g):
d[~idx] = 0.

@njit(nogil=True)
def epoch_iteration_template(
def _saga_algorithm(
x, memory_gradient, gradient_average, step_size, max_iter, job_id,
tol, stop_flag, trace, trace_x):

@@ -216,19 +215,19 @@ def epoch_iteration_template(
p += x[j_idx] * A_data[j]

grad_i = partial_gradient(p, b[i])
mem_i = memory_gradient[i]

# .. update coefficients ..
for j in range(A_indptr[i], A_indptr[i+1]):
j_idx = A_indices[j]
incr = (grad_i - mem_i) * A_data[j] + d[j_idx] * (
gradient_average[j_idx] + f_alpha * x[j_idx])
incr = (grad_i - memory_gradient[i]) * A_data[j]
incr += d[j_idx] * (gradient_average[j_idx] + f_alpha * x[j_idx])
x[j_idx] = prox(x[j_idx] - step_size * incr, step_size * d[j_idx])

# .. update memory terms ..
for j in range(A_indptr[i], A_indptr[i+1]):
j_idx = A_indices[j]
gradient_average[j_idx] += (grad_i - memory_gradient[i]) * A_data[j] / n_samples
gradient_average[j_idx] += (
grad_i - memory_gradient[i]) * A_data[j] / n_samples
memory_gradient[i] = grad_i

if job_id == 0:
@@ -253,5 +252,100 @@
break
return it, cert

return epoch_iteration_template
return _saga_algorithm
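For readers new to SAGA, a dense NumPy sketch of the pass that _saga_algorithm performs on CSR data may help. It drops the per-feature d[j] rescaling and the in-place sparse indexing but keeps the same update order; the helper name saga_pass_dense is ours, not part of this PR, while partial_gradient, prox, memory_gradient, gradient_average, step_size and alpha mirror the objects above.

```python
import numpy as np

def saga_pass_dense(x, A, b, memory_gradient, gradient_average,
                    partial_gradient, prox, step_size, alpha):
    # One sweep of SAGA over all samples (dense, illustrative only).
    n_samples = A.shape[0]
    for i in np.random.permutation(n_samples):
        grad_i = partial_gradient(A[i].dot(x), b[i])
        # variance-reduced gradient estimate for sample i
        incr = (grad_i - memory_gradient[i]) * A[i] + gradient_average + alpha * x
        x = prox(x - step_size * incr, step_size)
        # keep the running average and the stored partial gradient in sync
        gradient_average += (grad_i - memory_gradient[i]) * A[i] / n_samples
        memory_gradient[i] = grad_i
    return x
```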


def minimize_BCD(
f, g=None, x0=None, step_size=None, max_iter=500, trace=False, verbose=False,
n_jobs=1):
"""Block Coordinate Descent

Parameters
----------
f
g
x0

Returns
-------

"""

if x0 is None:
xk = np.zeros(f.n_features)
else:
xk = np.array(x0, copy=True)
if g is None:
g = utils.DummyProx()
if step_size is None:
step_size = 2. / f.lipschitz_constant()

Ax = f.A.dot(xk)
f_alpha = f.alpha
n_samples, n_features = f.A.shape
success = False

partial_gradient = f.partial_gradient_factory()
prox = g.prox_factory()

start_time = datetime.now()
if trace:
trace_x = np.zeros((max_iter, n_features))
else:
trace_x = np.zeros((0, 0))
stop_flag = np.zeros(1, dtype=bool)
trace_func = []
trace_time = []


@njit(nogil=True)
def _bcd_algorithm(
x, Ax, A_csc_data, A_csc_indices, A_csc_indptr, b, trace_x):
feature_indices = np.arange(n_features)
for it in range(1, max_iter):
np.random.shuffle(feature_indices)
for j in feature_indices:
grad_j = 0.
for i_indptr in range(A_csc_indptr[j], A_csc_indptr[j+1]):
# get the current sample
i_idx = A_csc_indices[i_indptr]
grad_j += partial_gradient(Ax[i_idx], b[i_idx]) * A_csc_data[i_indptr] / n_samples
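# .. proximal coordinate update, with the l2 term f_alpha * x[j] added explicitly ..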
x_new = prox(x[j] - step_size * (grad_j + f_alpha * x[j]), step_size)
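# .. keep the cached product Ax consistent with the new value of x[j] ..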
for i_indptr in range(A_csc_indptr[j], A_csc_indptr[j+1]):
i_idx = A_csc_indices[i_indptr]
Ax[i_idx] += A_csc_data[i_indptr] * (x_new - x[j])
x[j] = x_new
if trace:
trace_x[it, :] = x
return it, None

X_csc = sparse.csc_matrix(f.A)

with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for job_id in range(n_jobs):
futures.append(executor.submit(
_bcd_algorithm,
xk, Ax, X_csc.data, X_csc.indices, X_csc.indptr, f.b,
trace_x))
concurrent.futures.wait(futures)

n_iter, certificate = futures[0].result()
if trace:
delta = (datetime.now() - start_time).total_seconds()
trace_time = np.linspace(0, delta, n_iter)
if verbose:
print('Computing trace')
# .. compute function values ..
trace_func = []
for i in range(n_iter):
# TODO: could be parallelized
trace_func.append(f(trace_x[i]) + g(trace_x[i]))
return optimize.OptimizeResult(
x=xk, success=success, nit=n_iter, trace_func=trace_func, trace_time=trace_time,
certificate=certificate)
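A short usage sketch of the new solver, pieced together from the tests below and the signature above; the toy data, the ±1 label encoding and the regularization values are illustrative assumptions, not something this PR prescribes.

```python
import numpy as np
import copt as cp

# Toy l1-regularized logistic regression problem (illustrative values only).
rng = np.random.RandomState(0)
X = rng.randn(100, 10)
y = np.sign(rng.randn(100))

f = cp.LogisticLoss(X, y, 1.0)
g = cp.L1Norm(0.1)

opt = cp.minimize_BCD(f, g, max_iter=100, trace=True)
print(opt.x)           # approximate minimizer
print(opt.trace_time)  # timestamps for the recorded iterates
```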

2 changes: 1 addition & 1 deletion copt/utils.py
@@ -69,7 +69,7 @@ class SquaredLoss:

def __init__(self, A, b, alpha='auto'):
self.b = b
self.A = sparse.csr_matrix(A)
self.A = A
self.n_features = self.A.shape[1]
if alpha == 'auto':
self.alpha = 1. / A.shape[0]
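A small sketch of the behavioural difference introduced by this one-line change: the design matrix is now stored as passed instead of being coerced to CSR, so dense inputs stay dense and sparse inputs keep their format (this assumes nothing outside the lines shown re-wraps A).

```python
import numpy as np
from scipy import sparse
import copt as cp

A = np.random.randn(20, 5)
b = np.random.randn(20)

# Previously A was stored as sparse.csr_matrix(A); now it is kept as given.
assert cp.SquaredLoss(A, b).A is A
assert sparse.issparse(cp.SquaredLoss(sparse.csr_matrix(A), b).A)
```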
22 changes: 12 additions & 10 deletions tests/test_stochastic.py
@@ -12,22 +12,24 @@
def test_optimize():
for alpha in np.logspace(-1, 3, 3):
for X in (X_dense, X_sparse):
f = cp.LogisticLoss(X, y, alpha)
opt = cp.minimize_SAGA(f)
assert np.linalg.norm(f.gradient(opt.x)) < 1e-5
for alg in (cp.minimize_SAGA, cp.minimize_BCD):
f = cp.LogisticLoss(X, y, alpha)
opt = alg(f, trace=True)
assert np.linalg.norm(f.gradient(opt.x)) < 1e-5


def test_optimize_prox():
for alpha in np.logspace(-1, 3, 3):
for X in (X_dense, X_sparse):
f = cp.LogisticLoss(X, y)
g = cp.L1Norm(alpha)
opt = cp.minimize_SAGA(f, g)
gamma = 1. / f.lipschitz_constant()
gmap = (opt.x - g.prox(opt.x - gamma * f.gradient(opt.x), gamma)) / gamma
assert np.linalg.norm(gmap) < 1e-5
for alg in (cp.minimize_SAGA, cp.minimize_BCD):
f = cp.LogisticLoss(X, y)
g = cp.L1Norm(alpha)
opt = alg(f, g)
gamma = 1. / f.lipschitz_constant()
gmap = (opt.x - g.prox(opt.x - gamma * f.gradient(opt.x), gamma)) / gamma
assert np.linalg.norm(gmap) < 1e-5
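The quantity checked here is the proximal gradient mapping, which is zero exactly at minimizers of f + g; below is a small helper equivalent to the inline expression above (the f and g objects are assumed to follow the copt API used in these tests, and the helper name is ours).

```python
import numpy as np

def prox_gradient_mapping_norm(f, g, x, gamma):
    # || (x - prox_{gamma*g}(x - gamma * grad f(x))) / gamma ||, zero at a solution of min f + g
    return np.linalg.norm((x - g.prox(x - gamma * f.gradient(x), gamma)) / gamma)
```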


#
#
# def test_prox_groups():
# """Test sparse problems with group structure