# Frank-Wolfe algorithm on constrained Lasso problem

Author: Alexandre Gramfort

Problem considered is:

$$
\min_x \frac{1}{2}\|Ax-b\|^2 \quad \text{s.t.} \quad \|x\|_1 \leq r
$$


In [None]:
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Synthetic regression problem with fewer samples than features.
n_samples = 30
n_features = 40
# Size of each signed group in the true solution; x_true therefore holds
# 2 * nnz non-zero entries in total.
nnz = n_features // 10

# Fixed seed so that every run of the notebook sees the same data.
rng = np.random.RandomState(42)
A = rng.randn(n_samples, n_features)

# Ground truth: nnz entries equal to +1, then nnz entries equal to -1, and
# the remaining entries exactly zero (plain zeros rather than the negative
# zeros that a negated zero array would produce).
x_true = np.concatenate(
    (np.ones(nnz), -np.ones(nnz), np.zeros(n_features - 2 * nnz))
)

# Observations: linear model plus Gaussian noise of standard deviation 0.1.
noise = 0.1 * rng.randn(n_samples)
b = A.dot(x_true) + noise

In [None]:
# Stem plot of the ground-truth coefficients. The `use_line_collection`
# keyword is no longer passed: it was deprecated in Matplotlib 3.6 and later
# removed, and line collections are the default behaviour.
plt.stem(x_true)

In [None]:
def lasso_frank_wolfe(A, b, r, max_iter, use_linesearch, verbose=True):
    """Run Frank-Wolfe on least squares constrained to an l1-ball.

    Minimizes ``f(x) = 0.5 * ||A x - b||^2`` subject to ``||x||_1 <= r``,
    starting from ``x = 0``.

    Parameters
    ----------
    A : ndarray, shape (n_samples, n_features)
        Design matrix.
    b : ndarray, shape (n_samples,)
        Target vector.
    r : float
        Radius of the l1-ball constraint.
    max_iter : int
        Exclusive upper bound of the iteration counter: ``k`` runs over
        ``1, ..., max_iter - 1``, so ``max_iter - 1`` steps are performed.
    use_linesearch : bool
        If True, the step size is obtained by exact line search on the
        quadratic objective, clipped to [0, 1]. Otherwise the step size
        ``2 / (k + 2)`` is used.
    verbose : bool, default True
        If True, print one progress line per iteration.

    Returns
    -------
    pobj : list of float
        Objective value ``f(x)`` recorded after every iteration.
    x : ndarray, shape (n_features,)
        Final iterate.
    """
    # Take the dimension from A itself rather than from a notebook global,
    # so the solver works for any design matrix.
    n_features = A.shape[1]
    x = np.zeros(n_features)
    Ax = np.dot(A, x)
    pobj = []
    for k in range(1, max_iter):
        # Linear minimization oracle over the l1-ball: the minimizer of
        # <grad, s> is the signed vertex r * e_i at the coordinate with the
        # largest absolute gradient, pointing against the gradient.
        grad = np.dot(A.T, Ax - b)
        i = np.argmax(np.abs(grad))
        step_sign = np.sign(-grad[i])

        s = np.zeros(n_features)
        s[i] = step_sign * r
        As = s[i] * A[:, i]  # A @ s: the i-th column of A scaled by +/- r

        if use_linesearch:
            # Exact minimizer of the univariate quadratic in gamma along the
            # segment from x to s, then clipped to [0, 1].
            direction = As - Ax
            denom = np.dot(direction, direction)
            if denom > 0:
                gamma = np.dot(direction, b - Ax) / denom
                gamma = max(0, min(1, gamma))
            else:
                # A @ (s - x) == 0: the objective is flat along the segment,
                # so stay put instead of evaluating 0 / 0.
                gamma = 0.
        else:
            gamma = 2. / (k + 2.)

        x = (1. - gamma) * x + gamma * s  # Frank-Wolfe convex-combination step
        Ax = (1 - gamma) * Ax + gamma * As  # update A @ x without a new product

        f_at_x = linalg.norm(Ax - b)**2 / 2.
        if verbose:
            print('k=%02d - f=%f - i=%d - gamma=%1.3f - ||x_k||_1=%1.3f' %
                  (k, f_at_x, i, gamma, np.sum(np.abs(x))))

        pobj.append(f_at_x)

    return pobj, x

r = 1.  # radius of the l1-ball constraint ||x||_1 <= r
# Short verbose run with the predefined step size. Line search is disabled
# here, so the objective history is named accordingly.
pobj_no_ls, x = lasso_frank_wolfe(A, b, r, max_iter=20, use_linesearch=False)

### Plot result

In [None]:
# Silent run without line search; the history name reflects that setting.
pobj_no_ls, x = lasso_frank_wolfe(A, b, r, max_iter=100,
                                  use_linesearch=False, verbose=False)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 5))

# Left panel: objective value recorded after each iteration.
ax1.plot(pobj_no_ls)
ax1.set_xlabel('Iteration')
ax1.set_ylabel('Objective')

# Right panel: true versus estimated coefficients. The `use_line_collection`
# keyword is no longer passed: it was deprecated in Matplotlib 3.6 and later
# removed, and line collections are the default behaviour.
ax2.stem(x_true, linefmt='k-', label='True')
ax2.stem(x, linefmt='r-', label='Estimated')
ax2.set_xlabel('Features')
ax2.set_ylabel('Amplitude')
ax2.legend()
plt.tight_layout();

## Look at convergence rates

First compute $f^*$ by running the solver for many iterations.

In [None]:
# Long line-search run whose smallest recorded objective value serves as the
# reference value f_star in the convergence plot.
pobj, _ = lasso_frank_wolfe(
    A, b, r,
    max_iter=10000,
    use_linesearch=True,
    verbose=False,
)
f_star = np.asarray(pobj).min()

Then plot the objective:

In [None]:
# Compare both step-size rules over the same number of iterations.
max_iter = 100
pobj_ls, _ = lasso_frank_wolfe(A, b, r, max_iter=max_iter,
                               use_linesearch=True, verbose=False)
pobj_no_ls, _ = lasso_frank_wolfe(A, b, r, max_iter=max_iter,
                                  use_linesearch=False, verbose=False)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4))

# Left panel: raw objective values.
ax1.plot(pobj_ls, label='linesearch')
ax1.plot(pobj_no_ls, label='no linesearch')
ax1.set_xlabel('Iteration')
ax1.set_ylabel('Objective')
ax1.legend()

# Right panel: log10 of the gap to f_star (last recorded value left out).
# The histories are Python lists, so convert them to arrays explicitly
# before subtracting the scalar f_star.
ax2.plot(np.log10(np.asarray(pobj_ls[:-1]) - f_star), label='linesearch')
ax2.plot(np.log10(np.asarray(pobj_no_ls[:-1]) - f_star), label='no linesearch')
ax2.set_xlabel('Iteration')
# Raw string so that `\l` is not parsed as an (invalid) escape sequence; the
# closing parenthesis of log10(...) is also restored.
ax2.set_ylabel(r'$\log_{10}(f(x) - f(x^*))$')
ax2.legend()
plt.tight_layout();

<div class="alert alert-success">
    <b>EXERCISE:</b>
     <ul>
      <li>Change the conditioning of the problem and the sparsity level, and observe how each affects the rate of convergence.</li>
      <li>Implement FW for L1-constrained logistic regression.</li>
    </ul>
</div>