In [None]:
%matplotlib inline

In [None]:
import time
import numpy
import scipy.stats
import scipy.sparse

In [None]:
from utils import errfun, l1_gt_wrapper_gen
from utils import Stat

In [None]:
from model_cvxpy import l1_cvxpy_mosek, l1_cvxpy_gurobi
from solver_mosek import l1_mosek_qp
from solver_gurobi import l1_gurobi_nonexpand
from method_sub_grad import l1_sub_grad, l1_stochastic_sub_grad
from method_proj_grad import l1_proj_grad
from method_smooth_grad import l1_smooth_grad_sqrt, l1_smooth_grad_log_exp, l1_fast_smooth_grad_sqrt, l1_fast_smooth_grad_log_exp
from method_prox_grad import l1_prox_grad, l1_fast_prox_grad
from method_explicit_MM_dual import l1_ALM_dual, l1_ADMM_dual
from method_ADMM_primal_direct import l1_ADMM_primal_direct
from method_ADMM_primal_linear import l1_ADMM_primal_linear
from method_momentum import l1_momentum_sub_grad
from method_AdaGrad import l1_sub_AdaGrad
from method_RMSProp import l1_sub_RMSProp
from method_Adam import l1_sub_Adam

In [None]:
# Problem dimensions: A is drawn as an m-by-n matrix and u as an n-by-1 vector.
n = 1024
m = 512
# Weight of the l1 term in the objective evaluated by test().
mu = 1.e-3

In [None]:
# Seed NumPy's global random generator so that the random draws in the
# following cells (A, x0) are reproducible from run to run.
# Skip this cell to experiment with a different random A.
numpy.random.seed(1)

In [None]:
# Sensing matrix with i.i.d. standard-normal entries, shape (m, n).
A = numpy.random.randn(m, n)
# Sparse ground truth: an n-by-1 vector with density 0.1 whose stored entries
# are drawn from a standard normal distribution.
# toarray() is the explicit dense conversion; the `.A` shorthand only exists on
# the legacy sparse-matrix classes and not on SciPy's sparse arrays.
norm = scipy.stats.norm()
u = scipy.sparse.random(n, 1, density=0.1, data_rvs=norm.rvs).toarray()
# Observations generated from the ground truth without added noise.
b = A.dot(u)

# Starting point handed to every solver, drawn uniformly from [0, 1).
x0 = numpy.random.rand(n, 1)

In [None]:
# Reference solution: test() compares every solver's result against xx
# (stored as the "error_xx" entry). The second return value is not used here.
xx, _ = l1_cvxpy_mosek(x0, A, b, mu)

In [None]:
def test(func, **opts):
    """Run one solver on the notebook's problem instance and collect metrics.

    The solver is invoked as ``func(x0, A, b, mu, **opts)`` with the
    notebook-level ``x0``, ``A``, ``b`` and ``mu``. It must return a pair
    ``(solution, out)`` where ``out`` supports item assignment; ``out`` is
    updated in place and returned.

    Parameters
    ----------
    func : callable
        Solver to evaluate. Its ``__name__`` is stored under ``"name"``.
    **opts
        Extra keyword arguments forwarded unchanged to ``func``.

    Returns
    -------
    out
        The object returned by ``func`` with these entries added:
        ``"name"``, ``"solution"``, ``"time"`` (seconds spent inside
        ``func``), ``"check_loss"`` (``approximation_loss + mu *
        regularization``), ``"approximation_loss"``
        (``0.5 * sum((A.dot(solution) - b)**2)``), ``"regularization"``
        (``sum(abs(solution))``), and ``"error_xx"`` / ``"error_gt"``
        (``errfun`` of ``solution`` against the reference solution ``xx``
        and against ``u``).
    """
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can jump when the system clock is adjusted.
    started = time.perf_counter()
    solution, out = func(x0, A, b, mu, **opts)
    elapsed = time.perf_counter() - started

    # Recompute the objective independently of whatever the solver reports.
    residual = A.dot(solution) - b
    approximation_loss = 0.5 * numpy.sum(residual ** 2)
    regularization = numpy.sum(numpy.abs(solution))
    check_loss = approximation_loss + mu * regularization

    out["name"] = func.__name__
    out["solution"] = solution
    out["time"] = elapsed
    out["check_loss"] = check_loss
    out["approximation_loss"] = approximation_loss
    out["regularization"] = regularization
    out["error_xx"] = errfun(xx, solution)
    out["error_gt"] = errfun(u, solution)

    return out

In [None]:
# Result collector: every test() output below is pushed into it via stat(...).
stat = Stat()

In [None]:
# Baseline entry: l1_gt_wrapper_gen(u) builds a solver-shaped callable from u
# (defined in utils; presumably it simply returns u), which is then scored by
# test() like any other solver.
gt_run = test(l1_gt_wrapper_gen(u))
stat(gt_run)
stat.notebook_last()

In [None]:
# l1_cvxpy_mosek (from model_cvxpy), run with no extra options.
cvxpy_mosek_run = test(l1_cvxpy_mosek)
stat(cvxpy_mosek_run)
stat.notebook_last()

In [None]:
# l1_cvxpy_gurobi (from model_cvxpy), run with no extra options.
cvxpy_gurobi_run = test(l1_cvxpy_gurobi)
stat(cvxpy_gurobi_run)
stat.notebook_last()

In [None]:
# l1_mosek_qp (from solver_mosek), run with no extra options.
mosek_qp_run = test(l1_mosek_qp)
stat(mosek_qp_run)
stat.notebook_last()

In [None]:
# l1_gurobi_nonexpand (from solver_gurobi), run with no extra options.
gurobi_nonexpand_run = test(l1_gurobi_nonexpand)
stat(gurobi_nonexpand_run)
stat.notebook_last()

In [None]:
# l1_sub_grad is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_sub_grad,
        iter_list=[100, 225, 400, 425, 425, 450],
        lr_list=[3.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# Re-seed NumPy's global random generator right before the stochastic
# sub-gradient run so that its result is repeatable (this assumes the solver
# draws from numpy.random, which is not visible in this notebook).
# Skip this cell to let the stochastic run vary between executions.
numpy.random.seed(1)

In [None]:
# l1_stochastic_sub_grad is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_stochastic_sub_grad,
        iter_list=[100, 225, 400, 425, 425, 450],
        lr_list=[3.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        density_list=[0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_proj_grad is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_proj_grad,
        iter_list=[75, 225, 300, 300, 300, 350],
        lr_list=[1.5e-3, 2.e-3, 2.e-3, 2.e-3, 2.e-3, 2.e-3],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_smooth_grad_sqrt is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_smooth_grad_sqrt,
        iter_list=[100, 225, 400, 425, 425, 450],
        lr_list=[3.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        eps_list=[1.e-2, 1.e-3, 1.e-4, 1.e-6, 1.e-8, 1.e-10],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_smooth_grad_log_exp is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_smooth_grad_log_exp,
        iter_list=[100, 225, 400, 425, 425, 450],
        lr_list=[3.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        eps_list=[1.e-2, 1.e-3, 1.e-4, 1.e-6, 1.e-8, 1.e-10],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_fast_smooth_grad_sqrt is run twice with the same four-stage schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*4, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_fast_smooth_grad_sqrt,
        iter_list=[125, 125, 125, 225],
        lr_list=[3.e-4, 3.e-4, 3.e-4, 3.e-4],
        mu_list=[10., 0.5, 0.02, 0.001],
        eps_list=[1.e-3, 1.e-5, 1.e-8, 1.e-10],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_fast_smooth_grad_log_exp is run twice with the same four-stage schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*4, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_fast_smooth_grad_log_exp,
        iter_list=[125, 125, 125, 225],
        lr_list=[3.e-4, 3.e-4, 3.e-4, 3.e-4],
        mu_list=[10., 0.5, 0.02, 0.001],
        eps_list=[1.e-3, 1.e-5, 1.e-8, 1.e-10],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_prox_grad is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_prox_grad,
        iter_list=[100, 225, 300, 300, 300, 325],
        lr_list=[3.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_fast_prox_grad with a single stage (32000 iterations at mu = 0.001),
# run twice with the same settings:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*1, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_fast_prox_grad,
        iter_list=[32000],
        lr_list=[3.e-4],
        mu_list=[0.001],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_fast_prox_grad with a four-stage schedule over decreasing mu,
# run twice with the same settings:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*4, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_fast_prox_grad,
        iter_list=[100, 100, 100, 150],
        lr_list=[3.e-4, 3.e-4, 3.e-4, 3.e-4],
        mu_list=[10., 0.5, 0.02, 0.001],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_ALM_dual is run twice with the same four-stage schedule:
#   pass 1 adds figure/xx, draws the loss curve and the error curve, then
#          calls stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_ALM_dual,
        iter_list=[40, 30, 30, 40],
        lr_list=[1.e-2, 1.e-2, 1.e-2, 1.e-2],
        mu_list=[10., 0.5, 0.02, 0.001],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_ADMM_dual is run twice with the same four-stage schedule:
#   pass 1 adds figure/xx, draws the loss curve and the error curve, then
#          calls stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_ADMM_dual,
        iter_list=[30, 25, 25, 35],
        lr_list=[1.e-2, 1.e-2, 1.e-2, 1.e-2],
        mu_list=[10., 0.5, 0.02, 0.001],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_ADMM_primal_linear is run twice with the same four-stage schedule:
#   pass 1 adds figure/xx, draws the loss curve and the error curve, then
#          calls stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_ADMM_primal_linear,
        iter_list=[50, 150, 200, 300],
        lr_list=[1.5e-4, 1.5e-4, 1.5e-4, 2.e-4],
        mu_list=[1., 0.1, 0.01, 0.001],
        tau_list=[2., 2., 2., 2.],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_ADMM_primal_direct is run twice with the same four-stage schedule:
#   pass 1 adds figure/xx, draws the loss curve and the error curve, then
#          calls stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_ADMM_primal_direct,
        iter_list=[50, 150, 200, 300],
        lr_list=[1.5e-3, 1.5e-3, 1.5e-3, 1.5e-3],
        mu_list=[1., 0.1, 0.01, 0.001],
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_momentum_sub_grad is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_momentum_sub_grad,
        iter_list=[50, 75, 100, 100, 100, 125],
        lr_list=[3.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4, 5.e-4],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        alpha_list=[0.8]*6,
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_sub_AdaGrad is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_sub_AdaGrad,
        iter_list=[150, 250, 325, 325, 325, 325],
        lr_list=[5.e-1, 5.5e-1, 6.e-1, 6.5e-1, 7.e-1, 7.5e-1],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        delta=1.e-7,
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_sub_RMSProp is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_sub_RMSProp,
        iter_list=[125, 200, 300, 300, 300, 325],
        lr_list=[4.e-2, 1.e-2, 1.2e-3, 1.e-4, 1.e-5, 1.e-6],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        rho_list=[0.9]*6,
        delta=1.e-7,
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# l1_sub_Adam is run twice with the same staged schedule:
#   pass 1 adds res_list/figure/xx, draws the two loss curves and then calls
#          stat.pop() (presumably discarding that entry);
#   pass 2 is the plain run whose entry is shown by stat.notebook_last().
# The list literals are rebuilt on every pass, so the runs share no state.
for with_curves in (True, False):
    curve_opts = dict(res_list=[0.]*6, figure=True, xx=u) if with_curves else {}
    stat(test(
        l1_sub_Adam,
        iter_list=[100, 125, 150, 150, 150, 150],
        lr_list=[1.e-1, 2.e-1, 2.e-1, 2.e-1, 2.e-1, 2.e-1],
        mu_list=[100., 10., 1., 0.1, 0.01, 0.001],
        rho1_list=[0.9]*6,
        rho2_list=[0.999]*6,
        delta=1.e-7,
        **curve_opts
    ))
    if with_curves:
        stat.loss_curve_last(log=True)
        stat.loss_curve_last(label={"error": "Error to u", "grad_norm2": "Squared grad norm"}, log=True)
        stat.pop()
stat.notebook_last()

In [None]:
# Final summary of everything collected in stat (Stat.LaTeX_all is defined in
# utils; presumably it renders all recorded runs as LaTeX - confirm there).
stat.LaTeX_all()