# Newton method on a 2d case

Author : Pierre Ablin, Alexandre Gramfort

We're interested in minimizing the Rosenbrock function

$f(x_1, x_2) = 100(x_2 - x_1^2)^2 + (1 - x_1)^2$

**Question**: is it a convex function? What is its minimum?

In [None]:
import numpy as np
from ipywidgets import interact, fixed
%matplotlib inline
import matplotlib.pyplot as plt
from scipy.optimize import rosen as f
from scipy.optimize import rosen_der as fp
from scipy.optimize import rosen_hess as fpp
from scipy.optimize import line_search

# Global matplotlib styling: every text size is derived from one base font
# size, and text rendering is delegated to LaTeX.
fontsize = 18
params = {
    key: fontsize + bump
    for key, bump in [
        ('axes.titlesize', 4),
        ('axes.labelsize', 2),
        ('font.size', 2),
        ('legend.fontsize', 2),
        ('xtick.labelsize', 0),
        ('ytick.labelsize', 0),
    ]
}
params['text.usetex'] = True
plt.rcParams.update(params)

In [None]:
def newton(x0, max_iter, lambda_min=0.1):
    """Run a regularized Newton method with a Wolfe line search.

    At each iterate the Hessian's eigenvalues are floored at ``lambda_min``
    before the Newton system is solved, and the step length along the
    resulting direction is chosen by ``scipy.optimize.line_search``.

    Parameters
    ----------
    x0 : array-like
        Starting point. It is copied, so the caller's object is never
        modified.
    max_iter : int
        Maximum number of iterations.
    lambda_min : float, optional
        Lower bound imposed on the eigenvalues of the Hessian
        (default 0.1).

    Returns
    -------
    x_l : ndarray
        The iterates recorded *before* each step, one row per iteration
        performed (the starting point comes first).
    h_list : list of ndarray
        The regularized Hessian used at each recorded iterate.
    """
    # Work on a private float copy: the in-place update below must neither
    # modify the caller's array nor fail on integer input.
    x = np.array(x0, dtype=float)
    h_list = []
    x_l = []
    for _ in range(max_iter):
        gradient = fp(x)
        # Regularize: floor the eigenvalues so that, for lambda_min > 0, the
        # matrix is positive definite and the direction is a descent one.
        eigvals, eigvecs = np.linalg.eigh(fpp(x))
        eigvals = np.maximum(eigvals, lambda_min)
        hess = (eigvals * eigvecs).dot(eigvecs.T)
        h_list.append(hess)
        x_l.append(x.copy())
        # Compute the search direction
        direction = -np.linalg.solve(hess, gradient)
        alpha = line_search(f, fp, x, direction, gradient,
                            maxiter=1000, c1=0.0000001, c2=0.99)[0]
        if alpha is None:
            # The line search found no acceptable step length: stop here
            # instead of failing on ``None * direction``.
            break
        x += alpha * direction
    return np.array(x_l), h_list

In [None]:
def plot(f, x_l=None, axe=None, title='Rosenbrock function', quad=None, level=1):
    """Draw filled log-scale contours of ``f`` and, optionally, a path.

    Parameters
    ----------
    f : callable
        Function evaluated on the list ``[X1, X2]`` of meshgrid arrays.
    x_l : ndarray or None
        Points to overlay as a blue line with black ``+`` markers; its
        transpose is unpacked into the two coordinates.
    axe : matplotlib axes or None
        Axes to draw on. A new 8x8 figure is created when omitted.
    title : str
        Title set on the axes.
    quad : callable or None
        Second function evaluated on the same grid, drawn as three black
        contour lines. Only used when ``x_l`` is given.
    level : float
        Value from which the three contour levels of ``quad`` are derived.
    """
    if axe is None:
        _, axe = plt.subplots(1, 1, figsize=(8, 8))

    grid_x = np.linspace(-1, 2, 200)
    grid_y = np.linspace(-1, 3, 200)
    X1, X2 = np.meshgrid(grid_x, grid_y)

    # Contours are drawn on log(value + offset); the offset keeps the
    # logarithm finite where the function vanishes.
    offset = 0.001
    log_values = np.log(f([X1, X2]) + offset)
    log_levels = np.log(np.logspace(-6, 4, 40) + offset)
    axe.contourf(X1, X2, log_values, levels=log_levels, cmap=plt.cm.Oranges_r)

    if x_l is not None:
        path_x, path_y = x_l.T
        axe.plot(path_x, path_y, 'b', linewidth=3)
        axe.plot(path_x, path_y, 'k+', markersize=10, markeredgewidth=3)
        if quad is not None:
            # Three levels spread around `level`: the lowest is halved and
            # the highest is shifted up by one.
            quad_levels = 100 * (level / 100) ** np.array([1.5, 1, 0.8])
            quad_levels[0] /= 2
            quad_levels[-1] += 1.
            axe.contour(X1, X2, quad([X1, X2]), levels=quad_levels, colors=['k'])

    axe.set_title(title)

In [None]:
# Draw the filled log-scale contours of the Rosenbrock function on a new figure.
plot(f)

In [None]:
def plot_newton(x0, max_iter=1):
    """Show the last Newton iterate in the original and preconditioned spaces.

    Runs ``newton(x0, max_iter)`` and draws two side-by-side panels:

    * left: contours of ``f``, the path of iterates, the contours of the
      quadratic model built from the last (regularized) Hessian, and a
      green star at the point obtained by a full Newton step;
    * right: contours of ``f`` after the change of variables through the
      inverse square root of that Hessian, with the contours of a quadratic
      model whose Hessian is the identity.

    Parameters
    ----------
    x0 : array-like
        Starting point handed to ``newton``.
    max_iter : int
        Number of Newton iterations to run.
    """
    iterates, hessians = newton(x0, max_iter)
    x_f = iterates[-1]
    H = hessians[-1]
    value_at_xf = f(x_f)
    grad_at_xf = fp(x_f)

    # Inverse square root of H from its eigendecomposition
    eigvals, eigvecs = np.linalg.eigh(H)
    C = (1 / np.sqrt(eigvals) * eigvecs).dot(eigvecs.T)

    # x_f reshaped so that it broadcasts against the stacked meshgrid arrays
    center = x_f[:, None, None]

    def f_prec(x):
        return f(np.tensordot(C, x - center, 1) / 2 + center)

    def offsets(x):
        # Displacement from x_f along each coordinate
        return x[:-1] - x_f[0], x[1:] - x_f[1]

    def q_ID(x):
        a, b = offsets(x)
        g = C.dot(grad_at_xf)
        model = value_at_xf + g[0] * a + g[1] * b + 0.5 * (a ** 2 + b ** 2)
        return np.sum(model, axis=0)

    def q_H(x):
        a, b = offsets(x)
        g = grad_at_xf
        curvature = H[0, 0] * a ** 2 + H[1, 1] * b ** 2 + 2 * H[0, 1] * a * b
        model = value_at_xf + g[0] * a + g[1] * b + 0.5 * curvature
        return np.sum(model, axis=0)

    # Plot
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    panels = [
        (f, 'Contours of $f(x)$. $f(x_t)=%.2g$' % value_at_xf, iterates, q_H),
        (f_prec, '$f(H_t^{-1/2}(x-x_t) + x_t)$', np.array([x_f,]), q_ID),
    ]
    for ax, (fun, title, points, quad) in zip(axes.ravel(), panels):
        plot(fun, points, ax, title, quad=quad, level=value_at_xf)

    # Point reached by a full Newton step from the last iterate
    x_sol = x_f - np.linalg.solve(H, grad_at_xf)
    axes[0].plot(x_sol[0], x_sol[1], 'g*', markersize=10)

In [None]:
# Interactive view: max_iter is chosen in 1..12 (step 1) while the starting
# point stays fixed at [0.5, -0.5].
interact(plot_newton, max_iter=(1, 12, 1), x0=fixed([0.5, -0.5]))