# Newton's method in the 1-D case: some intuitions

Authors: Pierre Ablin, Alexandre Gramfort

Newton's method iterates the update $x \leftarrow x - [\nabla^2f(x)]^{-1} \nabla f(x)$. In one dimension this reduces to $x \leftarrow x - f'(x) / f''(x)$.

Through a few one-dimensional examples, let us build some intuition about its behavior.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.text import Text
from ipywidgets import interact, fixed
%matplotlib inline


# Base font size; every other text size below is an offset from it.
fontsize = 18
# Map each size-related rcParams key to the base size plus its offset.
params = {
    rc_key: fontsize + offset
    for rc_key, offset in (
        ('axes.titlesize', 4),
        ('axes.labelsize', 2),
        ('font.size', 2),
        ('legend.fontsize', 2),
        ('xtick.labelsize', 0),
        ('ytick.labelsize', 0),
    )
}
# Ask matplotlib to render figure text with LaTeX.
params['text.usetex'] = True
plt.rcParams.update(params)

In [None]:
def plot(f, f_p, f_pp, x_l=None, i=0, plot_approx=True, ylim=(-2, 10), axe=None):
    """Plot ``f`` on [-3, 3] and, optionally, one Newton step from ``x_l[i]``.

    Parameters
    ----------
    f, f_p, f_pp : callable
        The function, its first derivative and its second derivative.
        ``f`` (and the quadratic model built from ``f_p``/``f_pp``) is
        evaluated on a NumPy array of abscissas.
    x_l : sequence of float, optional
        Newton iterates. When None, only the curve of ``f`` is drawn.
    i : int
        Index in ``x_l`` of the iterate to display.
    plot_approx : bool
        When True, also draw the second-order model ``Q`` of ``f`` around
        ``x_l[i]``, a horizontal dashed line at the model's value at the
        Newton point, a vertical dashed line at the Newton point, and a
        marker for the next iterate on the curve of ``f``.
    ylim : pair of float
        Lower and upper limits of the y-axis.
    axe : matplotlib axes, optional
        Axes to draw on. A new 8x8 figure is created when None.

    Notes
    -----
    The title reports ``|x_i|`` as the distance to the optimum, i.e. it
    assumes that ``x^* = 0``.
    """
    x_min, x_max = -3, 3
    t = np.linspace(x_min, x_max)
    if axe is None:
        _, axe = plt.subplots(1, 1, figsize=(8, 8))
    axe.plot(t, f(t), label='$f(x)$', linewidth=6)
    if x_l is not None:
        x = x_l[i]
        fx = f(x)
        # Each entry: (abscissa, ordinate, LaTeX label, color).
        points = [(x, fx, '$x_{%d}$' % i, '#804000')]
        if plot_approx:
            # Evaluate the derivatives once; they are reused by the model
            # and by the Newton step.
            slope = f_p(x)
            curvature = f_pp(x)

            def q(s):
                """Second-order Taylor model of f around the current iterate."""
                delta = s - x
                return fx + delta * slope + 0.5 * delta ** 2 * curvature

            # The Newton step lands on the stationary point of the model.
            x_star = x - slope / curvature
            axe.plot(t, q(t), label='$Q_{x_{%d}}(x - x_{%d})$' % (i, i),
                     color='red', linewidth=3)
            axe.axhline(q(x_star), c='r', linestyle='dashed')
            axe.axvline(x_star, c='g', linestyle='dashed')
            points.append((x_star, f(x_star), '$x_{%d}$' % (i + 1), '#264d00'))
        for a, fa, label, color in points:
            axe.plot([a], [fa], '+', color=color, markersize=20, markeredgewidth=4)
            # Offset the text so it does not sit on top of the marker.
            axe.annotate(label, (a + 0.2, fa - 0.3), fontsize=40, color=color)
        axe.set_title('$|x_{%d} - x^*| = %.2g$' % (i, np.abs(x)))
    axe.legend(loc='upper center')
    axe.set_ylim(*ylim)
    axe.set_xlim(x_min, x_max)
    axe.set_xlabel(r'$x$')

In [None]:
def plot_newton(x0, ylim, i=0):
    """Show iterate ``i`` of Newton's method and the convergence curve.

    Runs ``i`` Newton iterations from ``x0`` using the ``f_p`` and ``f_pp``
    currently defined at global scope, draws the last iterate on the curve
    of the global ``f`` in a first figure, then plots ``|x_k|`` against the
    iteration index on a log scale in a second, smaller figure.

    Parameters
    ----------
    x0 : float
        Starting point.
    ylim : pair of float
        y-axis limits of the first figure.
    i : int
        Number of Newton iterations to run; also the index of the iterate
        displayed.
    """
    _, main_axes = plt.subplots(1, 1, figsize=(8, 8))
    iterates = newton(x0, f_p, f_pp, i)
    plot(f, f_p, f_pp, iterates, i, ylim=ylim, axe=main_axes)

    # Second figure: magnitude of the iterates on a logarithmic y-axis.
    plt.figure(figsize=(5, 3))
    magnitudes = np.abs(iterates)
    plt.semilogy(magnitudes, linewidth=3)
    plt.xlabel('iterations')
    plt.ylabel('$|x_i - x^*|$')
    plt.title('Convergence curve')
    plt.show()

Newton's method code:

In [None]:
def newton(x0, f_p, f_pp, max_iter=10):
    """Run ``max_iter`` iterations of Newton's method starting from ``x0``.

    Each iteration applies ``x <- x - f_p(x) / f_pp(x)``.

    Parameters
    ----------
    x0 : float or numpy.ndarray
        Starting point. It is never modified.
    f_p : callable
        Returns the first derivative of the objective at ``x``.
    f_pp : callable
        Returns the second derivative of the objective at ``x``.
    max_iter : int
        Number of iterations to perform.

    Returns
    -------
    numpy.ndarray
        The iterates ``[x0, x1, ..., x_max_iter]`` stacked along the first
        axis (``max_iter + 1`` entries).
    """
    x = x0
    x_l = [x]
    for _ in range(max_iter):
        # Rebind rather than use `x -= ...`: with an ndarray the in-place
        # form would overwrite the caller's x0 and make every stored iterate
        # alias the same buffer.
        x = x - f_p(x) / f_pp(x)
        x_l.append(x)
    return np.array(x_l)

# Quadratic function $f(x) = x^2$

In [None]:
def f(x):
    """Quadratic objective: return x squared."""
    return pow(x, 2)

def f_p(x):
    """First derivative of the quadratic objective: 2 x."""
    return x * 2.

def f_pp(x):
    """Second derivative of the quadratic objective: the constant 2."""
    curvature = 2.
    return curvature

# y-axis limits (lower, upper) used when plotting the quadratic example.
ylim = [-2, 10]

In [None]:
# Draw the quadratic alone: x_l keeps its default (None), so no iterate
# and no quadratic model are shown.
plot(f, f_p, f_pp, ylim=ylim)

In [None]:
# Interactive view of Newton's method on the quadratic, started at x0 = 1.1.
# The tuple (0, 10, 1) is the (min, max, step) range offered for `i`;
# x0 and ylim are wrapped in fixed() so they get no widget.
interact(plot_newton, i=(0, 10, 1), x0=fixed(1.1), ylim=fixed(ylim));

# Soft absolute value $f(x) = \log(\cosh(x))$

When $|x|$ is large, $f(x) \simeq |x|$. When $|x|$ is small, $f(x) \simeq x^2 / 2$.

In [None]:
def f(x):
    """Soft absolute value: return log(cosh(x)).

    Computed as ``logaddexp(x, -x) - log(2)``, which equals
    ``log((e^x + e^-x) / 2)`` but never forms ``cosh(x)`` itself. The direct
    formula overflows to ``inf`` once ``|x|`` exceeds roughly 710, a range
    that diverging Newton iterates reach.

    Works element-wise on NumPy arrays as well as on scalars.
    """
    return np.logaddexp(x, -x) - np.log(2.0)

def f_p(x):
    """First derivative of log(cosh(x)): tanh(x)."""
    slope = np.tanh(x)
    return slope

def f_pp(x):
    """Second derivative of log(cosh(x)): 1 - tanh(x)^2."""
    th = np.tanh(x)
    return 1 - th ** 2

# y-axis limits (lower, upper) used when plotting the soft absolute value.
ylim = [-1, 3]

In [None]:
# Draw log(cosh(x)) alone: x_l keeps its default (None), so no iterate
# and no quadratic model are shown.
plot(f, f_p, f_pp, ylim=ylim)

## Convergence zone:

When $x_0$ is small enough, Newton's method converges.

In [None]:
# Starting point for the "convergence zone" demonstration.
x0 = 1.05
# The tuple (0, 10, 1) is the (min, max, step) range offered for `i`;
# x0 and ylim are wrapped in fixed() so they get no widget.
interact(plot_newton, i=(0, 10, 1), x0=fixed(x0), ylim=fixed(ylim));

## Divergence zone
When $x_0$ is too big, the iterates move further and further away from the minimum.

In [None]:
# Starting point for the "divergence zone" demonstration; only slightly
# larger than the 1.05 used for the convergence demonstration.
x0 = 1.1
# The tuple (0, 10, 1) is the (min, max, step) range offered for `i`;
# x0 and ylim are wrapped in fixed() so they get no widget.
interact(plot_newton, i=(0, 10, 1), x0=fixed(x0), ylim=fixed(ylim));

# Cubic function $f(x) = x^3$

In [None]:
def f(x):
    """Cubic objective: return x cubed."""
    return pow(x, 3)

def f_p(x):
    """First derivative of the cubic objective: 3 x^2."""
    squared = x ** 2
    return 3 * squared

def f_pp(x):
    """Second derivative of the cubic objective: 6 x."""
    return x * 6

# y-axis limits (lower, upper) used when plotting the cubic example.
ylim = [-2, 10]

In [None]:
# Interactive view of Newton's method on the cubic, started at x0 = 2.
# With f_p(x) = 3 x^2 and f_pp(x) = 6 x, each step maps x to x / 2.
# The tuple (0, 10, 1) is the (min, max, step) range offered for `i`;
# x0 and ylim are wrapped in fixed() so they get no widget.
interact(plot_newton, i=(0, 10, 1), x0=fixed(2), ylim=fixed(ylim));