# Exercise 9.6
Write a simple routine for implementing the steepest descent method for quadratic functions.

In [1]:
import numpy as np
from matplotlib import pyplot as plt
from scipy import linalg as la

In [2]:
def steepest_descent_quadratic(Q, b, c, x_0, ɛ=1e-9, maxiters=1e3):
    '''
    Computes minimizer and minimum value of
    f(x) = 0.5 * x.T @ Q @ x - b.T @ x + c
    using the steepest descent method.

    Every iteration moves from x_k along the negative gradient
    g = Q @ x_k - b with the exact line-search step
    α = (g.T @ g) / (g.T @ Q @ g).

    Parameters
    ----------
    Q : array, matrix of the quadratic term.
    b : array, coefficients of the linear term.
    c : scalar, constant term.
    x_0 : array, starting point.
    ɛ : float, tolerance on the gradient norm.
    maxiters : number, maximum number of descent steps.

    Returns
    -------
    (x, f) : the final iterate and f(x). f is None when the gradient
        norm never dropped to ɛ within maxiters steps. A status line is
        printed in both cases.
    '''
    dist = 1e3
    i = 0
    x_k = x_0
    while dist > ɛ and i < maxiters:
        DfT = Q @ x_k - b  # Df(x).T for the quadratic above
        dist = la.norm(DfT.T)
        if dist == 0:
            # Exactly stationary: the step formula below would be 0 / 0
            # and turn the iterate into NaN.
            break
        α_k = (DfT.T @ DfT) / (DfT.T @ Q @ DfT)  # exact line search
        x_k = x_k - α_k * DfT
        i += 1
    # Decide on the tolerance itself, so a run that meets it on the very
    # last permitted step is still reported as converged.
    if dist <= ɛ:
        print(f"Converged in {i} iterations")
        f = 0.5 * x_k.T @ Q @ x_k - b.T @ x_k + c
    else:
        print("Did not converge")
        f = None
    return x_k, f

In [3]:
# Sample problem for f(x) = 0.5 * x.T @ Q @ x - b.T @ x + c.
Q = np.array([
    [2, 1],
    [1, 2],
])
b, c = np.array([-2, 1]), 0
x_0 = np.array([23, 7])  # starting point for the descent

In [4]:
# Run the quadratic steepest-descent routine on the sample problem; the
# cell displays the returned (minimizer, minimum value) pair.
steepest_descent_quadratic(Q, b, c, x_0)

Converged in 18 iterations


(array([-1.66666667,  1.33333333]), -2.333333333333333)

In [5]:
# check function using optimizer from scipy
from scipy import optimize as opt

In [6]:
def f(x, args):
    '''
    Evaluates the quadratic 0.5 * x.T @ Q @ x - b.T @ x + c,
    where args is the triple (Q, b, c).
    '''
    matrix, linear, offset = args
    half_x = 0.5 * x.T
    return half_x @ matrix @ x - linear.T @ x + offset

In [7]:
# Cross-check: minimize the same quadratic from the same starting point
# with scipy's general-purpose optimizer. f unpacks its second argument
# as (Q, b, c).
opt.minimize(f, x_0, args=[Q, b, c])

      fun: -2.333333333333333
 hess_inv: array([[ 0.66667127, -0.3333328 ],
       [-0.3333328 ,  0.66666691]])
      jac: array([0.00000000e+00, 2.98023224e-08])
  message: 'Optimization terminated successfully.'
     nfev: 28
      nit: 5
     njev: 7
   status: 0
  success: True
        x: array([-1.66666668,  1.33333333])

# Exercise 9.7
Write a simple method for computing $Df$ using forward differences and a step size of $\sqrt{Rerr_f}$.

In [8]:
def compute_Df(f, x, Rerr=1e-9):
    '''
    Approximates Df(x), the derivative (Jacobian) of f at x.

    Column j is the central difference
    (f(x + h * e_j) - f(x - h * e_j)) / (2 * h) with h = sqrt(Rerr),
    where e_j is the jth standard basis vector.

    Parameters
    ----------
    f : callable taking x and returning a scalar or a 1-D sequence.
    x : scalar or 1-D sequence, the point of differentiation.
    Rerr : float, relative error of f; its square root is the step size.

    Returns
    -------
    Array of shape (m, n), with n the number of inputs and m the number
    of outputs (each counted as 1 for a scalar or 0-d array).
    '''
    h = np.sqrt(Rerr)  # step size

    # Evaluate f once to learn the output size. np.ndim treats Python
    # scalars, numpy scalars and 0-d arrays alike, so the routine works for
    # both scalar and vector valued functions.
    fx = f(x)
    n = 1 if np.ndim(x) == 0 else len(x)
    m = 1 if np.ndim(fx) == 0 else len(fx)

    basis = np.identity(n)  # rows are the standard basis vectors
    Df = np.empty((m, n))
    for j, e in enumerate(basis):
        step = h * e
        Df[:, j] = (f(x + step) - f(x - step)) / (2 * h)
    return Df

In [9]:
def f(x):
    '''Test map from three inputs to the pair (x0^2 + x2, x0^3 - x1).'''
    first = x[0] ** 2 + x[2]
    second = x[0] ** 3 - x[1]
    return np.array([first, second]).T


x = np.array([1, 2, 3])  # point at which the derivative is checked

In [10]:
# Display the value of the test function at x.
f(x)

array([ 4, -1])

In [11]:
# Display the numerical derivative of the test function at x.
compute_Df(f, x)

array([[ 2.,  0.,  1.],
       [ 3., -1.,  0.]])

# Exercise 9.8
Use your differentiation method from the previous problem to construct a simple method for implementing the steepest descent method for arbitrary functions, using the secant method for the line search.

In [16]:
# Use the secant method to find the minimizer of a function
def secant_minimizer(x_0, x_1, ϕ, ɛ=1e-9, maxiters=1000):
    '''
    Computes a minimizer of the scalar function ϕ using the secant method.

    The secant iteration is applied to the derivative of ϕ, which is
    approximated numerically with compute_Df, so the result is a point
    where that approximate derivative vanishes.

    Parameters
    ----------
    x_0, x_1 : scalars, the two initial guesses.
    ϕ : callable of one scalar argument.
    ɛ : float, tolerance on the relative change between iterates.
    maxiters : int, bound on the iterate index (the two initial guesses
        count as the first two iterates).

    Returns
    -------
    The last iterate computed.
    '''
    def slope(t):
        # compute_Df returns a 1x1 array for a scalar function of a scalar;
        # take the single entry so plain scalars are carried through the
        # iteration instead of size-1 arrays.
        return compute_Df(ϕ, t)[0, 0]

    # Only the two most recent iterates and their derivatives are needed.
    x_prev, x_curr = np.float64(x_0), np.float64(x_1)
    d_prev, d_curr = slope(x_prev), slope(x_curr)

    k = 2
    dist = 1e3
    while dist > ɛ and k < maxiters:
        denom = d_curr - d_prev
        if denom == 0:
            # Flat secant: the update would divide by zero and yield NaN.
            # Keep the latest iterate instead.
            break
        x_next = x_curr - d_curr * ((x_curr - x_prev) / denom)  # next approximation
        d_next = slope(x_next)
        # Relative change from the previous iterate; fall back to the
        # absolute change when that iterate is exactly zero.
        change = np.abs(x_next - x_curr)
        dist = change / np.abs(x_curr) if x_curr != 0 else change
        x_prev, d_prev = x_curr, d_curr
        x_curr, d_curr = x_next, d_next
        k += 1
    return x_curr

In [17]:
def steepest_descent(f, x_0, ɛ=1e-6, maxiters=1000):
    '''
    Computes minimizer and minimum value of an arbitrary function
    using the steepest descent method.

    The gradient is approximated with compute_Df and the step length along
    the negative gradient is chosen by secant_minimizer.

    Parameters
    ----------
    f : callable mapping a 1-D array to a scalar.
    x_0 : 1-D array, starting point.
    ɛ : float, tolerance on the distance between successive iterates.
    maxiters : int, maximum number of descent steps.

    Returns
    -------
    (x, min_val) : the final iterate and f(x). min_val is None when the
        tolerance was not met. A status line is printed in both cases.
    '''
    dist = 1e3
    i = 0
    x_k = x_0
    while dist > ɛ and i < maxiters:
        Df = compute_Df(f, x_k, 1e-3)[0]
        if not np.any(Df):
            # Already stationary: there is no descent direction and the
            # line search would be degenerate.
            dist = 0.0
            break

        def ϕ(α):
            # f restricted to the ray from x_k along the negative gradient
            return f(x_k - α * Df.T)

        α_k = secant_minimizer(0.1, 0.8, ϕ)  # get "reasonably good" α_k using secant method
        if not np.isfinite(α_k):
            # Line search broke down; keep the last finite iterate and
            # report the failure rather than returning NaN.
            break
        x_k1 = x_k - α_k * Df.T
        dist = la.norm(x_k1 - x_k)
        x_k = x_k1
        i += 1
    # Decide on the tolerance itself, not on the iteration counter.
    if dist <= ɛ:
        print(f"Converged in {i} iterations")
        min_val = f(x_k)
    else:
        print("Did not converge")
        min_val = None
    return x_k, min_val

# Exercise 9.9
Apply your code from the previous problem to the Rosenbrock function.

In [18]:
def f(x):
    '''Rosenbrock function of a point x = (x[0], x[1]).'''
    valley = x[1] - x[0] ** 2
    return 100 * valley ** 2 + (1 - x[0]) ** 2


x_0 = np.array([-2, 2])  # starting point for the descent

In [19]:
# Minimize the Rosenbrock function from x_0; unpack the two coordinates
# of the returned point and the function value there.
(x, y), f_xy = steepest_descent(f, x_0)

Converged in 94 iterations


In [20]:
# Report the point found by the descent and the function value there.
print('Minimizer: (', x, y, ')')
print('Minimum:', f_xy)

Minimizer: ( 0.9893702165322639 0.978800430993886 )
Minimum: 0.00011327313686658299
