![](HW5P1.png)

# Step #1: Translate rosenbrock_2Nd.m to Python. 

In [23]:
import numpy as np 
import matplotlib.pyplot as plt 
from homework_2_debug import my_line_search

from linesearch import line_search_BFGS
from scipy.optimize import line_search
import plotly.graph_objects as go

In [24]:
def rosenbrock_2Nd(order=18):
    """ generate initial conditions for the BFGS method.  18 dimensional initial condition. """
    if order == 18:
        xN = np.array([1, 1])
        x0easy = np.array([1.2, 1.2])
        x0e2 = (xN + x0easy) / 2
        x0e3   = (xN + x0e2)  / 2
        x0e4   = (xN + x0e3) / 2
        x0hard = np.array([-1.2 , 1.0])
        x0h2   = (xN + x0hard) / 2
        x0h3   = (xN + x0h2)  / 2
        x0h4   = (xN + x0h3) / 2
        x0h5   = 2*x0hard
    return [x0easy, x0e2, x0e3, x0e4, x0hard, x0h2, x0h3, x0h4, x0h5]

In [25]:
print("Initial conditions = ")
ic_vec = rosenbrock_2Nd()
ic_vec

Initial conditions = 


[array([1.2, 1.2]),
 array([1.1, 1.1]),
 array([1.05, 1.05]),
 array([1.025, 1.025]),
 array([-1.2,  1. ]),
 array([-0.1,  1. ]),
 array([0.45, 1.  ]),
 array([0.725, 1.   ]),
 array([-2.4,  2. ])]

# Rosenbrock function

In [26]:
def rosenbrock_fun(xk):
    """ This function returns the output of the Rosenbrock function."""
    x1, x2 = xk
    return 100*((x2 - x1**2)**2) + (1 - x1)**2

In [27]:
def rosenbrock_gradient(xk):
    """ return [df/dx1 df/dx2]"""
    x1, x2 = xk
    dfx1 = -400*x2*x1 + 400*(x1**3) - 2 + 2*x1
    dfx2 = 200*x2 - 200*(x1**2)
    return np.array([dfx1, dfx2])

In [28]:
def rosenbrock_hessian(xk):
    """ return [d2f/dx1^2   d2f/dx1dx2
                d2f/dx1dx2  d2f/dx2^2]"""
    x1, x2 = xk
    h = np.zeros((2, 2))
    h[0, 0] = -400*x2 + 1200*(x1**2) + 2
    h[0, 1] = -400*x1
    h[1, 0] = -400*x1
    h[1, 1] = 200 
    return h 

In [29]:
def mk_fun(xk, pk):
    """ mk taylor approximation of the objective function"""
    Bk = rosenbrock_hessian(xk)
    return rosenbrock_fun(xk) + np.dot(pk, rosenbrock_gradient(xk)) + 0.5*np.dot(pk, np.matmul(Bk, pk))

In [30]:
def rho_k(xk, pk):
    """ return rho_k = (f(xk) - f(xk+pk))/(mk(0) - mk(pk))"""
    return (rosenbrock_fun(xk) - rosenbrock_fun(xk + pk))/(mk_fun(xk, [0, 0]) - mk_fun(xk, pk))

In [31]:
def get_pk_fs(gradient, hessian):
    """ search direction for Newton's method."""
    h_inv = np.linalg.inv(hessian)
    return -np.matmul(h_inv, gradient)

![](BFGS.png)

In [39]:
def bfgs_method(x0, eps=1e-6, H0=np.eye(2)):
    """ x0 - initial starting point (dim2)
        eps - default is 1e-8
        H0 - default is the identity matrix. 
    """
    k = 0 # initialize num of outer iterations. 
    inner_k = 0 # initialize inner k iteration. 
    
    xk = x0 # intitialize x. 
    Hk = H0 # initialize H, positive definite matrix. 
    I = np.eye(2) # idenitity matrix of 2 by 2. 
    
    alpha_vec = []
    f_vec = []
    grad_vec = []
    
    while np.linalg.norm(rosenbrock_gradient(xk)) > eps:
        pk = np.dot(-Hk, rosenbrock_gradient(xk))
        
        # line search that satisfies the strong wolfe condition. 
        alpha, ink = line_search_BFGS(rosenbrock_fun, xk, pk, rosenbrock_gradient(xk), rosenbrock_fun(xk))
        inner_k += abs(int(ink))
        
        xk_next = xk + alpha*pk
        
        sk = xk_next - xk
        
        yk = rosenbrock_gradient(xk_next) - rosenbrock_gradient(xk)
        
        rho = 1/np.dot(yk, sk)
        
        Hk = np.copy((I - rho * sk @ yk.T) @ Hk @ (I - rho * yk @ sk.T) + rho * sk @ sk.T)
        
        xk = np.copy(xk_next) 
        
        alpha_vec.append(alpha)
        f_vec.append(rosenbrock_fun(xk))
        grad_vec.append(np.linalg.norm(rosenbrock_gradient(xk)))

        k += 1
        
#         print("\n iteration num = ", k)
#         print("f = ", rosenbrock_fun(xk))
#         print("|| gradient f ||", np.linalg.norm(rosenbrock_gradient(xk)))
#         print("alpha = ", alpha)
#         print("xk = ", xk)
    return xk, k, inner_k, alpha_vec, f_vec, grad_vec

In [40]:
xk, k, inner_k, alpha_vec, f_vec, grad_vec = bfgs_method(ic_vec[0])

print("outer k = ", k)
print("inner k = ", inner_k)

outer k =  2657
inner k =  2878


# x0 =[1.2, 1.2]

In [41]:
fig = go.Figure(data=[go.Table(header=dict(values=['k - iteration', 'f(xk)', '||gradient f||', 'alpha']),
                 cells=dict(values=[np.arange(1, k+1), f_vec, grad_vec, alpha_vec]))])
fig.show()

In [42]:
xk, k, inner_k, alpha_vec, f_vec, grad_vec = bfgs_method(ic_vec[1])

print("outer k = ", k)
print("inner k = ", inner_k)

outer k =  4412
inner k =  27


# x0 =[1.1, 1.1]

In [43]:
fig = go.Figure(data=[go.Table(header=dict(values=['k - iteration', 'f(xk)', '||gradient f||', 'alpha']),
                 cells=dict(values=[np.arange(1, k+1), f_vec, grad_vec, alpha_vec]))])
fig.show()

In [45]:
xk, k, inner_k, alpha_vec, f_vec, grad_vec = bfgs_method(ic_vec[2])

print("outer k = ", k)
print("inner k = ", inner_k)

outer k =  1835
inner k =  0


# x0 =[1.05, 1.05]

In [47]:
fig = go.Figure(data=[go.Table(header=dict(values=['k - iteration', 'f(xk)', '||gradient f||', 'alpha']),
                 cells=dict(values=[np.arange(1, k+1), f_vec, grad_vec, alpha_vec]))])
fig.show()

In [48]:
xk, k, inner_k, alpha_vec, f_vec, grad_vec = bfgs_method(ic_vec[3])

print("outer k = ", k)
print("inner k = ", inner_k)

outer k =  2799
inner k =  0


# x0 = [1.025, 1.025]

In [49]:
fig = go.Figure(data=[go.Table(header=dict(values=['k - iteration', 'f(xk)', '||gradient f||', 'alpha']),
                 cells=dict(values=[np.arange(1, k+1), f_vec, grad_vec, alpha_vec]))])
fig.show()

In [None]:
xk, k, inner_k, alpha_vec, f_vec, grad_vec = bfgs_method(ic_vec[4])

print("outer k = ", k)
print("inner k = ", inner_k)

# x0 = [-1.2,  1. ]

In [None]:
fig = go.Figure(data=[go.Table(header=dict(values=['k - iteration', 'f(xk)', '||gradient f||', 'alpha']),
                 cells=dict(values=[np.arange(1, k+1), f_vec, grad_vec, alpha_vec]))])
fig.show()