In [3]:
import numpy as np
from pprint import pprint

In [5]:
def rosenbrock_f(x: np.array):
    return 100 * (x[1] - x[0]**2)**2 + (1 - x[0])**2

In [6]:
def grad_rosenbrock_f(x: np.array):
    return np.array([-400 * (x[1] - x[0]**2) * x[0] - 2 * (1 - x[0]), 200 * (x[1] - x[0]**2)])

In [4]:
def f(x: np.array):
    return 150 * (x[0] * x[1])**2 + (0.5 * x[0] + 2 * x[1] - 2)**2

In [5]:
def grad_f(x: np.array):
    return np.array([300 * x[0] * x[1]**2 + (0.5 * x[0] + 2 * x[1] - 2) * 0.5, 300 * x[0]**2 * x[1] + (0.5 * x[0] + 2 * x[1] - 2) * 2])

In [6]:
def backtracking_line_search(func, xk, pk, grad_fk, alpha=1, rho=0.5, c=1e-4):
    while func(xk + alpha * pk) > func(xk) + c * alpha * np.dot(grad_fk.T, pk):
        alpha *= rho

    if alpha < 1e-8:
        alpha = 1
    return alpha

In [7]:
# Quasi-Newton SR1 with Rosenbrock function
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_rosenbrock_f(xk1)
        yk = grad_fk1 - grad_fk
        
        ys = yk - Bk @ sk
        ys_T_s = ys.T @ sk
        Bk1 = Bk + np.outer(ys, ys) / ys_T_s

        sy = sk - Hk @ yk
        sy_T_y = sy.T @ yk
        Hk1 = Hk + np.outer(sy, sy) / sy_T_y
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "(sk - Hkyk)^Tyk": sy_T_y,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        if np.linalg.norm(grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'(sk - Hkyk)^Tyk': -2.4469478401972732e-11,
 'B_k+1': array([[ 802.08375949, -400.04403773],
       [-400.04403773,  200.02315519]]),
 'H_k+1': array([[0.22195977, 0.41585146],
       [0.41585146, 0.7777333 ]]),
 'iteration': 12,
 's_k': array([-1.07407490e-07, -2.04175304e-07]),
 'x_k+1': array([1., 1.]),
 'y_k': array([-4.47069051e-06,  2.12793754e-06]),
 '|grad_f_k+1|': 2.886945632584808e-08}
----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'(sk - Hkyk)^Tyk': -3.60249754347629e-10,
 'B_k+1': array([[ 802.17779941, -400.09232949],
       [-400.09232949,  200.04795229]]),
 'H_k+1': array

In [9]:
# Quasi-Newton SR1 with second function
starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        grad_fk = grad_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_f(xk1)
        yk = grad_fk1 - grad_fk
        
        ys = yk - Bk @ sk
        ys_T_s = ys.T @ sk
        Bk1 = Bk + np.outer(ys, ys) / ys_T_s

        sy = sk - Hk @ yk
        sy_T_y = sy.T @ yk
        Hk1 = Hk + np.outer(sy, sy) / sy_T_y
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "(sk - Hkyk)^Tyk": sy_T_y,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        if np.linalg.norm(grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'(sk - Hkyk)^Tyk': -1.510534067252835e-06,
 'B_k+1': array([[300.25079113,   1.00329068],
       [  1.00329068,   4.00214609]]),
 'H_k+1': array([[0.00340049, 0.00626065],
       [0.00626065, 0.99996067]]),
 'iteration': 12,
 's_k': array([-4.11327702e-06,  3.94201610e-06]),
 'x_k+1': array([-6.89878648e-11,  1.00000000e+00]),
 'y_k': array([-1.23105969e-03,  1.16497119e-05]),
 '|grad_f_k+1|': 2.3908866810282288e-08}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'(sk - Hkyk)^Tyk': -3.986915980931939e-09,
 'B_k+1': array([[2.49999258e-01, 9.99419744e-01],
       [9.99419744e-01, 4.80350014e+03]]),
 'H_k+1': array([[ 0

In [15]:
# Quasi-Newton BFGS method with Rosenbrock function
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_rosenbrock_f(xk1)
        yk = grad_fk1 - grad_fk
        
        rho_k = 1 / (yk.T @ sk)
        Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / (yk.T @ sk) 
        Hk1 = (np.eye(2) - rho_k * sk @ yk.T) @ Hk @ (np.eye(2) - rho_k * yk @ sk.T) + rho_k * sk @ sk.T
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        if np.linalg.norm(grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'B_k+1': array([[ 801.95466823, -399.9719623 ],
       [-399.9719623 ,  199.9827051 ]]),
 'H_k+1': array([[1.12261585, 0.12261585],
       [0.12261585, 1.12261585]]),
 'iteration': 11,
 's_k': array([-5.37503016e-07, -8.74767017e-07]),
 'x_k+1': array([1.        , 1.00000001]),
 'y_k': array([-8.11707728e-05,  4.00478618e-05]),
 '|grad_f_k+1|': 4.399163394005115e-07}
----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'B_k+1': array([[ 802.41302375, -400.19902684],
       [-400.19902684,  200.09591022]]),
 'H_k+1': array([[2.68485292, 1.68485292],
       [1.68485292, 2.68485292]]),
 'iterati

In [16]:
# Quasi-Newton BFGS method with second function
starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_rosenbrock_f(xk1)
        yk = grad_fk1 - grad_fk
        
        rho_k = 1 / (yk.T @ sk)
        Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / (yk.T @ sk) 
        Hk1 = (np.eye(2) - rho_k * sk @ yk.T) @ Hk @ (np.eye(2) - rho_k * yk @ sk.T) + rho_k * sk @ sk.T
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        if np.linalg.norm(grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'B_k+1': array([[ 815.07091355, -406.97105555],
       [-406.97105555,  203.71784287]]),
 'H_k+1': array([[1.88137418, 0.88137418],
       [0.88137418, 1.88137418]]),
 'iteration': 29,
 's_k': array([4.06895029e-08, 7.62940218e-08]),
 'x_k+1': array([1.        , 0.99999999]),
 'y_k': array([ 2.11537171e-06, -1.01699640e-06]),
 '|grad_f_k+1|': 9.972229639611428e-09}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'B_k+1': array([[ 804.34174426, -401.15633078],
       [-401.15633078,  200.57098648]]),
 'H_k+1': array([[3.39757869, 2.39757869],
       [2.39757869, 3.39757869]]),
 'iteration': 44,
 's_k': array([3.89895365

In [13]:
# Quasi-Newton SR1 (trust region) with Rosenbrock function
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-6, trust_region_delta=1.0, eta=1e-4, r=0.5):
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        if np.linalg.norm(grad_fk) < tol:
            break
            
        sk = -np.linalg.solve(Bk, grad_fk)
        if np.linalg.norm(sk) > trust_region_delta:
            sk = sk * (trust_region_delta / np.linalg.norm(sk)) 

        grad_fk1 = grad_rosenbrock_f(xk + sk)
        yk = grad_fk1 - grad_fk

        fk = rosenbrock_f(xk)
        fk1 = rosenbrock_f(xk + sk)
        ared = fk - fk1
        pred = -grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if ared / pred > eta:
            xk1 = xk + sk
        else:
            xk1 = xk

        if ared / pred > 0.75:
            if np.linalg.norm(sk) <= 0.8 * trust_region_delta:
                trust_region_delta = trust_region_delta
            else:
                trust_region_delta = 2 * trust_region_delta
        elif ared / pred >= 0.1 and ared / pred <= 0.75:
            trust_region_delta = trust_region_delta
        else:
            trust_region_delta = 0.5 * trust_region_delta
        
        # check 6.26
        if np.linalg.norm(sk.T @ (yk - Bk @ sk)) >= r * np.linalg.norm(sk) * np.linalg.norm(yk - Bk @ sk):
            ys = yk - Bk @ sk
            ys_T_s = ys.T @ sk
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "delta": trust_region_delta,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    print(f"SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])

----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'B_k+1': array([[ 852.56410905, -412.4609586 ],
       [-412.4609586 ,  199.9933109 ]]),
 'delta': 5.960464477539063e-08,
 'iteration': 71,
 's_k': array([-2.53151943e-08, -5.39616032e-08]),
 'x_k+1': array([1., 1.]),
 'y_k': array([ 1.28185523e-06, -6.66242794e-07]),
 '|grad_f_k+1|': 7.524967540362846e-08}
----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'B_k+1': array([[ 769.03990825, -391.53713197],
       [-391.53713197,  199.84991373]]),
 'delta': 0.03125,
 'iteration': 294,
 's_k': array([-4.28582436e-08, -8.05910337e-08]),
 'x_k+1': array([0.99999999, 

In [14]:
# Quasi-Newton SR1 (trust region) with second function

starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-6, trust_region_delta=1.0, eta=1e-4, r=0.5):
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        if np.linalg.norm(grad_fk) < tol:
            break
            
        sk = -np.linalg.solve(Bk, grad_fk)
        if np.linalg.norm(sk) > trust_region_delta:
            sk = sk * (trust_region_delta / np.linalg.norm(sk)) 

        grad_fk1 = grad_rosenbrock_f(xk + sk)
        yk = grad_fk1 - grad_fk

        fk = rosenbrock_f(xk)
        fk1 = rosenbrock_f(xk + sk)
        ared = fk - fk1
        pred = -grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if ared / pred > eta:
            xk1 = xk + sk
        else:
            xk1 = xk

        if ared / pred > 0.75:
            if np.linalg.norm(sk) <= 0.8 * trust_region_delta:
                trust_region_delta = trust_region_delta
            else:
                trust_region_delta = 2 * trust_region_delta
        elif ared / pred >= 0.1 and ared / pred <= 0.75:
            trust_region_delta = trust_region_delta
        else:
            trust_region_delta = 0.5 * trust_region_delta
        
        # check 6.26
        if np.linalg.norm(sk.T @ (yk - Bk @ sk)) >= r * np.linalg.norm(sk) * np.linalg.norm(yk - Bk @ sk):
            ys = yk - Bk @ sk
            ys_T_s = ys.T @ sk
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "delta": trust_region_delta,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    print(f"SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])

----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'B_k+1': array([[ 784.44893228, -394.93424092],
       [-394.93424092,  199.34004821]]),
 'delta': 0.03125,
 'iteration': 170,
 's_k': array([-3.24194414e-08, -5.94864659e-08]),
 'x_k+1': array([0.99999999, 0.99999998]),
 'y_k': array([-2.20580597e-06,  1.07048352e-06]),
 '|grad_f_k+1|': 2.9539017183410467e-07}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'B_k+1': array([[ 950.66395966, -435.51383572],
       [-435.51383572,  207.45554685]]),
 'delta': 0.0078125,
 'iteration': 4303,
 's_k': array([-1.19318491e-08, -2.37963031e-08]),
 'x_k+1': array([1.00000022, 1.00000043]),
 'y_k': array([-5.08238332e-08,  1.348006