In [2]:
import numpy as np
from pprint import pprint



In [7]:
def rosenbrock_f(x: np.array):
    return 100 * (x[1] - x[0]**2)**2 + (1 - x[0])**2

In [8]:
def grad_rosenbrock_f(x: np.array):
    return np.array([-400 * (x[1] - x[0]**2) * x[0] - 2 * (1 - x[0]), 200 * (x[1] - x[0]**2)])

In [9]:
def f(x: np.array):
    return 150 * (x[0] * x[1])**2 + (0.5 * x[0] + 2 * x[1] - 2)**2

In [10]:
def grad_f(x: np.array):
    return np.array([300 * x[0] * x[1]**2 + (0.5 * x[0] + 2 * x[1] - 2) * 0.5, 300 * x[0]**2 * x[1] + (0.5 * x[0] + 2 * x[1] - 2) * 2])

In [11]:
def backtracking_line_search(func, xk, pk, grad_fk, alpha=1, rho=0.5, c=1e-4):
    while func(xk + alpha * pk) > func(xk) + c * alpha * np.dot(grad_fk.T, pk):
        alpha *= rho

    if alpha < 1e-8:
        alpha = 1
    return alpha

## Runs with exact derivatives

In [7]:
# Quasi-Newton SR1 with Rosenbrock function
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_rosenbrock_f(xk1)
        yk = grad_fk1 - grad_fk
        
        ys = yk - Bk @ sk
        ys_T_s = ys.T @ sk
        Bk1 = Bk + np.outer(ys, ys) / ys_T_s

        sy = sk - Hk @ yk
        sy_T_y = sy.T @ yk
        Hk1 = Hk + np.outer(sy, sy) / sy_T_y
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "(sk - Hkyk)^Tyk": sy_T_y,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        if np.linalg.norm(grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'(sk - Hkyk)^Tyk': -2.4469478401972732e-11,
 'B_k+1': array([[ 802.08375949, -400.04403773],
       [-400.04403773,  200.02315519]]),
 'H_k+1': array([[0.22195977, 0.41585146],
       [0.41585146, 0.7777333 ]]),
 'iteration': 12,
 's_k': array([-1.07407490e-07, -2.04175304e-07]),
 'x_k+1': array([1., 1.]),
 'y_k': array([-4.47069051e-06,  2.12793754e-06]),
 '|grad_f_k+1|': 2.886945632584808e-08}
----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'(sk - Hkyk)^Tyk': -3.60249754347629e-10,
 'B_k+1': array([[ 802.17779941, -400.09232949],
       [-400.09232949,  200.04795229]]),
 'H_k+1': array

In [9]:
# Quasi-Newton SR1 with second function
starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        grad_fk = grad_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_f(xk1)
        yk = grad_fk1 - grad_fk
        
        ys = yk - Bk @ sk
        ys_T_s = ys.T @ sk
        Bk1 = Bk + np.outer(ys, ys) / ys_T_s

        sy = sk - Hk @ yk
        sy_T_y = sy.T @ yk
        Hk1 = Hk + np.outer(sy, sy) / sy_T_y
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "(sk - Hkyk)^Tyk": sy_T_y,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        if np.linalg.norm(grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'(sk - Hkyk)^Tyk': -1.510534067252835e-06,
 'B_k+1': array([[300.25079113,   1.00329068],
       [  1.00329068,   4.00214609]]),
 'H_k+1': array([[0.00340049, 0.00626065],
       [0.00626065, 0.99996067]]),
 'iteration': 12,
 's_k': array([-4.11327702e-06,  3.94201610e-06]),
 'x_k+1': array([-6.89878648e-11,  1.00000000e+00]),
 'y_k': array([-1.23105969e-03,  1.16497119e-05]),
 '|grad_f_k+1|': 2.3908866810282288e-08}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'(sk - Hkyk)^Tyk': -3.986915980931939e-09,
 'B_k+1': array([[2.49999258e-01, 9.99419744e-01],
       [9.99419744e-01, 4.80350014e+03]]),
 'H_k+1': array([[ 0

In [30]:
# Quasi-Newton BFGS method with Rosenbrock function
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_rosenbrock_f(xk1)
        yk = grad_fk1 - grad_fk
        
        rho_k = 1 / (yk.T @ sk)
        Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / (yk.T @ sk) 
        Hk1 = (np.eye(2) - rho_k * sk @ yk.T) @ Hk @ (np.eye(2) - rho_k * yk @ sk.T) + rho_k * sk @ sk.T
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        if np.linalg.norm(grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'B_k+1': array([[ 801.95466823, -399.9719623 ],
       [-399.9719623 ,  199.9827051 ]]),
 'H_k+1': array([[1.12261585, 0.12261585],
       [0.12261585, 1.12261585]]),
 'iteration': 11,
 's_k': array([-5.37503016e-07, -8.74767017e-07]),
 'x_k+1': array([1.        , 1.00000001]),
 'y_k': array([-8.11707729e-05,  4.00478619e-05]),
 '|grad_f_k+1|': 4.399163394005115e-07}
----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'B_k+1': array([[ 802.41303166, -400.19903063],
       [-400.19903063,  200.09591203]]),
 'H_k+1': array([[2.68484929, 1.68484929],
       [1.68484929, 2.68484929]]),
 'itera

In [31]:
# Quasi-Newton BFGS method with second function
starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        grad_fk = grad_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_f(xk1)
        yk = grad_fk1 - grad_fk
        
        rho_k = 1 / (yk.T @ sk)
        Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / (yk.T @ sk) 
        Hk1 = (np.eye(2) - rho_k * sk @ yk.T) @ Hk @ (np.eye(2) - rho_k * yk @ sk.T) + rho_k * sk @ sk.T
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        if np.linalg.norm(grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'B_k+1': array([[301.41338487,   1.2280021 ],
       [  1.2280021 ,   4.04468996]]),
 'H_k+1': array([[1.06857755, 0.06857755],
       [0.06857755, 1.06857755]]),
 'iteration': 12,
 's_k': array([ 3.16151631e-08, -1.61303315e-07]),
 'x_k+1': array([-7.99019976e-10,  9.99999998e-01]),
 'y_k': array([ 9.33115250e-06, -6.13598412e-07]),
 '|grad_f_k+1|': 2.4157406177491484e-07}
----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'B_k+1': array([[2.50020592e-01, 9.85621947e-01],
       [9.85621947e-01, 4.81403338e+03]]),
 'H_k+1': array([[4.8063102, 3.8063102],
       [3.8063102, 4.8063102]]),
 'iteration': 6,
 's_k': array([

In [13]:
# Quasi-Newton SR1 (trust region) with Rosenbrock function
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-6, trust_region_delta=1.0, eta=1e-4, r=0.5):
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        if np.linalg.norm(grad_fk) < tol:
            break
            
        sk = -np.linalg.solve(Bk, grad_fk)
        if np.linalg.norm(sk) > trust_region_delta:
            sk = sk * (trust_region_delta / np.linalg.norm(sk)) 

        grad_fk1 = grad_rosenbrock_f(xk + sk)
        yk = grad_fk1 - grad_fk

        fk = rosenbrock_f(xk)
        fk1 = rosenbrock_f(xk + sk)
        ared = fk - fk1
        pred = -grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if ared / pred > eta:
            xk1 = xk + sk
        else:
            xk1 = xk

        if ared / pred > 0.75:
            if np.linalg.norm(sk) <= 0.8 * trust_region_delta:
                trust_region_delta = trust_region_delta
            else:
                trust_region_delta = 2 * trust_region_delta
        elif ared / pred >= 0.1 and ared / pred <= 0.75:
            trust_region_delta = trust_region_delta
        else:
            trust_region_delta = 0.5 * trust_region_delta
        
        # check 6.26
        if np.linalg.norm(sk.T @ (yk - Bk @ sk)) >= r * np.linalg.norm(sk) * np.linalg.norm(yk - Bk @ sk):
            ys = yk - Bk @ sk
            ys_T_s = ys.T @ sk
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "delta": trust_region_delta,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    print(f"SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])

----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'B_k+1': array([[ 852.56410905, -412.4609586 ],
       [-412.4609586 ,  199.9933109 ]]),
 'delta': 5.960464477539063e-08,
 'iteration': 71,
 's_k': array([-2.53151943e-08, -5.39616032e-08]),
 'x_k+1': array([1., 1.]),
 'y_k': array([ 1.28185523e-06, -6.66242794e-07]),
 '|grad_f_k+1|': 7.524967540362846e-08}
----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'B_k+1': array([[ 769.03990825, -391.53713197],
       [-391.53713197,  199.84991373]]),
 'delta': 0.03125,
 'iteration': 294,
 's_k': array([-4.28582436e-08, -8.05910337e-08]),
 'x_k+1': array([0.99999999, 

In [14]:
# Quasi-Newton SR1 (trust region) with second function

starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-6, trust_region_delta=1.0, eta=1e-4, r=0.5):
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        if np.linalg.norm(grad_fk) < tol:
            break
            
        sk = -np.linalg.solve(Bk, grad_fk)
        if np.linalg.norm(sk) > trust_region_delta:
            sk = sk * (trust_region_delta / np.linalg.norm(sk)) 

        grad_fk1 = grad_rosenbrock_f(xk + sk)
        yk = grad_fk1 - grad_fk

        fk = rosenbrock_f(xk)
        fk1 = rosenbrock_f(xk + sk)
        ared = fk - fk1
        pred = -grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if ared / pred > eta:
            xk1 = xk + sk
        else:
            xk1 = xk

        if ared / pred > 0.75:
            if np.linalg.norm(sk) <= 0.8 * trust_region_delta:
                trust_region_delta = trust_region_delta
            else:
                trust_region_delta = 2 * trust_region_delta
        elif ared / pred >= 0.1 and ared / pred <= 0.75:
            trust_region_delta = trust_region_delta
        else:
            trust_region_delta = 0.5 * trust_region_delta
        
        # check 6.26
        if np.linalg.norm(sk.T @ (yk - Bk @ sk)) >= r * np.linalg.norm(sk) * np.linalg.norm(yk - Bk @ sk):
            ys = yk - Bk @ sk
            ys_T_s = ys.T @ sk
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "delta": trust_region_delta,
            "|grad_f_k+1|": np.linalg.norm(grad_fk1)
        })

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    print(f"SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])

----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'B_k+1': array([[ 784.44893228, -394.93424092],
       [-394.93424092,  199.34004821]]),
 'delta': 0.03125,
 'iteration': 170,
 's_k': array([-3.24194414e-08, -5.94864659e-08]),
 'x_k+1': array([0.99999999, 0.99999998]),
 'y_k': array([-2.20580597e-06,  1.07048352e-06]),
 '|grad_f_k+1|': 2.9539017183410467e-07}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'B_k+1': array([[ 950.66395966, -435.51383572],
       [-435.51383572,  207.45554685]]),
 'delta': 0.0078125,
 'iteration': 4303,
 's_k': array([-1.19318491e-08, -2.37963031e-08]),
 'x_k+1': array([1.00000022, 1.00000043]),
 'y_k': array([-5.08238332e-08,  1.348006

## Runs with approximated derivatives

In [3]:
u = 1.0
while 1.0 + u != 1.0:
    u /= 2
epsilon = np.sqrt(u)
    
# Print the unit roundoff value
print("Manually calculated unit roundoff value (u):", u)
print("Epsilon from manually calculated u:", epsilon)

Manually calculated unit roundoff value (u): 1.1102230246251565e-16
Epsilon from manually calculated u: 1.0536712127723509e-08


In [4]:
central_epsilon = u**(1/3)
print(f"Epsilon for central-difference Method: {central_epsilon}")

Epsilon for central-difference Method: 4.806217383937355e-06


In [5]:
def forward_difference(func: callable, x: np.ndarray, epsilon: float) -> np.ndarray:
    """
    Parameters:
    func: Function you want to approximate the derivative of
    x: Point where at which you want the approximation of the derivative
    epsilon: very small number typically sqrt(u), where u is unit-roundoff
    Returns:
    Approximation of derivative at point x
    """
    dim = x.shape[0] if x.shape != () else 1
    grad = np.zeros(dim)
    
    for i in range(dim):
        e = np.zeros(dim)
        e[i] = 1
        grad_i = (func(x + epsilon * e) - func(x)) / epsilon
        grad[i] = grad_i
    
    return grad

In [26]:
# Quasi-Newton SR1 with Rosenbrock function (approximated gradients)
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(rosenbrock_f, xk, epsilon)
        pk = -np.linalg.solve(Bk, approx_grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, approx_grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        approx_grad_fk1 = forward_difference(rosenbrock_f, xk1, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk
        
        ys = yk - Bk @ sk
        ys_T_s = ys.T @ sk
        Bk1 = Bk + np.outer(ys, ys) / ys_T_s

        sy = sk - Hk @ yk
        sy_T_y = sy.T @ yk
        Hk1 = Hk + np.outer(sy, sy) / sy_T_y
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "(sk - Hkyk)^Tyk": sy_T_y,
            "|approx_grad_f_k+1|": np.linalg.norm(approx_grad_fk1)
        })

        if np.linalg.norm(approx_grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [1.2 1.2]:
{'(sk - Hkyk)^Tyk': -2.458025735395886e-11,
 'B_k+1': array([[ 802.07747783, -400.04213437],
       [-400.04213437,  200.02282448]]),
 'H_k+1': array([[0.22188413, 0.41579211],
       [0.41579211, 0.77781834]]),
 'iteration': 12,
 's_k': array([-1.07278675e-07, -2.03891446e-07]),
 'x_k+1': array([0.99999683, 0.99999366]),
 'y_k': array([-4.48063989e-06,  2.13304724e-06]),
 '|approx_grad_f_k+1|': 2.8117862651621427e-08}
----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [-1.2  1. ]:
{'(sk - Hkyk)^Tyk': -2.2929851377233814e-10,
 'B_k+1': array([[ 802.28998987, -400.13955764],
       [-400.1395

In [18]:
# Quasi-Newton SR1 with second function (approximated gradients)
starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(f, xk, epsilon)
        pk = -np.linalg.solve(Bk, approx_grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, approx_grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        approx_grad_fk1 = forward_difference(f, xk1, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk
        
        ys = yk - Bk @ sk
        ys_T_s = ys.T @ sk
        Bk1 = Bk + np.outer(ys, ys) / ys_T_s

        sy = sk - Hk @ yk
        sy_T_y = sy.T @ yk
        Hk1 = Hk + np.outer(sy, sy) / sy_T_y
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "(sk - Hkyk)^Tyk": sy_T_y,
            "|approx_grad_f_k+1|": np.linalg.norm(approx_grad_fk1)
        })

        if np.linalg.norm(approx_grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [-0.2  1.2]:
{'(sk - Hkyk)^Tyk': -5.372720137750628e-10,
 'B_k+1': array([[300.48104723,   2.00108704],
       [  2.00108704,   7.99994076]]),
 'H_k+1': array([[ 0.14550005, -0.3272513 ],
       [-0.3272513 ,  0.87467125]]),
 'iteration': 11,
 's_k': array([-6.38347364e-08, -1.15402266e-06]),
 'x_k+1': array([-6.12350991e-09,  1.00000002e+00]),
 'y_k': array([-2.14904282e-05, -9.35985179e-06]),
 '|approx_grad_f_k+1|': 2.971290870845121e-07}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [3.8 0.1]:
{'(sk - Hkyk)^Tyk': -9.650418987453935e-11,
 'B_k+1': array([[5.00002423e-01, 2.00020295e+00],
       [2.00020295e+00, 4.80801738e+03]

In [29]:
# Quasi-Newton BFGS method with Rosenbrock function (approximated gradients)
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(rosenbrock_f, xk, epsilon)
        pk = -np.linalg.solve(Bk, approx_grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, approx_grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        approx_grad_fk1 = forward_difference(rosenbrock_f, xk1, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk
        
        rho_k = 1 / (yk.T @ sk)
        Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / (yk.T @ sk) 
        Hk1 = (np.eye(2) - rho_k * sk @ yk.T) @ Hk @ (np.eye(2) - rho_k * yk @ sk.T) + rho_k * sk @ sk.T
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "|approx_grad_f_k+1|": np.linalg.norm(approx_grad_fk1)
        })

        if np.linalg.norm(approx_grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [1.2 1.2]:
{'B_k+1': array([[ 801.94954873, -399.97065695],
       [-399.97065695,  199.98268193]]),
 'H_k+1': array([[1.12268226, 0.12268226],
       [0.12268226, 1.12268226]]),
 'iteration': 11,
 's_k': array([-5.37967053e-07, -8.75568731e-07]),
 'x_k+1': array([0.99999684, 0.99999367]),
 'y_k': array([-8.12206351e-05,  4.00724527e-05]),
 '|approx_grad_f_k+1|': 4.402834695549562e-07}
----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [-1.2  1. ]:
{'B_k+1': array([[ 802.01610034, -400.01810655],
       [-400.01810655,  200.01772837]]),
 'H_k+1': array([[1.01318099, 0.01318099],
       [0.01318099, 1

In [28]:
# Quasi-Newton BFGS method with second function (approximated gradients)
starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6):
    xk = x0
    Bk = B0
    Hk = H0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(f, xk, epsilon)
        pk = -np.linalg.solve(Bk, approx_grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, approx_grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        approx_grad_fk1 = forward_difference(f, xk1, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk
        
        rho_k = 1 / (yk.T @ sk)
        Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / (yk.T @ sk) 
        Hk1 = (np.eye(2) - rho_k * sk @ yk.T) @ Hk @ (np.eye(2) - rho_k * yk @ sk.T) + rho_k * sk @ sk.T
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "|approx_grad_f_k+1|": np.linalg.norm(approx_grad_fk1)
        })

        if np.linalg.norm(approx_grad_fk1) < tol:
            break

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [-0.2  1.2]:
{'B_k+1': array([[301.19799132,   1.73808687],
       [  1.73808687,   8.09826885]]),
 'H_k+1': array([[1.02202007, 0.02202007],
       [0.02202007, 1.02202007]]),
 'iteration': 10,
 's_k': array([2.77515881e-08, 7.39622956e-08]),
 'x_k+1': array([-5.25762022e-09,  9.99999994e-01]),
 'y_k': array([8.48727548e-06, 6.47201225e-07]),
 '|approx_grad_f_k+1|': 1.63142031775232e-08}
----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [3.8 0.1]:
{'B_k+1': array([[5.00097591e-01, 2.03133156e+00],
       [2.03133156e+00, 4.81813104e+03]]),
 'H_k+1': array([[2.8727484, 1.8727484],
       [1.8727484, 2.8727484]]),
 'iteration': 6,


In [36]:
# Quasi-Newton SR1 (trust region) with Rosenbrock function (approximated gradients)
starting_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0]), np.array([0.2, 0.8])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=2e-6, trust_region_delta=1.0, eta=1e-4, r=0.5):
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(rosenbrock_f, xk, epsilon)
        if np.linalg.norm(approx_grad_fk) < tol:
            break
            
        sk = -np.linalg.solve(Bk, approx_grad_fk)
        if np.linalg.norm(sk) > trust_region_delta:
            sk = sk * (trust_region_delta / np.linalg.norm(sk)) 

        approx_grad_fk1 = forward_difference(rosenbrock_f, xk + sk, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk

        fk = rosenbrock_f(xk)
        fk1 = rosenbrock_f(xk + sk)
        ared = fk - fk1
        pred = -approx_grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if ared / pred > eta:
            xk1 = xk + sk
        else:
            xk1 = xk

        if ared / pred > 0.75:
            if np.linalg.norm(sk) <= 0.8 * trust_region_delta:
                trust_region_delta = trust_region_delta
            else:
                trust_region_delta = 2 * trust_region_delta
        elif ared / pred >= 0.1 and ared / pred <= 0.75:
            trust_region_delta = trust_region_delta
        else:
            trust_region_delta = 0.5 * trust_region_delta
        
        # check 6.26
        if np.linalg.norm(sk.T @ (yk - Bk @ sk)) >= r * np.linalg.norm(sk) * np.linalg.norm(yk - Bk @ sk):
            ys = yk - Bk @ sk
            ys_T_s = ys.T @ sk
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "delta": trust_region_delta,
            "|approx_grad_f_k+1|": np.linalg.norm(approx_grad_fk1)
        })

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    print(f"SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])

----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [1.2 1.2]:
{'B_k+1': array([[ 852.56416494, -412.46095041],
       [-412.46095041,  199.99330668]]),
 'delta': 8.881784197001252e-16,
 'iteration': 9999,
 's_k': array([-3.77304765e-16, -8.04053493e-16]),
 'x_k+1': array([0.9999977 , 0.99999551]),
 'y_k': array([ 4.37426381e-14, -2.22043893e-14]),
 '|approx_grad_f_k+1|': 4.8847231456976755e-05}
----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [-1.2  1. ]:
{'B_k+1': array([[ 719.63262295, -377.29616479],
       [-377.29616479,  193.74230218]]),
 'delta': 4.76837158203125e-07,
 'iteration': 240,
 's_k': array([5.25266600e-

In [40]:
# Quasi-Newton SR1 (trust region) with second function

starting_points = [np.array([-0.2, 1.2]), np.array([3.8, 0.1]), np.array([1.9, 0.6])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-5, trust_region_delta=1.0, eta=1e-4, r=0.5):
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(f, xk, epsilon)
        if np.linalg.norm(approx_grad_fk) < tol:
            break
            
        sk = -np.linalg.solve(Bk, approx_grad_fk)
        if np.linalg.norm(sk) > trust_region_delta:
            sk = sk * (trust_region_delta / np.linalg.norm(sk)) 

        approx_grad_fk1 = forward_difference(f, xk + sk, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk

        fk = f(xk)
        fk1 = f(xk + sk)
        ared = fk - fk1
        pred = -approx_grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if ared / pred > eta:
            xk1 = xk + sk
        else:
            xk1 = xk

        if ared / pred > 0.75:
            if np.linalg.norm(sk) <= 0.8 * trust_region_delta:
                trust_region_delta = trust_region_delta
            else:
                trust_region_delta = 2 * trust_region_delta
        elif ared / pred >= 0.1 and ared / pred <= 0.75:
            trust_region_delta = trust_region_delta
        else:
            trust_region_delta = 0.5 * trust_region_delta
        
        # check 6.26
        if np.linalg.norm(sk.T @ (yk - Bk @ sk)) >= r * np.linalg.norm(sk) * np.linalg.norm(yk - Bk @ sk):
            ys = yk - Bk @ sk
            ys_T_s = ys.T @ sk
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk
        
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "delta": trust_region_delta,
            "|approx_grad_f_k+1|": np.linalg.norm(approx_grad_fk1)
        })

        xk, Bk = xk1, Bk1

    return results

for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    print(f"SR1 trust region method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: {starting_point}:")
    pprint(sr1_results[-1])

----------------------------------------------------------------------------------------------------
SR1 trust region method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [-0.2  1.2]:
{'B_k+1': array([[300.29618261,   1.87403327],
       [  1.87403327,   8.00140066]]),
 'delta': 0.5,
 'iteration': 11,
 's_k': array([1.10016386e-06, 2.66982352e-05]),
 'x_k+1': array([6.57359700e-09, 1.00000001e+00]),
 'y_k': array([0.00038398, 0.00021579]),
 '|approx_grad_f_k+1|': 3.57131488110888e-06}
----------------------------------------------------------------------------------------------------
SR1 trust region method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [3.8 0.1]:
{'B_k+1': array([[ 253.51652892, -677.9325584 ],
       [-677.9325584 , 5178.50582125]]),
 'delta': 0.001953125,
 'iteration': 5896,
 's_k': array([ 1.37496012e-08, -9.54573240e-12]),
 'x_k+1': array([ 3.99999303e+00, -4.22048356e-10]