In [1]:
import numpy as np
from pprint import pprint

In [2]:
def rosenbrock_f(x: np.ndarray):
    """
    Evaluate the Rosenbrock function f(x) = 100(x2 - x1^2)^2 + (1 - x1)^2.

    Parameters:
    x: Point to evaluate; x[0] plays the role of x1 and x[1] of x2
    Returns:
    Function value at x
    """
    return 100 * (x[1] - x[0]**2)**2 + (1 - x[0])**2

In [3]:
def grad_rosenbrock_f(x: np.ndarray):
    """
    Evaluate the exact gradient of the Rosenbrock function
    f(x) = 100(x2 - x1^2)^2 + (1 - x1)^2.

    Parameters:
    x: Point to evaluate; x[0] plays the role of x1 and x[1] of x2
    Returns:
    Array [df/dx1, df/dx2] at x
    """
    # Shared inner term (x2 - x1^2) of both partial derivatives.
    residual = x[1] - x[0]**2
    return np.array([-400 * residual * x[0] - 2 * (1 - x[0]), 200 * residual])

In [4]:
def f(x: np.ndarray):
    """
    Evaluate the second test function f(x) = 150(x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2.

    Parameters:
    x: Point to evaluate; x[0] plays the role of x1 and x[1] of x2
    Returns:
    Function value at x
    """
    return 150 * (x[0] * x[1])**2 + (0.5 * x[0] + 2 * x[1] - 2)**2

In [5]:
def grad_f(x: np.ndarray):
    """
    Evaluate the exact gradient of f(x) = 150(x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2.

    Parameters:
    x: Point to evaluate; x[0] plays the role of x1 and x[1] of x2
    Returns:
    Array [df/dx1, df/dx2] at x
    """
    # Linear term shared by both partial derivatives; the chain rule contributes
    # the factors 2 * 0.5 = 1 (for x1) and 2 * 2 = 4 (for x2).
    linear_term = 0.5 * x[0] + 2 * x[1] - 2
    return np.array([300 * x[0] * x[1]**2 + linear_term, 300 * x[0]**2 * x[1] + linear_term * 4])

In [6]:
def backtracking_line_search(func, xk, pk, grad_fk, alpha=1, rho=0.3, c=5e-4):
    """
    Choose a step length along pk by backtracking until the sufficient-decrease
    condition func(xk + alpha * pk) <= func(xk) + c * alpha * grad_fk^T pk holds.

    Parameters:
    func: Objective function
    xk: Current point
    pk: Search direction
    grad_fk: Gradient (or its approximation) at xk
    alpha: Initial trial step length
    rho: Factor by which alpha is multiplied after every failed trial
    c: Constant of the sufficient-decrease condition
    Returns:
    The accepted step length. If backtracking pushes alpha below 1e-9 the
    search is considered failed and a full step (alpha = 1) is returned.
    """
    # Both quantities do not depend on alpha, so evaluate them only once.
    f_xk = func(xk)
    directional_derivative = np.dot(grad_fk.T, pk)

    # Once alpha is below 1e-9 the result is always the fallback step, so there
    # is no point in shrinking (and evaluating func) any further.
    while alpha >= 1e-9 and func(xk + alpha * pk) > f_xk + c * alpha * directional_derivative:
        alpha *= rho

    if alpha < 1e-9:
        alpha = 1
    return alpha

In [7]:
# Reference points used by the runs below to measure the distance of an
# iterate to a solution: one point for the Rosenbrock function and two
# points (one per row) for the second function.
solution_rosenbrock = np.ones(2)
solutions_second_function = np.array([(4.0, 0.0), (0.0, 1.0)])

## Runs with exact derivatives

In [8]:
# Quasi-Newton SR1 (line search) on the Rosenbrock function
starting_points = [np.array(point) for point in ([1.2, 1.2], [-1.2, 1.0], [0.2, 0.8])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6, r=1e-8):
    """
    Minimise the Rosenbrock function with a line-search SR1 quasi-Newton method.

    Every iteration solves Bk pk = -grad f(xk) for the search direction, picks
    the step length with backtracking_line_search and applies the symmetric
    rank-one (SR1) update to the Hessian approximation Bk.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    H0: Initial inverse-Hessian approximation. Kept so existing calls keep
        working, but unused: the direction is obtained by solving with Bk.
    max_iter: Maximum number of iterations
    tol: Stop as soon as the gradient norm at the new iterate is below this value
    r: Threshold of the SR1 skipping rule; Bk is updated only if
       |s^T (y - B s)| >= r * ||s|| * ||y - B s|| and the denominator is non-zero
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to solution_rosenbrock and the gradient norm at the new iterate
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_rosenbrock_f(xk1)
        yk = grad_fk1 - grad_fk

        # SR1 update; skipped when the denominator is zero or tiny, because
        # dividing by it would fill Bk with NaN/inf or blow it up.
        ys = yk - Bk @ sk
        ys_T_s = ys @ sk
        if ys_T_s != 0 and abs(ys_T_s) >= r * np.linalg.norm(sk) * np.linalg.norm(ys):
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk

        dist_to_solution = np.linalg.norm(xk1 - solution_rosenbrock)
        grad_norm = np.linalg.norm(grad_fk1)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_solution": dist_to_solution,
            "|grad_f_k+1|": grad_norm
        })

        if grad_norm < tol:
            break

        xk, Bk = xk1, Bk1

    return results

# Run the method from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print(
        "-" * 100,
        f"SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:",
        sep="\n",
    )
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'distance_to_solution': 2.651650731357318e-12,
 'iteration': 13,
 'x_k+1': array([1., 1.]),
 '|grad_f_k+1|': 3.5158764953239405e-11}
----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'distance_to_solution': 5.121067746494062e-09,
 'iteration': 54,
 'x_k+1': array([1., 1.]),
 '|grad_f_k+1|': 1.1067690541327862e-07}
----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [0.2 0.8]:
{'distance_to_solution': 7.181383329974395e-09,
 'itera

In [9]:
# Quasi-Newton SR1 (line search) on the second function
starting_points = [np.array(point) for point in ([-0.2, 1.2], [3.8, 0.1], [1.9, 0.6])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6, r=1e-8):
    """
    Minimise f(x) = 150(x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2 with a line-search
    SR1 quasi-Newton method.

    Every iteration solves Bk pk = -grad f(xk) for the search direction, picks
    the step length with backtracking_line_search and applies the symmetric
    rank-one (SR1) update to the Hessian approximation Bk.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    H0: Initial inverse-Hessian approximation. Kept so existing calls keep
        working, but unused: the direction is obtained by solving with Bk.
    max_iter: Maximum number of iterations
    tol: Stop as soon as the gradient norm at the new iterate is below this value
    r: Threshold of the SR1 skipping rule; Bk is updated only if
       |s^T (y - B s)| >= r * ||s|| * ||y - B s|| and the denominator is non-zero
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to the nearest row of solutions_second_function and the gradient
    norm at the new iterate
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_f(xk1)
        yk = grad_fk1 - grad_fk

        # SR1 update; skipped when the denominator is zero or tiny, because
        # dividing by it would fill Bk with NaN/inf or blow it up.
        ys = yk - Bk @ sk
        ys_T_s = ys @ sk
        if ys_T_s != 0 and abs(ys_T_s) >= r * np.linalg.norm(sk) * np.linalg.norm(ys):
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk

        # One distance per known solution (row); keep the smallest.
        distances_to_solutions = np.linalg.norm(xk1 - solutions_second_function, axis=1)
        distance_to_nearest_solution = np.min(distances_to_solutions)
        grad_norm = np.linalg.norm(grad_fk1)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_nearest_solution": distance_to_nearest_solution,
            "|grad_f_k+1|": grad_norm
        })

        if grad_norm < tol:
            break

        xk, Bk = xk1, Bk1

    return results

# Run the method from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print(
        "-" * 100,
        f"SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:",
        sep="\n",
    )
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'distance_to_nearest_solution': 9.112444517832828e-09,
 'iteration': 7,
 'x_k+1': array([9.22990593e-11, 9.99999991e-01]),
 '|grad_f_k+1|': 7.333074226228451e-08}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'distance_to_nearest_solution': 3.2561597881480904e-10,
 'iteration': 7,
 'x_k+1': array([4.00000000e+00, 1.11272826e-14]),
 '|grad_f_k+1|': 6.195021989785833e-10}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [1.9 0.6]:
{'distance_to_nearest_solution': 8.0741563

In [10]:
# Quasi-Newton BFGS (line search) on the Rosenbrock function
starting_points = [np.array(point) for point in ([1.2, 1.2], [-1.2, 1.0], [0.2, 0.8])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6, curvature_tol=1e-10):
    """
    Minimise the Rosenbrock function with a line-search BFGS quasi-Newton method.

    Every iteration solves Bk pk = -grad f(xk) for the search direction, picks
    the step length with backtracking_line_search and applies the BFGS update
    to the Hessian approximation Bk.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    H0: Initial inverse-Hessian approximation. Kept so existing calls keep
        working, but unused: the direction is obtained by solving with Bk.
    max_iter: Maximum number of iterations
    tol: Stop as soon as the gradient norm at the new iterate is below this value
    curvature_tol: Bk is updated only if y^T s > curvature_tol * ||s|| * ||y||
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to solution_rosenbrock and the gradient norm at the new iterate
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_rosenbrock_f(xk1)
        yk = grad_fk1 - grad_fk

        # The backtracking search only enforces sufficient decrease, so the
        # curvature condition y^T s > 0 is not guaranteed. Skip the update when
        # it fails: this avoids dividing by zero (e.g. for a zero step) and
        # keeps a positive definite Bk positive definite.
        yk_T_sk = yk @ sk
        if yk_T_sk > curvature_tol * np.linalg.norm(sk) * np.linalg.norm(yk):
            Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / yk_T_sk
        else:
            Bk1 = Bk

        dist_to_solution = np.linalg.norm(xk1 - solution_rosenbrock)
        grad_norm = np.linalg.norm(grad_fk1)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_solution": dist_to_solution,
            "|grad_f_k+1|": grad_norm
        })

        if grad_norm < tol:
            break

        xk, Bk = xk1, Bk1

    return results

# Run BFGS from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    bfgs_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(bfgs_results[-1])


----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'distance_to_solution': 2.5455463464545344e-08,
 'iteration': 11,
 'x_k+1': array([1.00000001, 1.00000002]),
 '|grad_f_k+1|': 8.059309414894322e-07}
----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'distance_to_solution': 7.81050225096746e-10,
 'iteration': 34,
 'x_k+1': array([1., 1.]),
 '|grad_f_k+1|': 2.026865758859956e-08}
----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [0.2 0.8]:
{'distance_to_solution': 1.33880873112

In [11]:
# Quasi-Newton BFGS (line search) on the second function
starting_points = [np.array(point) for point in ([-0.2, 1.2], [3.8, 0.1], [1.9, 0.6])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6, curvature_tol=1e-10):
    """
    Minimise f(x) = 150(x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2 with a line-search
    BFGS quasi-Newton method.

    Every iteration solves Bk pk = -grad f(xk) for the search direction, picks
    the step length with backtracking_line_search and applies the BFGS update
    to the Hessian approximation Bk.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    H0: Initial inverse-Hessian approximation. Kept so existing calls keep
        working, but unused: the direction is obtained by solving with Bk.
    max_iter: Maximum number of iterations
    tol: Stop as soon as the gradient norm at the new iterate is below this value
    curvature_tol: Bk is updated only if y^T s > curvature_tol * ||s|| * ||y||
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to the nearest row of solutions_second_function and the gradient
    norm at the new iterate
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_f(xk)
        pk = -np.linalg.solve(Bk, grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = grad_f(xk1)
        yk = grad_fk1 - grad_fk

        # The backtracking search only enforces sufficient decrease, so the
        # curvature condition y^T s > 0 is not guaranteed. Skip the update when
        # it fails: this avoids dividing by zero (e.g. for a zero step) and
        # keeps a positive definite Bk positive definite.
        yk_T_sk = yk @ sk
        if yk_T_sk > curvature_tol * np.linalg.norm(sk) * np.linalg.norm(yk):
            Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / yk_T_sk
        else:
            Bk1 = Bk

        # One distance per known solution (row); keep the smallest.
        distances_to_solutions = np.linalg.norm(xk1 - solutions_second_function, axis=1)
        distance_to_nearest_solution = np.min(distances_to_solutions)
        grad_norm = np.linalg.norm(grad_fk1)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_nearest_solution": distance_to_nearest_solution,
            "|grad_f_k+1|": grad_norm
        })

        if grad_norm < tol:
            break

        xk, Bk = xk1, Bk1

    return results

# Run BFGS from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    bfgs_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:")
    pprint(bfgs_results[-1])


----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'distance_to_nearest_solution': 8.899903636802087e-09,
 'iteration': 8,
 'x_k+1': array([1.67086050e-10, 9.99999991e-01]),
 '|grad_f_k+1|': 7.79144419305197e-08}
----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'distance_to_nearest_solution': 1.3522392827270172e-09,
 'iteration': 11,
 'x_k+1': array([4.00000000e+00, 1.65898616e-11]),
 '|grad_f_k+1|': 7.70624610741654e-08}
----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [1.9 0.6]:
{'distance_to_nearest_solution': 1.32074

In [12]:
# Quasi-Newton SR1 inside a trust-region framework, Rosenbrock function
starting_points = [np.array(point) for point in ([1.2, 1.2], [-1.2, 1.0], [0.2, 0.8])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-6, trust_region_delta=1.0, eta=1e-4, r=0.5):
    """
    Minimise the Rosenbrock function with an SR1 trust-region method.

    The trial step is the quasi-Newton step -Bk^{-1} grad f(xk), scaled back to
    the trust-region boundary when it is too long. If that step does not
    decrease the quadratic model (possible because an SR1 matrix can be
    indefinite), the Cauchy point is used instead, so that the predicted
    reduction is positive and the ratio ared / pred keeps its meaning.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    max_iter: Maximum number of iterations
    tol: Stop as soon as the gradient norm at the current iterate is below this value
    trust_region_delta: Initial trust-region radius
    eta: A trial step is accepted only if ared / pred > eta
    r: Threshold of the SR1 skipping rule (6.26)
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to solution_rosenbrock, the updated radius and the gradient norm
    at the new iterate. Empty if x0 already satisfies the tolerance.
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_rosenbrock_f(xk)
        grad_norm = np.linalg.norm(grad_fk)
        # A zero gradient leaves nothing to do (and would break the Cauchy step).
        if grad_norm < tol or grad_norm == 0:
            break

        sk = -np.linalg.solve(Bk, grad_fk)
        step_norm = np.linalg.norm(sk)
        if step_norm > trust_region_delta:
            sk = sk * (trust_region_delta / step_norm)
        pred = -grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if not pred > 0:
            # The quasi-Newton step does not reduce the model: use the Cauchy
            # point (model minimiser along -grad inside the region) instead.
            curvature = grad_fk @ Bk @ grad_fk
            tau = 1.0
            if curvature > 0:
                tau = min(grad_norm**3 / (trust_region_delta * curvature), 1.0)
            sk = -(tau * trust_region_delta / grad_norm) * grad_fk
            pred = -grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        x_trial = xk + sk
        grad_fk1 = grad_rosenbrock_f(x_trial)
        yk = grad_fk1 - grad_fk

        ared = rosenbrock_f(xk) - rosenbrock_f(x_trial)
        # Without a positive predicted reduction the ratio is meaningless:
        # treat the step as rejected instead of dividing by pred.
        ratio = ared / pred if pred > 0 else -np.inf

        step_accepted = ratio > eta
        xk1 = x_trial if step_accepted else xk

        if ratio > 0.75:
            # Enlarge the region only when the step came close to its boundary.
            if np.linalg.norm(sk) > 0.8 * trust_region_delta:
                trust_region_delta = 2 * trust_region_delta
        elif ratio >= 0.1:
            pass  # acceptable agreement: keep the radius
        else:
            trust_region_delta = 0.5 * trust_region_delta

        # check 6.26 (the update is also skipped for a zero denominator)
        ys = yk - Bk @ sk
        ys_T_s = ys @ sk
        if ys_T_s != 0 and abs(ys_T_s) >= r * np.linalg.norm(sk) * np.linalg.norm(ys):
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk

        dist_to_solution = np.linalg.norm(xk1 - solution_rosenbrock)
        # After a rejected step the iterate stays at xk, so report its gradient.
        grad_at_xk1 = grad_fk1 if step_accepted else grad_fk

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_solution": dist_to_solution,
            "delta": trust_region_delta,
            "|grad_f_k+1|": np.linalg.norm(grad_at_xk1)
        })

        xk, Bk = xk1, Bk1

    return results

# Run the method from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    print(f"SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: {starting_point}:")
    # The method returns an empty list when it stops before its first step.
    if sr1_results:
        pprint(sr1_results[-1])
    else:
        print("No iterations were performed: the starting point already satisfies the gradient tolerance.")

----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [1.2 1.2]:
{'delta': 5.960464477539063e-08,
 'distance_to_solution': 3.101184469905063e-09,
 'iteration': 71,
 'x_k+1': array([1., 1.]),
 '|grad_f_k+1|': 7.524967540362846e-08}
----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (exact derivatives) and starting point: [-1.2  1. ]:
{'delta': 0.03125,
 'distance_to_solution': 3.127213837398988e-08,
 'iteration': 294,
 'x_k+1': array([0.99999999, 0.99999997]),
 '|grad_f_k+1|': 8.103543645537549e-07}
----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − 

In [13]:
# Quasi-Newton SR1 inside a trust-region framework, second function
starting_points = [np.array(point) for point in ([-0.2, 1.2], [3.8, 0.1], [1.9, 0.6])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-6, trust_region_delta=1.0, eta=1e-4, r=0.2):
    """
    Minimise f(x) = 150(x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2 with an SR1
    trust-region method.

    The trial step is the quasi-Newton step -Bk^{-1} grad f(xk), scaled back to
    the trust-region boundary when it is too long. If that step does not
    decrease the quadratic model (possible because an SR1 matrix can be
    indefinite), the Cauchy point is used instead, so that the predicted
    reduction is positive and the ratio ared / pred keeps its meaning.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    max_iter: Maximum number of iterations
    tol: Stop as soon as the gradient norm at the current iterate is below this value
    trust_region_delta: Initial trust-region radius
    eta: A trial step is accepted only if ared / pred > eta
    r: Threshold of the SR1 skipping rule (6.26)
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to the nearest row of solutions_second_function, the updated
    radius and the gradient norm at the new iterate. Empty if x0 already
    satisfies the tolerance.
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        grad_fk = grad_f(xk)
        grad_norm = np.linalg.norm(grad_fk)
        # A zero gradient leaves nothing to do (and would break the Cauchy step).
        if grad_norm < tol or grad_norm == 0:
            break

        sk = -np.linalg.solve(Bk, grad_fk)
        step_norm = np.linalg.norm(sk)
        if step_norm > trust_region_delta:
            sk = sk * (trust_region_delta / step_norm)
        pred = -grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if not pred > 0:
            # The quasi-Newton step does not reduce the model: use the Cauchy
            # point (model minimiser along -grad inside the region) instead.
            curvature = grad_fk @ Bk @ grad_fk
            tau = 1.0
            if curvature > 0:
                tau = min(grad_norm**3 / (trust_region_delta * curvature), 1.0)
            sk = -(tau * trust_region_delta / grad_norm) * grad_fk
            pred = -grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        x_trial = xk + sk
        grad_fk1 = grad_f(x_trial)
        yk = grad_fk1 - grad_fk

        ared = f(xk) - f(x_trial)
        # Without a positive predicted reduction the ratio is meaningless:
        # treat the step as rejected instead of dividing by pred.
        ratio = ared / pred if pred > 0 else -np.inf

        step_accepted = ratio > eta
        xk1 = x_trial if step_accepted else xk

        if ratio > 0.75:
            # Enlarge the region only when the step came close to its boundary.
            if np.linalg.norm(sk) > 0.8 * trust_region_delta:
                trust_region_delta = 2 * trust_region_delta
        elif ratio >= 0.1:
            pass  # acceptable agreement: keep the radius
        else:
            trust_region_delta = 0.5 * trust_region_delta

        # check 6.26 (the update is also skipped for a zero denominator)
        ys = yk - Bk @ sk
        ys_T_s = ys @ sk
        if ys_T_s != 0 and abs(ys_T_s) >= r * np.linalg.norm(sk) * np.linalg.norm(ys):
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk

        # One distance per known solution (row); keep the smallest.
        distances_to_solutions = np.linalg.norm(xk1 - solutions_second_function, axis=1)
        distance_to_nearest_solution = np.min(distances_to_solutions)
        # After a rejected step the iterate stays at xk, so report its gradient.
        grad_at_xk1 = grad_fk1 if step_accepted else grad_fk

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_nearest_solution": distance_to_nearest_solution,
            "delta": trust_region_delta,
            "|grad_f_k+1|": np.linalg.norm(grad_at_xk1)
        })

        xk, Bk = xk1, Bk1

    return results

# Run the method from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    # Label names the trust-region variant so the output is distinguishable
    # from the line-search SR1 run on the same function.
    print(f"SR1 (trust region) method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: {starting_point}:")
    # The method returns an empty list when it stops before its first step.
    if sr1_results:
        pprint(sr1_results[-1])
    else:
        print("No iterations were performed: the starting point already satisfies the gradient tolerance.")

----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [-0.2  1.2]:
{'delta': 0.25,
 'distance_to_nearest_solution': 8.234394820251813e-11,
 'iteration': 11,
 'x_k+1': array([3.31869207e-11, 1.00000000e+00]),
 '|grad_f_k+1|': 9.836591285286996e-09}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [3.8 0.1]:
{'delta': 1.0,
 'distance_to_nearest_solution': 1.1591792835831045e-09,
 'iteration': 23,
 'x_k+1': array([4.00000000e+00, 2.05241968e-10]),
 '|grad_f_k+1|': 9.845216668908105e-07}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (exact derivatives) and starting point: [1.9 0.6]:
{'delta':

## Runs with approximated derivatives

In [14]:
# Keep halving u until adding it to 1.0 no longer changes the sum; the value
# left over is the unit roundoff of Python floats.
u = 1.0
while not (1.0 + u == 1.0):
    u *= 0.5

# Square root of the unit roundoff; passed as the step size to
# forward_difference in the runs with approximated derivatives.
epsilon = np.sqrt(u)

# Print the unit roundoff value
print("Manually calculated unit roundoff value (u):", u)
print("Epsilon from manually calculated u:", epsilon)

Manually calculated unit roundoff value (u): 1.1102230246251565e-16
Epsilon from manually calculated u: 1.0536712127723509e-08


In [15]:
# Cube root of the unit roundoff u, reported as the step size for the
# central-difference approximation.
central_epsilon = pow(u, 1 / 3)
print(f"Epsilon for central-difference Method: {central_epsilon}")

Epsilon for central-difference Method: 4.806217383937355e-06


In [16]:
def forward_difference(func: callable, x: np.ndarray, epsilon: float) -> np.ndarray:
    """
    Approximate the gradient of func at x with forward (one-sided) differences:
    grad[i] = (func(x + epsilon * e_i) - func(x)) / epsilon.

    Parameters:
    func: Function you want to approximate the derivative of
    x: Point at which you want the approximation of the derivative
    epsilon: Step size, a very small number, typically sqrt(u), where u is unit-roundoff
    Returns:
    Approximation of the derivative at point x (one entry per coordinate)
    """
    dim = x.shape[0] if x.shape != () else 1
    grad = np.zeros(dim)
    if dim == 0:
        # Nothing to differentiate; do not call func at all.
        return grad

    # func(x) is the same for every coordinate, so evaluate it only once.
    f_x = func(x)

    for i in range(dim):
        e = np.zeros(dim)
        e[i] = 1
        grad[i] = (func(x + epsilon * e) - f_x) / epsilon

    return grad

In [17]:
def central_difference(func: callable, x: np.ndarray, epsilon: float) -> np.ndarray:
    """
    Approximate the gradient of func at x with central (two-sided) differences:
    grad[i] = (func(x + epsilon * e_i) - func(x - epsilon * e_i)) / (2 * epsilon).

    Parameters:
    func: Function you want to approximate the derivative of
    x: Point at which you want the approximation of the derivative
    epsilon: Step size, a very small number; this notebook computes u**(1/3)
             as the central-difference step, where u is unit-roundoff
    Returns:
    Approximation of the derivative at point x (one entry per coordinate)
    """
    dim = 1 if x.shape == () else x.shape[0]
    grad = np.zeros(dim)

    # Row i of the identity matrix is the i-th coordinate direction.
    for i, direction in enumerate(np.eye(dim)):
        shift = epsilon * direction
        grad[i] = (func(x + shift) - func(x - shift)) / (2 * epsilon)

    return grad

In [18]:
# Quasi-Newton SR1 (line search) on the Rosenbrock function, approximated gradients
starting_points = [np.array(point) for point in ([1.2, 1.2], [-1.2, 1.0], [0.2, 0.8])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6, r=1e-8):
    """
    Minimise the Rosenbrock function with a line-search SR1 quasi-Newton
    method, using forward-difference gradients (step size epsilon).

    Every iteration solves Bk pk = -grad for the search direction, picks the
    step length with backtracking_line_search and applies the symmetric
    rank-one (SR1) update to the Hessian approximation Bk.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    H0: Initial inverse-Hessian approximation. Kept so existing calls keep
        working, but unused: the direction is obtained by solving with Bk.
    max_iter: Maximum number of iterations
    tol: Stop as soon as the approximated gradient norm at the new iterate is
         below this value
    r: Threshold of the SR1 skipping rule; Bk is updated only if
       |s^T (y - B s)| >= r * ||s|| * ||y - B s|| and the denominator is non-zero
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to solution_rosenbrock and the approximated gradient norm there
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(rosenbrock_f, xk, epsilon)
        pk = -np.linalg.solve(Bk, approx_grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, approx_grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        approx_grad_fk1 = forward_difference(rosenbrock_f, xk1, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk

        # SR1 update; skipped when the denominator is zero or tiny, because
        # dividing by it would fill Bk with NaN/inf or blow it up.
        ys = yk - Bk @ sk
        ys_T_s = ys @ sk
        if ys_T_s != 0 and abs(ys_T_s) >= r * np.linalg.norm(sk) * np.linalg.norm(ys):
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk

        dist_to_solution = np.linalg.norm(xk1 - solution_rosenbrock)
        approx_grad_norm = np.linalg.norm(approx_grad_fk1)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_solution": dist_to_solution,
            "|approx_grad_f_k+1|": approx_grad_norm
        })

        if approx_grad_norm < tol:
            break

        xk, Bk = xk1, Bk1

    return results

# Run the method from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print(
        "-" * 100,
        f"SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: {starting_point}:",
        sep="\n",
    )
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [1.2 1.2]:
{'distance_to_solution': 7.084685644326211e-06,
 'iteration': 13,
 'x_k+1': array([0.99999683, 0.99999366]),
 '|approx_grad_f_k+1|': 3.827147777230628e-11}
----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [-1.2  1. ]:
{'distance_to_solution': 7.084691381291606e-06,
 'iteration': 51,
 'x_k+1': array([0.99999683, 0.99999366]),
 '|approx_grad_f_k+1|': 1.6154802336819816e-09}
----------------------------------------------------------------------------------------------------
SR1 method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: 

In [19]:
# Quasi-Newton SR1 (line search) on the second function, approximated gradients
starting_points = [np.array(point) for point in ([-0.2, 1.2], [3.8, 0.1], [1.9, 0.6])]

def sr1_method(x0, B0, H0, max_iter=10000, tol=1e-6, r=1e-8):
    """
    Minimise f(x) = 150(x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2 with a line-search
    SR1 quasi-Newton method, using forward-difference gradients (step size
    epsilon).

    Every iteration solves Bk pk = -grad for the search direction, picks the
    step length with backtracking_line_search and applies the symmetric
    rank-one (SR1) update to the Hessian approximation Bk.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    H0: Initial inverse-Hessian approximation. Kept so existing calls keep
        working, but unused: the direction is obtained by solving with Bk.
    max_iter: Maximum number of iterations
    tol: Stop as soon as the approximated gradient norm at the new iterate is
         below this value
    r: Threshold of the SR1 skipping rule; Bk is updated only if
       |s^T (y - B s)| >= r * ||s|| * ||y - B s|| and the denominator is non-zero
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to the nearest row of solutions_second_function and the
    approximated gradient norm at the new iterate
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(f, xk, epsilon)
        pk = -np.linalg.solve(Bk, approx_grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, approx_grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        approx_grad_fk1 = forward_difference(f, xk1, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk

        # SR1 update; skipped when the denominator is zero or tiny, because
        # dividing by it would fill Bk with NaN/inf or blow it up.
        ys = yk - Bk @ sk
        ys_T_s = ys @ sk
        if ys_T_s != 0 and abs(ys_T_s) >= r * np.linalg.norm(sk) * np.linalg.norm(ys):
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk

        # One distance per known solution (row); keep the smallest.
        distances_to_solutions = np.linalg.norm(xk1 - solutions_second_function, axis=1)
        distance_to_nearest_solution = np.min(distances_to_solutions)
        approx_grad_norm = np.linalg.norm(approx_grad_fk1)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_nearest_solution": distance_to_nearest_solution,
            "|approx_grad_f_k+1|": approx_grad_norm
        })

        if approx_grad_norm < tol:
            break

        xk, Bk = xk1, Bk1

    return results

# Run the method from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    sr1_results = sr1_method(starting_point, np.eye(2), np.eye(2))
    print(
        "-" * 100,
        f"SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: {starting_point}:",
        sep="\n",
    )
    pprint(sr1_results[-1])


----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [-0.2  1.2]:
{'distance_to_nearest_solution': 1.4047286282667243e-08,
 'iteration': 7,
 'x_k+1': array([-5.14973344e-09,  9.99999987e-01]),
 '|approx_grad_f_k+1|': 7.332596651988921e-08}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [3.8 0.1]:
{'distance_to_nearest_solution': 1.6378424629019884e-08,
 'iteration': 7,
 'x_k+1': array([ 4.00000002e+00, -5.27492953e-09]),
 '|approx_grad_f_k+1|': 6.156688753962577e-10}
----------------------------------------------------------------------------------------------------
SR1 method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [1.9 0.6]:
{'

In [20]:
# Quasi-Newton BFGS (line search) on the Rosenbrock function, approximated gradients
starting_points = [np.array(point) for point in ([1.2, 1.2], [-1.2, 1.0], [0.2, 0.8])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6, curvature_tol=1e-10):
    """
    Minimise the Rosenbrock function with a line-search BFGS quasi-Newton
    method, using forward-difference gradients (step size epsilon).

    Every iteration solves Bk pk = -grad for the search direction, picks the
    step length with backtracking_line_search and applies the BFGS update to
    the Hessian approximation Bk.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    H0: Initial inverse-Hessian approximation. Kept so existing calls keep
        working, but unused: the direction is obtained by solving with Bk.
    max_iter: Maximum number of iterations
    tol: Stop as soon as the approximated gradient norm at the new iterate is
         below this value
    curvature_tol: Bk is updated only if y^T s > curvature_tol * ||s|| * ||y||
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to solution_rosenbrock and the approximated gradient norm there
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(rosenbrock_f, xk, epsilon)
        pk = -np.linalg.solve(Bk, approx_grad_fk)

        alpha_k = backtracking_line_search(rosenbrock_f, xk, pk, approx_grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        approx_grad_fk1 = forward_difference(rosenbrock_f, xk1, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk

        # The backtracking search only enforces sufficient decrease, so the
        # curvature condition y^T s > 0 is not guaranteed. Skip the update when
        # it fails: this avoids dividing by zero (e.g. for a zero step) and
        # keeps a positive definite Bk positive definite.
        yk_T_sk = yk @ sk
        if yk_T_sk > curvature_tol * np.linalg.norm(sk) * np.linalg.norm(yk):
            Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / yk_T_sk
        else:
            Bk1 = Bk

        dist_to_solution = np.linalg.norm(xk1 - solution_rosenbrock)
        approx_grad_norm = np.linalg.norm(approx_grad_fk1)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_solution": dist_to_solution,
            "|approx_grad_f_k+1|": approx_grad_norm
        })

        if approx_grad_norm < tol:
            break

        xk, Bk = xk1, Bk1

    return results

# Run BFGS from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    bfgs_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: {starting_point}:")
    pprint(bfgs_results[-1])


----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [1.2 1.2]:
{'distance_to_solution': 7.059203135883713e-06,
 'iteration': 11,
 'x_k+1': array([0.99999684, 0.99999369]),
 '|approx_grad_f_k+1|': 8.071409078255463e-07}
----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [-1.2  1. ]:
{'distance_to_solution': 7.081566144358239e-06,
 'iteration': 34,
 'x_k+1': array([0.99999683, 0.99999367]),
 '|approx_grad_f_k+1|': 1.46411286399573e-07}
----------------------------------------------------------------------------------------------------
BFGS method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point:

In [21]:
# Quasi-Newton BFGS (line search) on the second function, approximated gradients
starting_points = [np.array(point) for point in ([-0.2, 1.2], [3.8, 0.1], [1.9, 0.6])]

def bfgs_method(x0, B0, H0, max_iter=10000, tol=1e-6, curvature_tol=1e-10):
    """
    Minimise f(x) = 150(x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2 with a line-search
    BFGS quasi-Newton method, using forward-difference gradients (step size
    epsilon).

    Every iteration solves Bk pk = -grad for the search direction, picks the
    step length with backtracking_line_search and applies the BFGS update to
    the Hessian approximation Bk.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    H0: Initial inverse-Hessian approximation. Kept so existing calls keep
        working, but unused: the direction is obtained by solving with Bk.
    max_iter: Maximum number of iterations
    tol: Stop as soon as the approximated gradient norm at the new iterate is
         below this value
    curvature_tol: Bk is updated only if y^T s > curvature_tol * ||s|| * ||y||
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to the nearest row of solutions_second_function and the
    approximated gradient norm at the new iterate
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(f, xk, epsilon)
        pk = -np.linalg.solve(Bk, approx_grad_fk)

        alpha_k = backtracking_line_search(f, xk, pk, approx_grad_fk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        approx_grad_fk1 = forward_difference(f, xk1, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk

        # The backtracking search only enforces sufficient decrease, so the
        # curvature condition y^T s > 0 is not guaranteed. Skip the update when
        # it fails: this avoids dividing by zero (e.g. for a zero step) and
        # keeps a positive definite Bk positive definite.
        yk_T_sk = yk @ sk
        if yk_T_sk > curvature_tol * np.linalg.norm(sk) * np.linalg.norm(yk):
            Bk1 = Bk - (Bk @ np.outer(sk, sk) @ Bk) / (sk.T @ Bk @ sk) + np.outer(yk, yk) / yk_T_sk
        else:
            Bk1 = Bk

        # One distance per known solution (row); keep the smallest.
        distances_to_solutions = np.linalg.norm(xk1 - solutions_second_function, axis=1)
        distance_to_nearest_solution = np.min(distances_to_solutions)
        approx_grad_norm = np.linalg.norm(approx_grad_fk1)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_nearest_solution": distance_to_nearest_solution,
            "|approx_grad_f_k+1|": approx_grad_norm
        })

        if approx_grad_norm < tol:
            break

        xk, Bk = xk1, Bk1

    return results

# Run BFGS from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    bfgs_results = bfgs_method(starting_point, np.eye(2), np.eye(2))
    print("-"*100)
    print(f"BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: {starting_point}:")
    pprint(bfgs_results[-1])


----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [-0.2  1.2]:
{'distance_to_nearest_solution': 1.3821570298912018e-08,
 'iteration': 8,
 'x_k+1': array([-5.07492386e-09,  9.99999987e-01]),
 '|approx_grad_f_k+1|': 7.791482203545763e-08}
----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [3.8 0.1]:
{'distance_to_nearest_solution': 1.5404521134960973e-08,
 'iteration': 11,
 'x_k+1': array([ 4.00000001e+00, -5.25835167e-09]),
 '|approx_grad_f_k+1|': 7.706232900378349e-08}
----------------------------------------------------------------------------------------------------
BFGS method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [1.9 0.6]

In [22]:
# Quasi-Newton SR1 inside a trust-region framework, Rosenbrock function, approximated gradients
starting_points = [np.array(point) for point in ([1.2, 1.2], [-1.2, 1.0], [0.2, 0.8])]

def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-4, trust_region_delta=1.0, eta=1e-4, r=0.6):
    """
    Minimise the Rosenbrock function with an SR1 trust-region method, using
    forward-difference gradients (step size epsilon).

    The trial step is the quasi-Newton step -Bk^{-1} grad, scaled back to the
    trust-region boundary when it is too long. If that step does not decrease
    the quadratic model (possible because an SR1 matrix can be indefinite), the
    Cauchy point is used instead, so that the predicted reduction is positive
    and the ratio ared / pred keeps its meaning.

    Parameters:
    x0: Starting point
    B0: Initial Hessian approximation
    max_iter: Maximum number of iterations
    tol: Stop as soon as the approximated gradient norm at the current iterate
         is below this value
    trust_region_delta: Initial trust-region radius
    eta: A trial step is accepted only if ared / pred > eta
    r: Threshold of the SR1 skipping rule (6.26)
    Returns:
    List with one dict per iteration: iteration index, new iterate, its
    distance to solution_rosenbrock, the updated radius and the approximated
    gradient norm at the new iterate. Empty if x0 already satisfies the
    tolerance.
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(rosenbrock_f, xk, epsilon)
        grad_norm = np.linalg.norm(approx_grad_fk)
        # A zero gradient leaves nothing to do (and would break the Cauchy step).
        if grad_norm < tol or grad_norm == 0:
            break

        sk = -np.linalg.solve(Bk, approx_grad_fk)
        step_norm = np.linalg.norm(sk)
        if step_norm > trust_region_delta:
            sk = sk * (trust_region_delta / step_norm)
        pred = -approx_grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        if not pred > 0:
            # The quasi-Newton step does not reduce the model: use the Cauchy
            # point (model minimiser along -grad inside the region) instead.
            curvature = approx_grad_fk @ Bk @ approx_grad_fk
            tau = 1.0
            if curvature > 0:
                tau = min(grad_norm**3 / (trust_region_delta * curvature), 1.0)
            sk = -(tau * trust_region_delta / grad_norm) * approx_grad_fk
            pred = -approx_grad_fk.T @ sk - 0.5 * sk.T @ Bk @ sk

        x_trial = xk + sk
        approx_grad_fk1 = forward_difference(rosenbrock_f, x_trial, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk

        ared = rosenbrock_f(xk) - rosenbrock_f(x_trial)
        # Without a positive predicted reduction the ratio is meaningless:
        # treat the step as rejected instead of dividing by pred.
        ratio = ared / pred if pred > 0 else -np.inf

        step_accepted = ratio > eta
        xk1 = x_trial if step_accepted else xk

        if ratio > 0.75:
            # Enlarge the region only when the step came close to its boundary.
            if np.linalg.norm(sk) > 0.8 * trust_region_delta:
                trust_region_delta = 2 * trust_region_delta
        elif ratio >= 0.1:
            pass  # acceptable agreement: keep the radius
        else:
            trust_region_delta = 0.5 * trust_region_delta

        # check 6.26 (the update is also skipped for a zero denominator)
        ys = yk - Bk @ sk
        ys_T_s = ys @ sk
        if ys_T_s != 0 and abs(ys_T_s) >= r * np.linalg.norm(sk) * np.linalg.norm(ys):
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk

        dist_to_solution = np.linalg.norm(xk1 - solution_rosenbrock)
        # After a rejected step the iterate stays at xk, so report its gradient.
        approx_grad_at_xk1 = approx_grad_fk1 if step_accepted else approx_grad_fk

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_solution": dist_to_solution,
            "delta": trust_region_delta,
            "|approx_grad_f_k+1|": np.linalg.norm(approx_grad_at_xk1)
        })

        xk, Bk = xk1, Bk1

    return results

# Run the method from every starting point and show the last recorded iteration.
for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print("-"*100)
    print(f"SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: {starting_point}:")
    # The method returns an empty list when it stops before its first step.
    if sr1_results:
        pprint(sr1_results[-1])
    else:
        print("No iterations were performed: the starting point already satisfies the gradient tolerance.")

----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [1.2 1.2]:
{'delta': 0.03125,
 'distance_to_solution': 2.5802685631495484e-06,
 'iteration': 252,
 'x_k+1': array([0.99999881, 0.99999771]),
 '|approx_grad_f_k+1|': 3.6790712570867334e-05}
----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock function f(x) = 100(x2 − x1^2)^2 + (1 − x1)^2 (approximated derivatives) and starting point: [-1.2  1. ]:
{'delta': 0.015625,
 'distance_to_solution': 7.502412132112569e-05,
 'iteration': 279,
 'x_k+1': array([0.99996654, 0.99993285]),
 '|approx_grad_f_k+1|': 4.946973149100229e-05}
----------------------------------------------------------------------------------------------------
SR1 (trust region) method with Rosenbrock functi

In [23]:
# Quasi-Newton SR1 inside a trust-region framework, second function, approximated gradients
starting_points = [np.array(point) for point in ([-0.2, 1.2], [3.8, 0.1], [1.9, 0.6])]

def _model_reduction(grad, B, step):
    """Decrease of the quadratic model m(s) = grad.s + 0.5 s.B.s achieved by `step`."""
    return -grad @ step - 0.5 * step @ B @ step


def _cauchy_step(grad, B, delta):
    """Minimizer of the quadratic model along -grad inside the ball ||s|| <= delta.

    For a nonzero gradient and positive radius this step has a strictly
    positive model reduction, even when B is indefinite.
    """
    grad_norm = np.linalg.norm(grad)
    curvature = grad @ B @ grad
    if curvature <= 0:
        # The model keeps decreasing along -grad: go to the boundary.
        tau = 1.0
    else:
        tau = min(grad_norm**3 / (delta * curvature), 1.0)
    return -(tau * delta / grad_norm) * grad


def _trust_region_step(grad, B, delta):
    """Return (step, predicted_reduction) for the model defined by grad and B.

    The quasi-Newton step -B^{-1} grad, scaled back onto the trust-region
    boundary when it is too long, is used whenever it decreases the model.
    If B is singular or the scaled step does not decrease the model
    (B indefinite), the Cauchy step is used instead.
    """
    try:
        step = -np.linalg.solve(B, grad)
    except np.linalg.LinAlgError:
        step = None

    if step is not None:
        step_norm = np.linalg.norm(step)
        if step_norm > delta:
            step = step * (delta / step_norm)
        pred = _model_reduction(grad, B, step)
        if pred > 0:
            return step, pred

    step = _cauchy_step(grad, B, delta)
    return step, _model_reduction(grad, B, step)


def sr1_trust_region_method(x0, B0, max_iter=10000, tol=1e-5, trust_region_delta=1.0, eta=1e-4, r=0.3):
    """Minimize the notebook's second test function `f` with an SR1 trust-region method.

    Gradients are obtained from ``forward_difference(f, x, epsilon)``; `f`,
    `forward_difference`, `epsilon` and `solutions_second_function` are read
    from the notebook's global namespace.

    Parameters
    ----------
    x0 : starting point.
    B0 : initial Hessian approximation.
    max_iter : maximum number of iterations.
    tol : the loop stops once the approximate gradient norm at the current
        iterate drops below this value.
    trust_region_delta : initial trust-region radius.
    eta : a trial step is accepted when actual/predicted reduction exceeds eta.
    r : threshold of the SR1 skipping rule (the check the original code
        labels "6.26").

    Returns
    -------
    list of dict
        One record per iteration with keys "iteration", "x_k+1",
        "distance_to_nearest_solution", "delta" (radius after its update) and
        "|approx_grad_f_k+1|" (approximate gradient norm at "x_k+1").
        The list is empty when x0 already satisfies the stopping test.
    """
    xk = x0
    Bk = B0
    results = []
    for k in range(max_iter):
        approx_grad_fk = forward_difference(f, xk, epsilon)
        grad_norm = np.linalg.norm(approx_grad_fk)
        # The explicit zero check keeps tol=0 from dividing by a zero gradient norm.
        if grad_norm < tol or grad_norm == 0:
            break

        sk, pred = _trust_region_step(approx_grad_fk, Bk, trust_region_delta)
        if not pred > 0:
            # No model decrease is attainable (e.g. the radius underflowed or
            # the model contains non-finite values): nothing more can be done.
            break

        trial_point = xk + sk
        approx_grad_fk1 = forward_difference(f, trial_point, epsilon)
        yk = approx_grad_fk1 - approx_grad_fk

        ared = f(xk) - f(trial_point)
        ratio = ared / pred  # pred > 0 is guaranteed above, so the sign is meaningful

        accepted = ratio > eta
        xk1 = trial_point if accepted else xk
        # Report the gradient that belongs to the point actually kept.
        grad_at_xk1 = approx_grad_fk1 if accepted else approx_grad_fk

        # Radius update: grow only after a very successful step that reached
        # (nearly) the boundary, shrink after a poor one, otherwise keep it.
        if ratio > 0.75:
            if np.linalg.norm(sk) > 0.8 * trust_region_delta:
                trust_region_delta = 2 * trust_region_delta
        elif not ratio >= 0.1:
            trust_region_delta = 0.5 * trust_region_delta

        # SR1 update, applied whether or not the step was accepted and skipped
        # when the denominator is too small relative to ||sk|| * ||yk - Bk sk||.
        # An exactly zero denominator is skipped too: it would otherwise pass
        # the test as 0 >= 0 and turn Bk into NaN through 0/0.
        ys = yk - Bk @ sk
        ys_T_s = ys @ sk
        if ys_T_s != 0 and abs(ys_T_s) >= r * np.linalg.norm(sk) * np.linalg.norm(ys):
            Bk1 = Bk + np.outer(ys, ys) / ys_T_s
        else:
            Bk1 = Bk

        distances_to_solutions = np.linalg.norm(xk1 - solutions_second_function, axis=1)
        distance_to_nearest_solution = np.min(distances_to_solutions)

        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "distance_to_nearest_solution": distance_to_nearest_solution,
            "delta": trust_region_delta,
            "|approx_grad_f_k+1|": np.linalg.norm(grad_at_xk1)
        })

        xk, Bk = xk1, Bk1

    return results

# Run the SR1 trust-region solver on the second test function from every
# start and show the last recorded iteration of each run.
for starting_point in starting_points:
    sr1_results = sr1_trust_region_method(starting_point, np.eye(2))
    print(
        "-"*100,
        f"SR1 trust region method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: {starting_point}:",
        sep="\n",
    )
    pprint(sr1_results[-1])

----------------------------------------------------------------------------------------------------
SR1 trust region method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [-0.2  1.2]:
{'delta': 0.5,
 'distance_to_nearest_solution': 1.75434290746335e-08,
 'iteration': 10,
 'x_k+1': array([5.63381230e-09, 1.00000002e+00]),
 '|approx_grad_f_k+1|': 3.3145714762079956e-06}
----------------------------------------------------------------------------------------------------
SR1 trust region method with f(x) = 150(x1x2)^2 + (0.5x1 + 2x2 − 2)^2 (approximated derivatives) and starting point: [3.8 0.1]:
{'delta': 0.001953125,
 'distance_to_nearest_solution': 5.598158626742398e-06,
 'iteration': 5041,
 'x_k+1': array([ 3.99999440e+00, -9.47821466e-10]),
 '|approx_grad_f_k+1|': 9.977221558170227e-06}
----------------------------------------------------------------------------------------------------
SR1 trust region method with f(x) = 150(x1x2)^2 + (0.