In [1]:
import numpy as np



In [2]:
# Reference minimizers, used by the run cells to report how far each solver's
# final iterate is from a true solution.

# Function 1: single minimizer at (1, 1).
solution_rosenbrock = np.array(
    [1.0, 1.0]
)

# Function 2: one minimizer per row; the run cells report the distance to the
# nearest row.
solutions_second_function = np.array(
    [
        [4.0, 0.0],
        [0.0, 1.0],
    ]
)

## THE FLETCHER–REEVES METHOD

In [3]:
def fr_method(f, grad_f, start_point, tol=1e-4, max_iter=1000, alpha_bar=1.0, rho=0.9, c1=0.0001, c2=0.9, min_alpha=1e-10):
    """
    The Fletcher–Reeves nonlinear conjugate gradient method for unconstrained optimization.

    Every iteration backtracks from ``alpha_bar`` until the Armijo
    (sufficient decrease) condition holds, steps along the current search
    direction, and builds the next direction as
    ``p_new = -g_new + beta * p`` with ``beta = (g_new . g_new) / (g . g)``.

    Args:
    - f: Objective function to minimize.
    - grad_f: Function to compute the gradient of the objective function.
    - start_point: Initial guess.
    - tol: Tolerance for convergence based on the norm of the gradient.
    - max_iter: Maximum number of iterations.
    - alpha_bar: Initial step size tried by every line search.
    - rho: Factor by which the step size is multiplied on each backtracking step.
    - c1: Parameter for the Armijo condition.
    - c2: Parameter for the curvature condition. Kept in the signature for
      compatibility but not used: the line search only checks the Armijo condition.
    - min_alpha: Minimum step size allowed during line search. Once the step
      drops below it, backtracking stops and that step is taken as is.

    Returns:
    - x_opt: Final iterate (the start point itself if no iteration ran).
    - num_iterations: Number of iterations performed.

    Side effects:
    - Prints the gradient norm at the returned point.
    """
    x = start_point
    grad_fk = grad_f(x)
    p = -grad_fk
    iterations = 0

    while np.linalg.norm(grad_fk) > tol and iterations < max_iter:
        # Both quantities are constant during the backtracking loop, so they
        # are evaluated once per outer iteration. grad_fk already holds
        # grad_f(x), so no extra gradient evaluation is needed.
        f_x = f(x)
        directional_derivative = grad_fk.T @ p

        alpha_k = alpha_bar
        while f(x + alpha_k * p) > f_x + c1 * alpha_k * directional_derivative:
            alpha_k *= rho
            if alpha_k < min_alpha:
                # Give up shrinking; the step below is taken with this alpha_k.
                break

        x_new = x + alpha_k * p
        grad_fk_new = grad_f(x_new)

        # Fletcher–Reeves coefficient. The denominator is non-zero here because
        # the loop condition guarantees ||grad_fk|| > tol.
        beta_FR_k1 = max(grad_fk_new.T @ grad_fk_new / (grad_fk.T @ grad_fk), 0)

        p_new = -grad_fk_new + beta_FR_k1 * p

        x, grad_fk, p = x_new, grad_fk_new, p_new
        iterations += 1

    print("Gradient: ", np.linalg.norm(grad_fk))

    return x, iterations

## THE POLAK–RIBIERE METHOD

In [4]:
def pr_method(f, grad_f, start_point, tol=1e-4, max_iter=1000, alpha_bar=1.0, rho=0.9, c1=0.1, c2=0.9, min_alpha=1e-10):
    """
    Nonlinear conjugate gradient method exposed under the Polak-Ribiere name.

    Note: the direction update in this implementation uses the
    Hestenes–Stiefel coefficient, clipped at zero,
    ``beta = max(g_new . y / (y . p), 0)`` with ``y = g_new - g``,
    rather than the Polak–Ribiere quotient ``g_new . y / (g . g)``.

    Every iteration backtracks from ``alpha_bar`` until the Armijo
    (sufficient decrease) condition holds, steps along the current search
    direction, and builds the next direction as ``p_new = -g_new + beta * p``.

    Args:
    - f: Objective function to minimize.
    - grad_f: Function to compute the gradient of the objective function.
    - start_point: Initial guess.
    - tol: Tolerance for convergence based on the norm of the gradient.
    - max_iter: Maximum number of iterations.
    - alpha_bar: Initial step size tried by every line search.
    - rho: Factor by which the step size is multiplied on each backtracking step.
    - c1: Parameter for the Armijo condition.
    - c2: Parameter for the curvature condition. Kept in the signature for
      compatibility but not used: the line search only checks the Armijo condition.
    - min_alpha: Minimum step size allowed during line search. Once the step
      drops below it, backtracking stops and that step is taken as is.

    Returns:
    - x_opt: Final iterate (the start point itself if no iteration ran).
    - num_iterations: Number of iterations performed.

    Side effects:
    - Prints the gradient norm at the returned point.
    """
    x = start_point
    grad_fk = grad_f(x)
    p = -grad_fk
    iterations = 0

    while np.linalg.norm(grad_fk) > tol and iterations < max_iter:
        # Both quantities are constant during the backtracking loop, so they
        # are evaluated once per outer iteration. grad_fk already holds
        # grad_f(x), so no extra gradient evaluation is needed.
        f_x = f(x)
        directional_derivative = grad_fk.T @ p

        alpha_k = alpha_bar
        while f(x + alpha_k * p) > f_x + c1 * alpha_k * directional_derivative:
            alpha_k *= rho
            if alpha_k < min_alpha:
                # Give up shrinking; the step below is taken with this alpha_k.
                break

        x_new = x + alpha_k * p
        grad_fk_new = grad_f(x_new)

        # Hestenes–Stiefel coefficient, clipped at zero.
        grad_change = grad_fk_new - grad_fk
        denominator = grad_change.T @ p
        if denominator != 0:
            beta_PR_k1 = max(grad_fk_new.T @ grad_change / denominator, 0)
        else:
            # The gradient did not change along p (e.g. a linear objective or a
            # vanishing step). Dividing would yield NaN/inf and poison every
            # later iterate, so restart from steepest descent instead.
            beta_PR_k1 = 0.0

        p_new = -grad_fk_new + beta_PR_k1 * p

        x, grad_fk, p = x_new, grad_fk_new, p_new
        iterations += 1

    print("Gradient: ", np.linalg.norm(grad_fk))

    return x, iterations

## Function 1

In [5]:
def f(x):
    """Evaluate test function 1 (the Rosenbrock function) at the point ``x``.

    Only ``x[0]`` and ``x[1]`` are read.
    """
    x0, x1 = x[0], x[1]
    curvature_term = 100 * (x1 - x0**2)**2
    offset_term = (1 - x0)**2
    return curvature_term + offset_term

In [6]:
def gradient_f(x):
    """Exact gradient of test function 1 at ``x``, returned as a length-2 array.

    Only ``x[0]`` and ``x[1]`` are read.
    """
    x0, x1 = x[0], x[1]
    # Shared inner factor of both partial derivatives.
    residual = x1 - x0**2
    return np.array([
        -400 * x0 * residual - 2 * (1 - x0),
        200 * residual,
    ])

## Function 2



In [7]:
def f_2(x):
    """Evaluate test function 2 at the point ``x``.

    The value is ``150 * (x0 * x1)^2 + (0.5 * x0 + 2 * x1 - 2)^2``;
    only ``x[0]`` and ``x[1]`` are read.
    """
    x0, x1 = x[0], x[1]
    product_term = 150 * (x0 * x1)**2
    linear_residual = 0.5 * x0 + 2 * x1 - 2
    return product_term + linear_residual**2

In [8]:
def gradient_f_2(x):
    """Exact gradient of test function 2 at ``x``, returned as a length-2 array.

    Only ``x[0]`` and ``x[1]`` are read.
    """
    x0, x1 = x[0], x[1]
    # Inner expression of the squared linear term; it appears in both partials.
    linear_residual = 0.5 * x0 + 2 * x1 - 2
    return np.array([
        300 * x0 * x1**2 + linear_residual,
        300 * x0**2 * x1 + 4 * linear_residual,
    ])

# Runs with exact gradients

## Computing Function 1

In [9]:
# Starting points for function 1.
start_points_1 = tuple(
    np.array(coords) for coords in ([1.2, 1.2], [-1.2, 1], [0.2, 0.8])
)

# --- Fletcher–Reeves on function 1, exact gradient ---
print("THE FLETCHER–REEVES METHOD F1", '_' * 100, sep='\n')
for start_point in start_points_1:
    # The solver itself prints the final gradient norm before the report below.
    solution, num_iterations = fr_method(f, gradient_f, start_point)
    dist_to_solution = np.linalg.norm(solution - solution_rosenbrock)
    print("Starting points:", start_point)
    print("Solution FR function_1:", solution)
    print("Distant from solution:", dist_to_solution)
    print("Number of iterations FR function_1:", num_iterations)
    print('-' * 100)

print('\n')

# --- Second solver (pr_method) on function 1, exact gradient ---
print("THE POLAK–RIBIERE METHOD F1", '_' * 100, sep='\n')
for start_point in start_points_1:
    solution, num_iterations = pr_method(f, gradient_f, start_point)
    dist_to_solution = np.linalg.norm(solution - solution_rosenbrock)
    print("Starting points:", start_point)
    print("Solution PR function_1:", solution)
    print("Distant from solution:", dist_to_solution)
    print("Number of iterations PR function_1:", num_iterations)
    print('-' * 100)


THE FLETCHER–REEVES METHOD F1
____________________________________________________________________________________________________
Gradient:  8.708164523214734e-05
Starting points: [1.2 1.2]
Solution FR function_1: [0.99999806 0.99999632]
Distant from solution: 4.161227003007066e-06
Number of iterations FR function_1: 261
----------------------------------------------------------------------------------------------------
Gradient:  9.84009568999249e-05
Starting points: [-1.2  1. ]
Solution FR function_1: [1.00000021 1.00000064]
Distant from solution: 6.690233300493315e-07
Number of iterations FR function_1: 282
----------------------------------------------------------------------------------------------------
Gradient:  9.906377736311834e-05
Starting points: [0.2 0.8]
Solution FR function_1: [1.00008437 1.00016895]
Distant from solution: 0.00018884552181523055
Number of iterations FR function_1: 419
--------------------------------------------------------------------------------------

## Computing Function 2

In [10]:
# Starting points for function 2.
start_points_2 = tuple(
    np.array(coords) for coords in ([0.2, -1.2], [3.8, 0.1], [1.9, 0.6])
)

# --- Fletcher–Reeves on function 2, exact gradient ---
print("THE FLETCHER–REEVES METHOD F2", '_' * 100, sep='\n')
for start_point in start_points_2:
    # The solver itself prints the final gradient norm before the report below.
    solution, num_iterations = fr_method(f_2, gradient_f_2, start_point)
    # Function 2 has several minimizers: report the distance to the nearest one.
    dist_to_solution = np.min(np.linalg.norm(solution - solutions_second_function, axis=1))
    print("Starting points:", start_point)
    print("Solution FR function_2:", solution)
    print("Distant from solution:", dist_to_solution)
    print("Number of iterations FR function_2:", num_iterations)
    print('-' * 100)

print('\n')

# --- Second solver (pr_method) on function 2, exact gradient ---
print("THE POLAK–RIBIERE METHOD F2", '_' * 100, sep='\n')
for start_point in start_points_2:
    solution, num_iterations = pr_method(f_2, gradient_f_2, start_point)
    dist_to_solution = np.min(np.linalg.norm(solution - solutions_second_function, axis=1))
    print("Starting points:", start_point)
    print("Solution PR function_2:", solution)
    print("Distant from solution:", dist_to_solution)
    print("Number of iterations PR function_2:", num_iterations)
    print('-' * 100)

    

THE FLETCHER–REEVES METHOD F2
____________________________________________________________________________________________________
Gradient:  9.484847022594425e-05
Starting points: [ 0.2 -1.2]
Solution FR function_2: [-2.36525297e-07  1.00001006e+00]
Distant from solution: 1.0061361958363637e-05
Number of iterations FR function_2: 271
----------------------------------------------------------------------------------------------------
Gradient:  8.66877884433774e-05
Starting points: [3.8 0.1]
Solution FR function_2: [3.99994265e+00 6.83820035e-09]
Distant from solution: 5.734527624449863e-05
Number of iterations FR function_2: 364
----------------------------------------------------------------------------------------------------
Gradient:  9.552978398147783e-05
Starting points: [1.9 0.6]
Solution FR function_2: [-2.99070252e-07  1.00000749e+00]
Distant from solution: 7.492965823332256e-06
Number of iterations FR function_2: 257
----------------------------------------------------------

# Runs with approximated gradients

In [11]:
# Halve u until adding it to 1.0 no longer changes the floating-point result;
# the value left in u is used as the unit roundoff.
u = 1.0
while True:
    if 1.0 + u == 1.0:
        break
    u = u / 2

# Step size for the forward-difference gradient approximation: sqrt(u).
epsilon = np.sqrt(u)

In [12]:
def forward_difference(func: callable, x: np.ndarray, epsilon: float) -> np.ndarray:
    """
    Approximate the gradient of ``func`` at ``x`` with one-sided (forward) differences.

    Component ``i`` of the result is
    ``(func(x + epsilon * e_i) - func(x)) / epsilon``, where ``e_i`` is the
    i-th unit vector. ``func`` is evaluated once at ``x`` and once per
    coordinate, i.e. ``dim + 1`` times in total.

    Parameters:
    func: Function you want to approximate the derivative of
    x: Point at which you want the approximation of the derivative
    epsilon: very small number, typically sqrt(u), where u is unit-roundoff
    Returns:
    Approximation of derivative at point x, as a 1-D array with one entry per
    coordinate of x (length 1 when x is a 0-d array)
    """
    dim = x.shape[0] if x.shape != () else 1
    grad = np.zeros(dim)

    # The base value does not depend on the coordinate being perturbed, so it
    # is computed once instead of once per loop iteration.
    f_at_x = func(x)

    for i in range(dim):
        e = np.zeros(dim)
        e[i] = 1
        grad[i] = (func(x + epsilon * e) - f_at_x) / epsilon

    return grad

In [13]:
def approx_gradient_f(x):
    """Forward-difference approximation of the gradient of ``f`` at ``x``.

    Uses the notebook-level step size ``epsilon``.
    """
    return forward_difference(func=f, x=x, epsilon=epsilon)
def approx_gradient_f_2(x):
    """Forward-difference approximation of the gradient of ``f_2`` at ``x``.

    Uses the notebook-level step size ``epsilon``.
    """
    return forward_difference(func=f_2, x=x, epsilon=epsilon)

## Function 1

In [14]:
# Starting points for function 1.
start_points_1 = tuple(
    np.array(coords) for coords in ([1.2, 1.2], [-1.2, 1], [0.2, 0.8])
)

# --- Fletcher–Reeves on function 1, forward-difference gradient ---
print("THE FLETCHER–REEVES METHOD F1", '_' * 100, sep='\n')
for start_point in start_points_1:
    # The solver itself prints the final gradient norm before the report below.
    solution, num_iterations = fr_method(f, approx_gradient_f, start_point)
    dist_to_solution = np.linalg.norm(solution - solution_rosenbrock)
    print("Starting points:", start_point)
    print("Solution FR function_1:", solution)
    print("Distant from solution:", dist_to_solution)
    print("Number of iterations FR function_1:", num_iterations)
    print('-' * 100)

print('\n')

# --- Second solver (pr_method) on function 1, forward-difference gradient ---
# This run is given a larger iteration budget than the default.
print("THE POLAK–RIBIERE METHOD F1", '_' * 100, sep='\n')
for start_point in start_points_1:
    solution, num_iterations = pr_method(f, approx_gradient_f, start_point, max_iter=10000)
    dist_to_solution = np.linalg.norm(solution - solution_rosenbrock)
    print("Starting points:", start_point)
    print("Solution PR function_1:", solution)
    print("Distant from solution:", dist_to_solution)
    print("Number of iterations PR function_1:", num_iterations)
    print('-' * 100)


THE FLETCHER–REEVES METHOD F1
____________________________________________________________________________________________________
Gradient:  8.027064986043284e-05
Starting points: [1.2 1.2]
Solution FR function_1: [0.99999899 0.99999816]
Distant from solution: 2.0992902957430258e-06
Number of iterations FR function_1: 209
----------------------------------------------------------------------------------------------------
Gradient:  9.930995685736585e-05
Starting points: [-1.2  1. ]
Solution FR function_1: [1.00002103 1.00004237]
Distant from solution: 4.730502065354511e-05
Number of iterations FR function_1: 312
----------------------------------------------------------------------------------------------------
Gradient:  7.895310749504596e-05
Starting points: [0.2 0.8]
Solution FR function_1: [0.99998702 0.99997417]
Distant from solution: 2.890703971108051e-05
Number of iterations FR function_1: 450
-------------------------------------------------------------------------------------

## Function 2

In [15]:
# Starting points for function 2.
start_points_2 = tuple(
    np.array(coords) for coords in ([0.2, -1.2], [3.8, 0.1], [1.9, 0.6])
)

# --- Fletcher–Reeves on function 2, forward-difference gradient ---
print("THE FLETCHER–REEVES METHOD F2", '_' * 100, sep='\n')
for start_point in start_points_2:
    # The solver itself prints the final gradient norm before the report below.
    solution, num_iterations = fr_method(f_2, approx_gradient_f_2, start_point)
    # Function 2 has several minimizers: report the distance to the nearest one.
    dist_to_solution = np.min(np.linalg.norm(solution - solutions_second_function, axis=1))
    print("Starting points:", start_point)
    print("Solution FR function_2:", solution)
    print("Distant from solution:", dist_to_solution)
    print("Number of iterations FR function_2:", num_iterations)
    print('-' * 100)

print('\n')

# --- Second solver (pr_method) on function 2, forward-difference gradient ---
print("THE POLAK–RIBIERE METHOD F2", '_' * 100, sep='\n')
for start_point in start_points_2:
    solution, num_iterations = pr_method(f_2, approx_gradient_f_2, start_point)
    dist_to_solution = np.min(np.linalg.norm(solution - solutions_second_function, axis=1))
    print("Starting points:", start_point)
    print("Solution PR function_2:", solution)
    print("Distant from solution:", dist_to_solution)
    print("Number of iterations PR function_2:", num_iterations)
    print('-' * 100)

    

THE FLETCHER–REEVES METHOD F2
____________________________________________________________________________________________________
Gradient:  9.902823529083983e-05
Starting points: [ 0.2 -1.2]
Solution FR function_2: [-2.43448743e-07  9.99993582e-01]
Distant from solution: 6.422409330802356e-06
Number of iterations FR function_2: 346
----------------------------------------------------------------------------------------------------
Gradient:  9.395461451365948e-05
Starting points: [3.8 0.1]
Solution FR function_2: [ 3.99997299e+00 -1.33693177e-08]
Distant from solution: 2.7013160807674228e-05
Number of iterations FR function_2: 265
----------------------------------------------------------------------------------------------------
Gradient:  9.830286117411164e-05
Starting points: [1.9 0.6]
Solution FR function_2: [-1.95441259e-07  9.99991871e-01]
Distant from solution: 8.131842896802817e-06
Number of iterations FR function_2: 286
-------------------------------------------------------