In [869]:
import numpy as np
import decimal
from decimal import Decimal

<h3>Problem 1:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
starting points $x_0$:
$$(1.2, 1.2), (-1.2, 1), (0.2, 0.8)$$
with the minimum at $x^*=(1,1)$

In [870]:
def fct_1(x: np.array) -> int:
    return 100*(x[1]-x[0]**2)**2 + (1-x[0])**2

def grad_fct_1(x: np.array) -> np.array:
    return np.array([-400*x[0]*(x[1]-x[0]**2)-2*(1-x[0]),
                    200*(x[1]-x[0]**2)], dtype=np.float64)

def hessian_fct_1(x: np.array) -> np.array:
    return np.array([[1200*x[0]**2-400*x[1]+2, -400*x[0]], 
                     [-400*x[0], 200]], dtype=np.float64)

<h3>Problem 2:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

starting points $x_0$:
$$(-0.2, 1.2), (3.8, 0.1), (1.9, 0.6)$$<br>
with the minimums at $x^*=(0,1)$ and $x^*=(4,0)$ and a saddle point at $(0.43685, 0.10921)$

In [871]:
def fct_2(x: np.array) -> int:
    return 150*(x[0] * x[1])**2 + (0.5 * x[0] + 2*x[1] - 2)**2

def fct_2_dec(x: np.array) -> int:
    return 150*(Decimal(x[0]) * Decimal(x[1]))**2 + (Decimal(0.5) * Decimal(x[0]) + 2*Decimal(x[1]) - 2)**2

def grad_fct_2(x: np.array) -> np.array:
    return np.array([300*x[0]*x[1]**2 + 0.5*x[0]+2*x[1]-2,
                     300*(x[0]**2)*x[1] + 2*x[0]+8*x[1]-8], dtype=np.float64)

def hessian_fct_2(x: np.array) -> np.array:
    return np.array([[300*x[1]**2 + 0.5, 600*x[0]*x[1] + 2], 
                     [600*x[0]*x[1] + 2, 300*x[0]**2 + 8]], dtype=np.float64)

<h2>1. Implementation of BFGS</h2>

In [872]:
def Backtrack(f, x, gradient, pk, alpha_zero=1, rho=0.5, c=0.5):
    alpha = alpha_zero
    
    #print(x, alpha * pk)
    #print(float(f(x)) + c * alpha * np.dot(gradient.T, pk))
    while not f(x + alpha * pk) <= float(f(x)) + c * alpha * np.dot(gradient, pk):
        alpha *= rho
        
    return alpha

In [873]:
def BFGS(f, grad, x0, max_iter=10000, tol=1e-6, c=0.5, rho=0.5, estimate=False):
    x = x0
    num_iter = 0
    e_g = Decimal(0.0000000000001)
    e_h = Decimal(0.00000001)
    
    identity = np.eye(len(x))
    hessian_inv = identity
    
    while num_iter < max_iter:
        if estimate:
            x_dec = np.array([Decimal(val) for val in x])
            gradient = grad(f,x_dec,e_g)
        else:  
            gradient = grad(x)
        
        grad_norm = np.linalg.norm(gradient)
        if grad_norm < tol:
            break
        
        search_direction = -np.dot(hessian_inv, gradient)
        
        alpha = Backtrack(f=f, x=x, gradient=gradient, pk=search_direction, alpha_zero=1, rho=rho, c=c)
        
        x_new = x + alpha * search_direction
        sk = x_new - x
        if estimate:
            x_new_dec = np.array([Decimal(val) for val in x_new])
            yk = grad(f,x_new_dec,e_g) - gradient
        else:
            yk = grad(x_new) - gradient
            
        if (yk.dot(sk))==0:
            print('combination of c,rho leads to error')
            break
            
        rho_k = 1 / (yk.dot(sk))
        hessian_inv = (identity - (rho_k*(sk.dot(yk))))@hessian_inv@(identity - (rho_k*(yk.dot(sk)))) + (rho_k*sk.dot(sk))
        x = x_new
        num_iter += 1
        
    x_star = x
    f_star = f(x_star)
    return x_star, round(float(f_star),15), num_iter, grad_norm

<h2>2. Implementation of SR1</h2>

<h3>using line search</h3>

In [874]:
import numpy as np
from scipy.optimize import line_search

def SR1_line(f, grad, initial_guess, max_iter=1000, c=0.5, rho=0.5, tol=1e-6, estimate=False):
    x = initial_guess
    n = len(x)
    e_g = Decimal(0.0000000000001)

    Bk = np.eye(n)

    for iteration in range(max_iter):

        if estimate:
            x_dec = np.array([Decimal(val) for val in x])
            gradient = grad(f,x_dec,e_g)
        else:  
            gradient = grad(x)

        if np.linalg.norm(gradient) < tol:
            break

        search_direction = -np.dot(Bk, gradient)

        alpha = Backtrack(f=f, x=x, gradient=gradient, pk=search_direction, alpha_zero=1, rho=rho, c=c)

        x_new = x + alpha * search_direction

        if estimate:
            x_new_dec = np.array([Decimal(val) for val in x_new])
            gradient_new = grad(f,x_new_dec,e_g)
            yk = gradient_new  - gradient
        else:
            gradient_new = grad(x_new)
            yk = gradient_new - gradient
        sk = x_new - x

        if np.abs(np.dot(sk, yk)) > tol:

            Bk += np.outer(sk - np.dot(Bk, yk), sk - np.dot(Bk, yk)) / \
                 np.dot(sk - np.dot(Bk, yk), yk)

        x = x_new
    
    f_star = f(x)
    x_star = x
    return x_star, f_star, iteration, np.linalg.norm(gradient_new)

<h2>3. Gradient and hessian approximation</h2>

In [875]:
# use to get gradient as np.array
def grad_estimate_np(f, x: np.array, eps: decimal.Decimal):
    grad=np.full(len(x), Decimal(0))
    for i in range(len(x)):
        unit_vector = np.full(len(x), Decimal(0))
        unit_vector[i] = Decimal(1)
        grad[i] = round(float((f(x + (eps * unit_vector)) - f(x)) / eps), 15)
    return np.array(grad, dtype=float)

# use for further calculation of hessian estimate
def grad_estimate(f, x: np.array, eps: decimal.Decimal):
    grad=np.full(len(x), Decimal(0))
    for i in range(len(x)):
        unit_vector = np.full(len(x), Decimal(0))
        unit_vector[i] = Decimal(1)
        grad[i] = (f(x + (eps * unit_vector)) - f(x)) / eps
    return np.array(grad)

In [876]:
def hessian_estimate(f, x: np.array, eps_hess: decimal.Decimal, eps_grad):
    hessian = np.full((len(x), len(x)), Decimal(0))
    for i in range(len(x)):
        unit_vector = np.full(len(x), Decimal(0))
        unit_vector[i] = Decimal(1)
        hessian[:, i] = np.array([round(float(g),15) for g in (np.divide(grad_estimate(f=f, x=(x + (eps_hess * unit_vector)), eps=eps_grad) - grad_estimate(f=f, x=x, eps=eps_grad), eps_hess))])
    return np.array(hessian, dtype=float)

<h2>4. Problems to test BFGS</h2>

<h3>Problem 1.1:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(1.2, 1.2)$$

<h4>using exact gradient:</h4>

In [880]:
x_0= np.array([1.2, 1.2])
x_true = [1, 1]
x_star, fval, it, grad_norm = BFGS(fct_1, grad_fct_1, x_0, c=0.51, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.03e-13 at x = [1.00000045 1.0000009 ] after 195  iterations with remaining gradient norm 5.805101228540534e-07
distance to true solution: 1.0088961458002889e-06


<h4>using approximated gradient:</h4>

In [881]:
x_star, fval, it, grad_norm = BFGS(fct_1, grad_estimate_np, x_0, c=0.51, rho=0.99, max_iter=1000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.7e-14 at x = [1.00000016 1.00000033] after 440  iterations with remaining gradient norm 1.477754780616286e-07
distance to true solution: 3.6897152000751864e-07


<h3>Problem 1.2:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(-1.2, 1)$$

<h4>using exact gradient:</h4>

In [882]:
x_0= np.array([-1.2, 1])
x_star, fval, it, grad_norm = BFGS(fct_1, grad_fct_1, x_0, c=0.5, rho=0.95, max_iter=1000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true - x_star))

minimum 5.17e-13 at x = [0.99999928 0.99999856] after 161  iterations with remaining gradient norm 6.442759799950901e-07
distance to true solution: 1.6095306945376956e-06


<h4>using approximated gradient:</h4>

In [883]:
x_star, fval, it, grad_norm = BFGS(fct_1, grad_estimate_np, x_0, c=0.5, rho=0.95, max_iter=1000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true - x_star))

minimum 0.0  at x = [0.99999999 0.99999997] after 250  iterations with remaining gradient norm 8.884249813094153e-08
distance to true solution: 3.150101601598859e-08


<h3>Problem 1.3:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(0.2, 0.8)$$

<h4>using exact gradient:</h4>

In [884]:
x_0= np.array([0.2, 0.8])
x_star, fval, it, grad_norm = BFGS(fct_1, grad_fct_1, x_0, c=0.5, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true - x_star))

minimum 2.91e-13 at x = [0.99999946 0.99999892] after 122  iterations with remaining gradient norm 9.780902008957435e-07
distance to true solution: 1.2065051863196908e-06


<h4>using approximated gradient:</h4>

In [885]:
x_star, fval, it, grad_norm = BFGS(fct_1, grad_estimate_np, x_0, c=0.5, rho=0.99, max_iter=1000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true - x_star))

minimum 1.16e-13 at x = [0.99999966 0.99999932] after 238  iterations with remaining gradient norm 8.134660926768885e-07
distance to true solution: 7.623790604592505e-07


<h3>Problem 2.1:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

on the starting points $x_0$:
$$(-0.2, 1.2)$$
is close to the soltion $x^*=(0,1)$:

<h4>using exact gradient:</h4>

In [886]:
x_0= np.array([-0.2, 1.2])
x_true_0 = [0,1]
x_true_1 = [4,0]
x_star, fval, it, grad_norm = BFGS(fct_2, grad_fct_2, x_0, c=0.51, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_0 - x_star))

minimum 3.4e-14 at x = [-2.70363670e-09  1.00000009e+00] after 40   iterations with remaining gradient norm 9.603133618740993e-07
distance to true solution: 9.132737518142627e-08


<h4>using approximated gradient:</h4>

In [887]:
x_star, fval, it, grad_norm = BFGS(fct_2_dec, grad_estimate_np, x_0, c=0.51, rho=0.99, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_0 - x_star)) 

minimum 3.4e-14 at x = [-2.70368646e-09  1.00000009e+00] after 40   iterations with remaining gradient norm 9.603133622675806e-07
distance to true solution: 9.13273391451692e-08


<h3>Problem 2.2:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

on the starting points $x_0$:
$$(3.8, 0.1)$$
is close to the soltion $x^*=(4,0)$:

<h4>using exact gradient:</h4>

In [906]:
x_0= np.array([3.8, 0.1])
x_star, fval, it, grad_norm = BFGS(fct_2, grad_fct_2, x_0, c=0.5, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_1 - x_star))


minimum 8.66e-13 at x = [3.99999814e+00 7.47299407e-10] after 667  iterations with remaining gradient norm 9.396158766638441e-07
distance to true solution: 1.863193068925873e-06


<h4>using approximated gradient:</h4>

In [908]:
x_star, fval, it, grad_norm = BFGS(fct_2_dec, grad_estimate_np, x_0, c=0.5, rho=0.99, max_iter=896, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_1 - x_star))

minimum 9e-15 at x = [3.99999981e+00 8.09314444e-11] after 895  iterations with remaining gradient norm 9.579027498275813e-08
distance to true solution: 1.9147511954389457e-07


<h3>Problem 2.3:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

on the starting points $x_0$:
$$(1.9, 0.6)$$
close $x^*=(0,1)$

<h4>using exact gradient:</h4>

In [909]:
x_0= np.array([1.9, 0.6])
x_star, fval, it, grad_norm = BFGS(fct_2, grad_fct_2, x_0, c=0.106, rho=0.99, max_iter=10000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_0 - x_star))

minimum 1.1e-14 at x = [2.48073771e-09 1.00000005e+00] after 223  iterations with remaining gradient norm 9.341720419513596e-07
distance to true solution: 4.943878551800224e-08


<h4>using approximated gradient:</h4>

In [910]:
x_star, fval, it, grad_norm = BFGS(fct_2_dec, grad_estimate_np, x_0, c=0.106, rho=0.99, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_0 - x_star))

minimum 1.1e-14 at x = [2.48068795e-09 1.00000005e+00] after 223  iterations with remaining gradient norm 9.341720393122907e-07
distance to true solution: 4.943874554283383e-08


<h2>5. Problems to test SR1</h2>

<h3>Problem 1.1:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(1.2, 1.2)$$

<h4>using exact gradient:</h4>

In [892]:
x_0= np.array([1.2, 1.2])
x_true = [1, 1]
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_fct_1, x_0, c=0.34, rho=0.77, max_iter=2000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.1672212678386607e-18 at x = [1. 1.] after 11   iterations with remaining gradient norm 3.9148685859557664e-08
distance to true solution: 2.650703594869426e-09


<h4>using approximated gradient:</h4>

In [893]:
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_estimate_np, x_0, c=0.34, rho=0.77, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.098175001404131e-18 at x = [1. 1.] after 11   iterations with remaining gradient norm 3.9148503855478725e-08
distance to true solution: 2.583473219952715e-09


<h3>Problem 1.2:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(-1.2, 1)$$

<h4>using exact gradient:</h4>

In [894]:
x_0= np.array([-1.2, 1])
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_fct_1, x_0, c=0.51, rho=0.95, max_iter=10000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true - x_star))

minimum 4.442869342414104e-14 at x = [0.99999979 0.99999958] after 38   iterations with remaining gradient norm 1.8849505868609327e-07
distance to true solution: 4.7169788067320353e-07


<h4>using approximated gradient:</h4>

In [895]:
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_estimate_np, x_0, c=0.51, rho=0.95, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true - x_star))

minimum 2.646190809511522e-13 at x = [1.00000051 1.00000103] after 37   iterations with remaining gradient norm 5.313738863320231e-07
distance to true solution: 1.1511021952801512e-06


<h3>Problem 1.3:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(0.2, 0.8)$$

<h4>using exact gradient:</h4>

In [896]:
x_0= np.array([0.2, 0.8])
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_fct_1, x_0, c=0.59, rho=0.95, max_iter=10000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true - x_star))

minimum 8.376977605410461e-17 at x = [1. 1.] after 25   iterations with remaining gradient norm 4.0676682963734096e-07
distance to true solution: 2.4573247140190953e-09


<h4>using approximated gradient:</h4>

In [897]:
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_estimate_np, x_0, c=0.59, rho=0.95, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true - x_star))

minimum 8.368241720822556e-17 at x = [1. 1.] after 25   iterations with remaining gradient norm 4.067416706858997e-07
distance to true solution: 2.39080812319887e-09


<h3>Problem 2.1:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

on the starting points $x_0$:
$$(-0.2, 1.2)$$
is close to the soltion $x^*=(0,1)$:

<h4>using exact gradient:</h4>

In [898]:
x_0= np.array([-0.2, 1.2])
x_true_0 = [0,1]
x_true_1 = [4,0]
x_star, fval, it, grad_norm = SR1_line(fct_2, grad_fct_2, x_0, c=0.45, rho=0.99, max_iter=10000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_0 - x_star))

minimum 8.912391347522993e-17 at x = [6.23112165e-10 1.00000000e+00] after 9    iterations with remaining gradient norm 1.9377023511855158e-07
distance to true solution: 2.6958726549807412e-09


<h4>using approximated gradient:</h4>

In [899]:
x_star, fval, it, grad_norm = SR1_line(fct_2_dec, grad_estimate_np, x_0, c=0.45, rho=0.99, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_0 - x_star))

minimum 8.911349853946626006860014086E-17 at x = [6.23062418e-10 1.00000000e+00] after 9    iterations with remaining gradient norm 1.9377023536224526e-07
distance to true solution: 2.695824431509028e-09


<h3>Problem 2.2:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

on the starting points $x_0$:
$$(3.8, 0.1)$$
is close to the soltion $x^*=(4,0)$:

<h4>using exact gradient:</h4>

In [900]:
x_0= np.array([3.8, 0.1])
x_star, fval, it, grad_norm = SR1_line(fct_2, grad_fct_2, x_0, c=0.45, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_1 - x_star))

minimum 2.5380414714139796e-13 at x = [3.99999899e+00 4.10398969e-10] after 6    iterations with remaining gradient norm 5.052765037900244e-07
distance to true solution: 1.0084185597888156e-06


<h4>using approximated gradient:</h4>

In [901]:
x_star, fval, it, grad_norm = SR1_line(fct_2_dec, grad_estimate_np, x_0, c=0.45, rho=0.99, max_iter=1000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_1 - x_star))

minimum 2.538040743787989848281280723E-13 at x = [3.99999899e+00 4.10348911e-10] after 6    iterations with remaining gradient norm 5.052765032401458e-07
distance to true solution: 1.0084184109985716e-06


<h3>Problem 2.3:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

on the starting points $x_0$:
$$(1.9, 0.6)$$
close $x^*=(0,1)$

<h4>using exact gradient:</h4>

In [902]:
x_0= np.array([1.9, 0.6])
x_star, fval, it, grad_norm = SR1_line(fct_2, grad_fct_2, x_0, c=0.45, rho=0.99, max_iter=100)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_1 - x_star))

minimum 1.782281402169303e-18 at x = [4.00000000e+00 2.25634757e-11] after 11   iterations with remaining gradient norm 1.0531290483455241e-07
distance to true solution: 1.5876336082988915e-09


<h4>using approximated gradient:</h4>

In [903]:
x_star, fval, it, grad_norm = SR1_line(fct_2_dec, grad_estimate_np, x_0, c=0.45, rho=0.99, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} at x = {x_star} after {it:<4} iterations with remaining gradient norm {grad_norm}')
print('distance to true solution:', np.linalg.norm(x_true_1 - x_star))

minimum 1.776903008042981077282984878E-18 at x = [4.00000000e+00 2.25134128e-11] after 11   iterations with remaining gradient norm 1.0531290275664545e-07
distance to true solution: 1.5874828105344866e-09
