In [179]:
import numpy as np
import decimal
from decimal import Decimal

<h3>Problem 1:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
starting points $x_0$:
$$(1.2, 1.2), (-1.2, 1), (0.2, 0.8)$$
with the minimum at $x^*=(1,1)$

In [180]:
def fct_1(x: np.ndarray) -> float:
    """Rosenbrock function f(x1, x2) = 100 (x2 - x1^2)^2 + (1 - x1)^2.

    Parameters
    ----------
    x : indexable with at least two entries; x[0] is x1 and x[1] is x2.

    Returns
    -------
    The function value. The arithmetic follows the element type of ``x``:
    float entries give a float, ``Decimal`` entries give a ``Decimal``.
    """
    valley_residual = x[1] - x[0]**2
    return 100*valley_residual**2 + (1-x[0])**2

def grad_fct_1(x: np.ndarray) -> np.ndarray:
    """Analytic gradient of fct_1 at x = (x1, x2).

    Returns a float64 array
    [-400 x1 (x2 - x1^2) - 2 (1 - x1),  200 (x2 - x1^2)].
    """
    valley_residual = x[1] - x[0]**2
    return np.array([-400*x[0]*valley_residual - 2*(1-x[0]),
                     200*valley_residual], dtype=np.float64)

def hessian_fct_1(x: np.ndarray) -> np.ndarray:
    """Analytic Hessian of fct_1 at x = (x1, x2) as a 2x2 float64 array.

    [[1200 x1^2 - 400 x2 + 2, -400 x1],
     [-400 x1,                 200   ]]
    """
    # The matrix is symmetric, so the mixed partial is computed once.
    mixed_partial = -400*x[0]
    return np.array([[1200*x[0]**2 - 400*x[1] + 2, mixed_partial],
                     [mixed_partial, 200]], dtype=np.float64)

<h3>Problem 2:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

starting points $x_0$:
$$(-0.2, 1.2), (3.8, 0.1), (1.9, 0.6)$$<br>
with minima at $x^*=(0,1)$ and $x^*=(4,0)$, and a saddle point at $(0.43685, 0.10921)$

In [181]:
def fct_2(x: np.ndarray) -> float:
    """Test function f(x1, x2) = 150 (x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2.

    Parameters
    ----------
    x : indexable with at least two float entries; x[0] is x1 and x[1] is x2.
        ``Decimal`` entries are not supported here because the float literal
        0.5 cannot be multiplied with a ``Decimal``.

    Returns
    -------
    The function value as a float.
    """
    product_term = (x[0] * x[1])**2
    linear_term = 0.5 * x[0] + 2*x[1] - 2
    return 150*product_term + linear_term**2

def fct_2_dec(x: np.ndarray) -> Decimal:
    """Decimal-arithmetic evaluation of 150 (x1 x2)^2 + (0.5 x1 + 2 x2 - 2)^2.

    Both coordinates are converted with ``Decimal(...)`` before any
    arithmetic, so the entries of ``x`` may be floats or ``Decimal`` values.

    Returns
    -------
    The function value as a ``Decimal``.
    """
    x1 = Decimal(x[0])
    x2 = Decimal(x[1])
    return 150*(x1 * x2)**2 + (Decimal(0.5) * x1 + 2*x2 - 2)**2

def grad_fct_2(x: np.ndarray) -> np.ndarray:
    """Analytic gradient of fct_2 at x = (x1, x2).

    Returns a float64 array
    [300 x1 x2^2 + 0.5 x1 + 2 x2 - 2,  300 x1^2 x2 + 2 x1 + 8 x2 - 8].
    """
    x1, x2 = x[0], x[1]
    return np.array([300*x1*x2**2 + 0.5*x1 + 2*x2 - 2,
                     300*(x1**2)*x2 + 2*x1 + 8*x2 - 8], dtype=np.float64)

def hessian_fct_2(x: np.ndarray) -> np.ndarray:
    """Analytic Hessian of fct_2 at x = (x1, x2) as a 2x2 float64 array.

    [[300 x2^2 + 0.5,   600 x1 x2 + 2],
     [600 x1 x2 + 2,    300 x1^2 + 8 ]]
    """
    # The matrix is symmetric, so the mixed partial is computed once.
    mixed_partial = 600*x[0]*x[1] + 2
    return np.array([[300*x[1]**2 + 0.5, mixed_partial],
                     [mixed_partial, 300*x[0]**2 + 8]], dtype=np.float64)

<h2>1. Implementation of BFGS</h2>

In [182]:
def Backtrack(f, x, gradient, pk, alpha_zero=1, rho=0.5, c=0.5):
    """Backtracking line search for the Armijo sufficient-decrease condition.

    Starting from ``alpha_zero``, the step length is multiplied by ``rho``
    until ``f(x + alpha * pk) <= f(x) + c * alpha * gradient.dot(pk)``.

    Parameters
    ----------
    f : callable returning the objective value at a point.
    x : current point.
    gradient : gradient of ``f`` at ``x``.
    pk : search direction.
    alpha_zero : first trial step length.
    rho : factor applied to the step after each rejected trial
        (must be below 1 for the step to shrink).
    c : sufficient-decrease constant.

    Returns
    -------
    The first trial step length that satisfies the condition.

    Raises
    ------
    ValueError
        If ``f(x)`` is NaN, or the direction or directional derivative is
        not finite. The acceptance test can then never succeed and the
        loop would not terminate.
    """
    # Both quantities are independent of alpha, so evaluate them only once.
    f_ref = float(f(x))
    slope = np.dot(gradient, pk)

    # `f_ref != f_ref` is True only for NaN.
    if f_ref != f_ref or not np.all(np.isfinite(pk)) or not np.isfinite(slope):
        raise ValueError('line search needs a finite direction, slope and f(x)')

    alpha = alpha_zero
    while not f(x + alpha * pk) <= f_ref + c * alpha * slope:
        alpha *= rho

    return alpha

In [183]:
def BFGS(f, grad, initial_guess, max_iter=1000, tol=1e-6, c=0.5, rho=0.5, estimate=False):
    """Minimise ``f`` with the BFGS quasi-Newton method and a backtracking line search.

    An approximation ``H`` of the inverse Hessian is started at the identity
    and updated after every step; the search direction is ``-H @ gradient``.

    Parameters
    ----------
    f : objective function.
    grad : gradient callable. Called as ``grad(x)`` when ``estimate`` is
        False, and as ``grad(f, x_as_decimals, eps)`` when it is True.
    initial_guess : start point (array).
    max_iter : maximum number of BFGS updates.
    tol : iteration stops once the gradient norm is below this value.
    c, rho : passed on to ``Backtrack``.
    estimate : select the finite-difference calling convention for ``grad``.

    Returns
    -------
    (x, f(x), number of completed updates, gradient norm at x)
    """
    # Finite-difference step handed to `grad` in estimate mode. Decimal() of a
    # float literal keeps the binary rounding error of that literal; the value
    # is left as it was so existing results are reproduced.
    e_g = Decimal(0.0000000000001)

    def eval_gradient(point):
        # Single place that knows the two calling conventions of `grad`.
        if estimate:
            point_dec = np.array([Decimal(val) for val in point])
            return grad(f, point_dec, e_g)
        return grad(point)

    x = initial_guess
    identity = np.eye(len(x))
    H = identity

    # `gradient`/`grad_norm` always describe the current `x`, so the value
    # returned at the end is never stale or unbound (also for max_iter=0).
    gradient = eval_gradient(x)
    grad_norm = np.linalg.norm(gradient)
    num_iter = 0

    while num_iter < max_iter and not (grad_norm < tol):
        search_direction = -np.dot(H, gradient)

        alpha = Backtrack(f=f, x=x, gradient=gradient, pk=search_direction, alpha_zero=1, rho=rho, c=c)

        x_new = x + alpha * search_direction

        # One gradient evaluation per iteration; it is reused as the next
        # iteration's current gradient.
        gradient_new = eval_gradient(x_new)
        yk = gradient_new - gradient
        sk = x_new - x

        curvature = np.dot(yk, sk)
        if curvature == 0:
            # The update divides by yk.sk; stop instead of dividing by zero.
            print('combination of c,rho leads to error')
            break

        rh = 1.0 / curvature
        A = identity - rh * np.outer(sk, yk)
        H = np.dot(A, np.dot(H, A.T)) + rh * np.outer(sk, sk)

        x = x_new
        gradient = gradient_new
        grad_norm = np.linalg.norm(gradient)
        num_iter += 1

    return x, f(x), num_iter, grad_norm

<h2>2. Implementation of SR1</h2>

<h3>using line search</h3>

In [184]:
import numpy as np
from scipy.optimize import line_search

def SR1_line(f, grad, initial_guess, max_iter=1000, c=0.5, rho=0.5, tol=1e-6, estimate=False):
    """Minimise ``f`` with the symmetric-rank-one (SR1) update and a backtracking line search.

    ``Bk`` approximates the inverse Hessian (the search direction is
    ``-Bk @ gradient``). It starts at the identity and is updated with
    ``Bk += r r^T / (r^T yk)`` where ``r = sk - Bk yk``.

    Parameters
    ----------
    f : objective function.
    grad : gradient callable. Called as ``grad(x)`` when ``estimate`` is
        False, and as ``grad(f, x_as_decimals, eps)`` when it is True.
    initial_guess : start point (array).
    max_iter : maximum number of steps.
    c, rho : passed on to ``Backtrack``.
    tol : iteration stops once the gradient norm is below this value; also
        used as threshold on ``|sk.yk|`` below which the update is skipped.
    estimate : select the finite-difference calling convention for ``grad``.

    Returns
    -------
    (x, f(x), number of completed steps, gradient norm at x)
    """
    # Finite-difference step handed to `grad` in estimate mode (value kept
    # exactly as before so existing results are reproduced).
    e_g = Decimal(0.0000000000001)
    # Relative threshold for the SR1 skipping rule below.
    skip_ratio = 1e-8

    def eval_gradient(point):
        # Single place that knows the two calling conventions of `grad`.
        if estimate:
            point_dec = np.array([Decimal(val) for val in point])
            return grad(f, point_dec, e_g)
        return grad(point)

    x = initial_guess
    Bk = np.eye(len(x))

    # `gradient` always belongs to the current `x`, so the returned norm is
    # defined even when the start point already satisfies `tol`.
    gradient = eval_gradient(x)
    num_iter = 0

    while num_iter < max_iter and not (np.linalg.norm(gradient) < tol):
        search_direction = -np.dot(Bk, gradient)

        alpha = Backtrack(f=f, x=x, gradient=gradient, pk=search_direction, alpha_zero=1, rho=rho, c=c)

        x_new = x + alpha * search_direction

        gradient_new = eval_gradient(x_new)
        yk = gradient_new - gradient
        sk = x_new - x

        if np.abs(np.dot(sk, yk)) > tol:
            residual = sk - np.dot(Bk, yk)
            denominator = np.dot(residual, yk)
            # Skip the update when the denominator is zero or negligible
            # relative to its factors; dividing by it would fill Bk with
            # inf/NaN (this happens e.g. when Bk already maps yk onto sk).
            if np.abs(denominator) > skip_ratio * np.linalg.norm(residual) * np.linalg.norm(yk):
                Bk += np.outer(residual, residual) / denominator

        x = x_new
        gradient = gradient_new
        num_iter += 1

    return x, f(x), num_iter, np.linalg.norm(gradient)

<h2>3. Gradient and hessian approximation</h2>

In [185]:
# use to get gradient as np.array
def grad_estimate_np(f, x: np.ndarray, eps: decimal.Decimal) -> np.ndarray:
    """Forward-difference gradient of ``f`` at ``x`` as a float array.

    Component ``i`` is ``(f(x + eps * e_i) - f(x)) / eps``, converted to float
    and rounded to 15 decimal places.

    Parameters
    ----------
    f : objective function; it is evaluated on arrays of ``Decimal``.
    x : point as an array of ``Decimal`` values.
    eps : finite-difference step.

    Returns
    -------
    float array with one entry per component of ``x``.
    """
    n = len(x)
    if n == 0:
        return np.array([], dtype=float)

    # f(x) does not depend on the component, so evaluate it only once.
    f_x = f(x)

    components = []
    for i in range(n):
        unit_vector = np.full(n, Decimal(0))
        unit_vector[i] = Decimal(1)
        quotient = (f(x + (eps * unit_vector)) - f_x) / eps
        components.append(round(float(quotient), 15))
    return np.array(components, dtype=float)

# use for further calculation of hessian estimate
def grad_estimate(f, x: np.ndarray, eps: decimal.Decimal) -> np.ndarray:
    """Forward-difference gradient of ``f`` at ``x``, kept in ``Decimal``.

    Component ``i`` is ``(f(x + eps * e_i) - f(x)) / eps``. Unlike
    ``grad_estimate_np`` the quotients are neither rounded nor converted to
    float, so they can be differenced again without losing precision.

    Parameters
    ----------
    f : objective function; it is evaluated on arrays of ``Decimal``.
    x : point as an array of ``Decimal`` values.
    eps : finite-difference step.

    Returns
    -------
    object array of ``Decimal`` with one entry per component of ``x``.
    """
    n = len(x)
    grad = np.full(n, Decimal(0))
    if n == 0:
        return np.array(grad)

    # f(x) does not depend on the component, so evaluate it only once.
    f_x = f(x)

    for i in range(n):
        unit_vector = np.full(n, Decimal(0))
        unit_vector[i] = Decimal(1)
        grad[i] = (f(x + (eps * unit_vector)) - f_x) / eps
    return np.array(grad)

In [186]:
def hessian_estimate(f, x: np.ndarray, eps_hess: decimal.Decimal, eps_grad) -> np.ndarray:
    """Finite-difference Hessian of ``f`` at ``x`` as a float array.

    Column ``i`` is ``(g(x + eps_hess * e_i) - g(x)) / eps_hess`` where ``g``
    is the forward-difference gradient from ``grad_estimate`` with step
    ``eps_grad``. Entries are converted to float and rounded to 15 decimals.

    Parameters
    ----------
    f : objective function; it is evaluated on arrays of ``Decimal``.
    x : point as an array of ``Decimal`` values.
    eps_hess : step used to difference the gradient.
    eps_grad : step used inside each gradient estimate.

    Returns
    -------
    float array of shape (len(x), len(x)).
    """
    n = len(x)
    hessian = np.full((n, n), Decimal(0))
    if n == 0:
        return np.array(hessian, dtype=float)

    # The gradient at the base point is the same for every column.
    base_gradient = grad_estimate(f=f, x=x, eps=eps_grad)

    for i in range(n):
        unit_vector = np.full(n, Decimal(0))
        unit_vector[i] = Decimal(1)
        shifted_gradient = grad_estimate(f=f, x=(x + (eps_hess * unit_vector)), eps=eps_grad)
        column = np.divide(shifted_gradient - base_gradient, eps_hess)
        hessian[:, i] = np.array([round(float(g), 15) for g in column])
    return np.array(hessian, dtype=float)

<h2>4. Problems to test BFGS</h2>

<h3>Problem 1.1:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(1.2, 1.2)$$

<h4>using exact gradient:</h4>

In [187]:
# BFGS on fct_1 with the analytic gradient, started at (1.2, 1.2).
# x_0 and x_true stay defined for the following cells.
x_0= np.array([1.2, 1.2])
x_true = [1, 1]
x_star, fval, it, grad_norm = BFGS(fct_1, grad_fct_1, x_0, c=0.51, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
# Euclidean distance between the returned point and the known minimiser.
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 5.254864052829333e-17 
x = [1.00000001 1.00000001] 
12 iterations 
remaining gradient norm 1.551951036325939e-07
distance to true solution: 1.4249732950384684e-08


<h4>using approximated gradient:</h4>

In [188]:
# Same settings, gradient from forward differences (estimate=True).
# Relies on x_0 and x_true set in an earlier cell.
x_star, fval, it, grad_norm = BFGS(fct_1, grad_estimate_np, x_0, c=0.51, rho=0.99, max_iter=1000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 1.1597360898233273e-16 
x = [1.00000001 1.00000002] 
12 iterations 
remaining gradient norm 2.3080410061693277e-07
distance to true solution: 2.116338497564563e-08


<h3>Problem 1.2:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(-1.2, 1)$$

<h4>using exact gradient:</h4>

In [189]:
# BFGS on fct_1 with the analytic gradient, started at (-1.2, 1).
# Relies on x_true set in an earlier cell.
x_0= np.array([-1.2, 1])
x_star, fval, it, grad_norm = BFGS(fct_1, grad_fct_1, x_0, c=0.5, rho=0.95, max_iter=1000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 1.7886157927455308e-20 
x = [1. 1.] 
30 iterations 
remaining gradient norm 1.6133208902232782e-09
distance to true solution: 2.8827528592999823e-10


<h4>using approximated gradient:</h4>

In [190]:
# Same settings, gradient from forward differences (estimate=True).
# Relies on x_0 and x_true set in earlier cells.
x_star, fval, it, grad_norm = BFGS(fct_1, grad_estimate_np, x_0, c=0.5, rho=0.95, max_iter=1000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.6475530861201032e-20 
x = [1. 1.] 
30 iterations 
remaining gradient norm 1.6121252949085563e-09
distance to true solution: 3.5549669645521607e-10


<h3>Problem 1.3:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(0.2, 0.8)$$

<h4>using exact gradient:</h4>

In [191]:
# BFGS on fct_1 with the analytic gradient, started at (0.2, 0.8).
# Relies on x_true set in an earlier cell.
x_0= np.array([0.2, 0.8])
x_star, fval, it, grad_norm = BFGS(fct_1, grad_fct_1, x_0, c=0.5, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.7492625029467825e-18 
x = [1. 1.] 
20 iterations 
remaining gradient norm 3.2573494428790844e-08
distance to true solution: 3.3348498894191675e-09


<h4>using approximated gradient:</h4>

In [192]:
# Same settings, gradient from forward differences (estimate=True).
# Relies on x_0 and x_true set in earlier cells.
x_star, fval, it, grad_norm = BFGS(fct_1, grad_estimate_np, x_0, c=0.5, rho=0.99, max_iter=1000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 7.58418492950192e-21 
x = [1. 1.] 
20 iterations 
remaining gradient norm 3.1828489299054078e-09
distance to true solution: 1.102901332926877e-10


<h3>Problem 2.1:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

The starting point $x_0$:
$$(-0.2, 1.2)$$
is close to the solution $x^*=(0,1)$:

<h4>using exact gradient:</h4>

In [193]:
# BFGS on fct_2 with the analytic gradient, started at (-0.2, 1.2).
x_0= np.array([-0.2, 1.2])
# The two minimisers of fct_2; later cells pick the one to compare against.
x_true_0 = [0,1]
x_true_1 = [4,0]
x_star, fval, it, grad_norm = BFGS(fct_2, grad_fct_2, x_0, c=0.51, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_0 - x_star))

minimum 2.0926138567587262e-18 
x = [-8.22191363e-11  1.00000000e+00] 
9 iterations 
remaining gradient norm 2.398960860063864e-08
distance to true solution: 5.460623059450867e-10


<h4>using approximated gradient:</h4>

In [194]:
# Forward-difference gradient; fct_2_dec is used because the estimator
# evaluates the objective on Decimal arrays. fval is therefore a Decimal.
# Relies on x_0 and x_true_0 set in an earlier cell.
x_star, fval, it, grad_norm = BFGS(fct_2_dec, grad_estimate_np, x_0, c=0.51, rho=0.99, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_0 - x_star))

minimum 2.093633286543300799614711971E-18 
x = [-8.22688848e-11  1.00000000e+00] 
9 iterations 
remaining gradient norm 2.3989608103448146e-08
distance to true solution: 5.460324819507533e-10


<h3>Problem 2.2:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

The starting point $x_0$:
$$(3.8, 0.1)$$
is close to the solution $x^*=(4,0)$:

<h4>using exact gradient:</h4>

In [195]:
# BFGS on fct_2 with the analytic gradient, started at (3.8, 0.1);
# the result is compared against x_true_1 = (4, 0) from an earlier cell.
x_0= np.array([3.8, 0.1])
x_star, fval, it, grad_norm = BFGS(fct_2, grad_fct_2, x_0, c=0.5, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_1 - x_star))

minimum 4.583218874437106e-15 
x = [3.99999986e+00 2.90509620e-11] 
6 iterations 
remaining gradient norm 1.4771310771991682e-07
distance to true solution: 1.354852341488151e-07


<h4>using approximated gradient:</h4>

In [196]:
# Forward-difference gradient with the Decimal objective fct_2_dec.
# Relies on x_0 and x_true_1 set in earlier cells.
x_star, fval, it, grad_norm = BFGS(fct_2_dec, grad_estimate_np, x_0, c=0.5, rho=0.99, max_iter=896, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_1 - x_star))

minimum 4.583215299463996185352639926E-15 
x = [3.99999986e+00 2.90009009e-11] 
6 iterations 
remaining gradient norm 1.4771310308313813e-07
distance to true solution: 1.3548508403594068e-07


<h3>Problem 2.3:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

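The starting point $x_0$:
$$(1.9, 0.6)$$
is closer to $x^*=(0,1)$ than to $x^*=(4,0)$; the runs recorded below nevertheless converge to $x^*=(4,0)$, so the distance is measured to that minimum.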

<h4>using exact gradient:</h4>

In [197]:
# BFGS on fct_2 with the analytic gradient, started at (1.9, 0.6).
# The distance is measured to x_true_1 = (4, 0) from an earlier cell.
x_0= np.array([1.9, 0.6])
x_star, fval, it, grad_norm = BFGS(fct_2, grad_fct_2, x_0, c=0.5, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_1 - x_star))

minimum 7.670302364212236e-18 
x = [ 4.00000000e+00 -2.84353418e-11] 
9 iterations 
remaining gradient norm 1.4608400056245542e-07
distance to true solution: 4.673719094477779e-09


<h4>using approximated gradient:</h4>

In [198]:
# Forward-difference gradient with the Decimal objective fct_2_dec.
# Relies on x_0 and x_true_1 set in earlier cells.
x_star, fval, it, grad_norm = BFGS(fct_2_dec, grad_estimate_np, x_0, c=0.5, rho=0.99, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_1 - x_star))

minimum 7.677261353896161266960228935E-18 
x = [ 4.00000000e+00 -2.84854271e-11] 
9 iterations 
remaining gradient norm 1.460841073911588e-07
distance to true solution: 4.673567967861984e-09


<h2>5. Problems to test SR1</h2>

<h3>Problem 1.1:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(1.2, 1.2)$$

<h4>using exact gradient:</h4>

In [199]:
# SR1 on fct_1 with the analytic gradient, started at (1.2, 1.2).
# x_0 and x_true stay defined for the following cells.
x_0= np.array([1.2, 1.2])
x_true = [1, 1]
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_fct_1, x_0, c=0.34, rho=0.77, max_iter=2000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
# Euclidean distance between the returned point and the known minimiser.
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.1672212678386607e-18 
x = [1. 1.] 
11 iterations 
remaining gradient norm 3.9148685859557664e-08
distance to true solution: 2.650703594869426e-09


<h4>using approximated gradient:</h4>

In [200]:
# Same line-search settings, gradient from forward differences (estimate=True).
# Relies on x_0 and x_true set in an earlier cell.
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_estimate_np, x_0, c=0.34, rho=0.77, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.098175001404131e-18 
x = [1. 1.] 
11 iterations 
remaining gradient norm 3.9148503855478725e-08
distance to true solution: 2.583473219952715e-09


<h3>Problem 1.2:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(-1.2, 1)$$

<h4>using exact gradient:</h4>

In [201]:
# SR1 on fct_1 with the analytic gradient, started at (-1.2, 1).
# Relies on x_true set in an earlier cell.
x_0= np.array([-1.2, 1])
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_fct_1, x_0, c=0.51, rho=0.95, max_iter=10000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 4.442869342414104e-14 
x = [0.99999979 0.99999958] 
38 iterations 
remaining gradient norm 1.8849505868609327e-07
distance to true solution: 4.7169788067320353e-07


<h4>using approximated gradient:</h4>

In [202]:
# Same settings, gradient from forward differences (estimate=True).
# Relies on x_0 and x_true set in earlier cells.
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_estimate_np, x_0, c=0.51, rho=0.95, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 2.646190809511522e-13 
x = [1.00000051 1.00000103] 
37 iterations 
remaining gradient norm 5.313738863320231e-07
distance to true solution: 1.1511021952801512e-06


<h3>Problem 1.3:</h3>
$$f(x_1, x_2)=100(x_2-x_1^2)^2+(1-x_1)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
-400x_1(x_2-x_1^2)-2(1-x_1)\\ 
200(x_2-x_1^2)\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
1200x_1^2-400x_2+2 & -400x_1\\ 
-400x_1 & 200\\ 
\end{pmatrix}$$
on the starting points $x_0$:
$$(0.2, 0.8)$$

<h4>using exact gradient:</h4>

In [203]:
# SR1 on fct_1 with the analytic gradient, started at (0.2, 0.8).
# Relies on x_true set in an earlier cell.
x_0= np.array([0.2, 0.8])
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_fct_1, x_0, c=0.59, rho=0.95, max_iter=10000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 8.376977605410461e-17 
x = [1. 1.] 
25 iterations 
remaining gradient norm 4.0676682963734096e-07
distance to true solution: 2.4573247140190953e-09


<h4>using approximated gradient:</h4>

In [204]:
# Same settings, gradient from forward differences (estimate=True).
# Relies on x_0 and x_true set in earlier cells.
x_star, fval, it, grad_norm = SR1_line(fct_1, grad_estimate_np, x_0, c=0.59, rho=0.95, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true - x_star))

minimum 8.368241720822556e-17 
x = [1. 1.] 
25 iterations 
remaining gradient norm 4.067416706858997e-07
distance to true solution: 2.39080812319887e-09


<h3>Problem 2.1:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

The starting point $x_0$:
$$(-0.2, 1.2)$$
is close to the solution $x^*=(0,1)$:

<h4>using exact gradient:</h4>

In [205]:
# SR1 on fct_2 with the analytic gradient, started at (-0.2, 1.2).
x_0= np.array([-0.2, 1.2])
# The two minimisers of fct_2; later cells pick the one to compare against.
x_true_0 = [0,1]
x_true_1 = [4,0]
x_star, fval, it, grad_norm = SR1_line(fct_2, grad_fct_2, x_0, c=0.45, rho=0.99, max_iter=10000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_0 - x_star))

minimum 8.912391347522993e-17 
x = [6.23112165e-10 1.00000000e+00] 
9 iterations 
remaining gradient norm 1.9377023511855158e-07
distance to true solution: 2.6958726549807412e-09


<h4>using approximated gradient:</h4>

In [206]:
# Forward-difference gradient; fct_2_dec is used because the estimator
# evaluates the objective on Decimal arrays. fval is therefore a Decimal.
# Relies on x_0 and x_true_0 set in an earlier cell.
x_star, fval, it, grad_norm = SR1_line(fct_2_dec, grad_estimate_np, x_0, c=0.45, rho=0.99, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_0 - x_star))

minimum 8.911349853946626006860014086E-17 
x = [6.23062418e-10 1.00000000e+00] 
9 iterations 
remaining gradient norm 1.9377023536224526e-07
distance to true solution: 2.695824431509028e-09


<h3>Problem 2.2:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

The starting point $x_0$:
$$(3.8, 0.1)$$
is close to the solution $x^*=(4,0)$:

<h4>using exact gradient:</h4>

In [207]:
# SR1 on fct_2 with the analytic gradient, started at (3.8, 0.1);
# the result is compared against x_true_1 = (4, 0) from an earlier cell.
x_0= np.array([3.8, 0.1])
x_star, fval, it, grad_norm = SR1_line(fct_2, grad_fct_2, x_0, c=0.45, rho=0.99, max_iter=1000)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_1 - x_star))

minimum 2.5380414714139796e-13 
x = [3.99999899e+00 4.10398969e-10] 
6 iterations 
remaining gradient norm 5.052765037900244e-07
distance to true solution: 1.0084185597888156e-06


<h4>using approximated gradient:</h4>

In [208]:
# Forward-difference gradient with the Decimal objective fct_2_dec.
# Relies on x_0 and x_true_1 set in earlier cells.
x_star, fval, it, grad_norm = SR1_line(fct_2_dec, grad_estimate_np, x_0, c=0.45, rho=0.99, max_iter=1000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_1 - x_star))

minimum 2.538040743787989848281280723E-13 
x = [3.99999899e+00 4.10348911e-10] 
6 iterations 
remaining gradient norm 5.052765032401458e-07
distance to true solution: 1.0084184109985716e-06


<h3>Problem 2.3:</h3>
$$f(x_1, x_2)=150(x_1 x_2)^2+(0.5x_1 + 2x_2 - 2)^2$$
$$f'(x_1, x_2)=\begin{pmatrix}
300x_2^2 x_1 + 0.5 x_1 + 2x_2 - 2\\ 
300x_1^2 x_2 + 2x_1 + 8x_2 - 8\\ 
\end{pmatrix}$$
$$f''(x_1, x_2)=\begin{pmatrix}
300x_2^2 +0.5 & 600x_1 x_2+2\\ 
600x_1 x_2+2 & 300x_1^2 +8\\ 
\end{pmatrix}$$

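The starting point $x_0$:
$$(1.9, 0.6)$$
is closer to $x^*=(0,1)$ than to $x^*=(4,0)$; the runs recorded below nevertheless converge to $x^*=(4,0)$, so the distance is measured to that minimum.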

<h4>using exact gradient:</h4>

In [209]:
# SR1 on fct_2 with the analytic gradient, started at (1.9, 0.6).
# The distance is measured to x_true_1 = (4, 0) from an earlier cell.
x_0= np.array([1.9, 0.6])
x_star, fval, it, grad_norm = SR1_line(fct_2, grad_fct_2, x_0, c=0.45, rho=0.99, max_iter=100)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_1 - x_star))

minimum 1.782281402169303e-18 
x = [4.00000000e+00 2.25634757e-11] 
11 iterations 
remaining gradient norm 1.0531290483455241e-07
distance to true solution: 1.5876336082988915e-09


<h4>using approximated gradient:</h4>

In [210]:
# Forward-difference gradient with the Decimal objective fct_2_dec.
# Relies on x_0 and x_true_1 set in earlier cells.
x_star, fval, it, grad_norm = SR1_line(fct_2_dec, grad_estimate_np, x_0, c=0.45, rho=0.99, max_iter=10000, estimate=True)
print(f'minimum {fval:<4} \nx = {x_star} \n{it} iterations \nremaining gradient norm {grad_norm}')
print('distance to true solution:',np.linalg.norm(x_true_1 - x_star))

minimum 1.776903008042981077282984878E-18 
x = [4.00000000e+00 2.25134128e-11] 
11 iterations 
remaining gradient norm 1.0531290275664545e-07
distance to true solution: 1.5874828105344866e-09
