# Homework #3 
## Opal Issan, due Oct 23rd. 

# Trust Region method. 

![](homework3_q.png)

In [124]:
import numpy as np 
import matplotlib.pyplot as plt
import matplotlib.cm as cm


%matplotlib notebook
plt.rcParams['figure.figsize'] = [10, 5] # default fig size.

In [125]:
def rosenbrock_fun(xk):
    """ This function returns the output of the Rosenbrock function."""
    x1, x2 = xk
    return 100*((x2 - x1**2)**2) + (1 - x1)**2

In [126]:
def rosenbrock_gradient(xk):
    """ return [df/dx1 df/dx2]"""
    x1, x2 = xk
    dfx1 = -400*x2*x1 + 400*(x1**3) - 2 + 2*x1
    dfx2 = 200*x2 - 200*(x1**2)
    return np.array([dfx1, dfx2])

In [127]:
def rosenbrock_hessian(xk):
    """ return [d2f/dx1^2   d2f/dx1dx2
                d2f/dx1dx2  d2f/dx2^2]"""
    x1, x2 = xk
    h = np.zeros((2, 2))
    h[0, 0] = -400*x2 + 1200*(x1**2) + 2
    h[0, 1] = -400*x1
    h[1, 0] = -400*x1
    h[1, 1] = 200 
    return h 

The “model” is based on the Taylor expansion of the objective function of the current point xk:

$ m_{k}(\bar p) = f( \bar x_{k}) + \bar p^{T} \nabla f(\bar x_{k}) +\frac{1}{2} ̄ \bar p^{T} B_{k} \bar p $,

In [128]:
def mk_fun(xk, pk):
    """ mk taylor approximation of the objective function"""
    Bk = rosenbrock_hessian(xk)
    return rosenbrock_fun(xk) + np.dot(pk, rosenbrock_gradient(xk)) + 0.5*np.dot(pk, np.matmul(Bk, pk))

In [129]:
def rho_k(xk, pk):
    """ return rho_k = (f(xk) - f(xk+pk))/(mk(0) - mk(pk))"""
    return (rosenbrock_fun(xk) - rosenbrock_fun(xk + pk))/(mk_fun(xk, [0, 0]) - mk_fun(xk, pk))

In [130]:
def find_tau(pk_u, pk_fs, delta):
    """ return tau between 0 and 2. 
        ||pk_u + (tau)(pk_fs - pk_u)||^2 = delta^2
    """
    diff = pk_fs - pk_u
    dot_diff = np.dot(diff, diff)
    dot_pku_diff = np.dot(pk_u, diff)
    dot_pu = np.dot(pk_u, pk_u)
    elem = dot_pku_diff ** 2 - dot_diff * (dot_pu - delta ** 2)
    tau = (-dot_pku_diff + np.sqrt(elem)) / dot_diff

    return pk_u + tau * diff

In [131]:
def get_pk_fs(gradient, hessian):
    """ search direction for Newton's method."""
    h_inv = np.linalg.inv(hessian)
    return -np.matmul(h_inv, gradient)

![](dogleg_pc.png)

In [132]:
def dogleg(xk, delta):
    Bk = rosenbrock_hessian(xk)
    g = rosenbrock_gradient(xk)
    
    num = np.dot(g, np.dot(g, g))
    den = np.dot(np.matmul(g.T, Bk), g)
    pk_u = -np.divide(num, den)
    
    pk_fs = get_pk_fs(g, Bk)
    
    if np.linalg.norm(pk_u) >= delta:
        pk_dl = delta * pk_u / np.linalg.norm(pk_u)
        
    elif np.linalg.norm(pk_fs) <= delta:
        pk_dl = pk_fs
        
    else:
        pk_dl = find_tau(pk_u, pk_fs, delta)
        
    return pk_dl

![](tspc.png)

In [133]:
def trust_region(x0, delta=0.1, delta_max=10, eta=10**-3, tol=1e-8):
    k = 0
    xk = x0
    
    # while optimality is not satisfied. 
    while (rosenbrock_fun(xk) > tol and np.linalg.norm(rosenbrock_gradient(xk)) > tol):
        
        # get pk approximate solution. Using dogleg method. 
        pk = dogleg(xk, delta)
        
        # evaluate rho_k
        rk = rho_k(xk, pk)
        
        if rk < 0.25:
            delta = 0.25*delta 
            
        else:
            
            if rk > 3/4 and np.linalg.norm(pk) == delta:
                delta = min(2*delta, delta_max)
                
            else:
                delta = delta 
                
        if rk > eta:
            xk = xk + pk
            
        else:
            xk = xk
        
        k +=1
        print("iteration #", k)
        print("f = ", rosenbrock_fun(xk))
        print("||gradient(f(x))|| = ", rosenbrock_fun(xk))
        print("xk = ", xk)
        print("delta = ", delta)
        print("\n")

    return xk, k, pk

In [137]:
trust_region(x0=[1.2, 1.2], delta=0.1, delta_max=10, eta=10**-3, tol=1e-8)

iteration # 1
f =  0.039296400966136735
||gradient(f(x))|| =  0.039296400966136735
xk =  [1.13649612 1.27724802]
delta =  0.2


iteration # 2
f =  0.010409968131794633
||gradient(f(x))|| =  0.010409968131794633
xk =  [1.10127204 1.21155937]
delta =  0.2


iteration # 3
f =  0.004739445004725171
||gradient(f(x))|| =  0.004739445004725171
xk =  [1.02013414 1.0340903 ]
delta =  0.2


iteration # 4
f =  0.0001315164751837106
||gradient(f(x))|| =  0.0001315164751837106
xk =  [1.01144316 1.02294174]
delta =  0.2


iteration # 5
f =  1.6438701689056464e-06
||gradient(f(x))|| =  1.6438701689056464e-06
xk =  [1.00017029 1.00021354]
delta =  0.2


iteration # 6
f =  1.78916719383414e-11
||gradient(f(x))|| =  1.78916719383414e-11
xk =  [1.00000422 1.00000841]
delta =  0.2




(array([1.00000422, 1.00000841]), 6, array([-0.00016607, -0.00020513]))

In [138]:
trust_region(x0=[-1.2, 1.], delta_max=20, delta= 10,  eta=10**-3, tol=1e-8)

iteration # 1
f =  4.731884325266611
||gradient(f(x))|| =  4.731884325266611
xk =  [-1.1752809   1.38067416]
delta =  10


iteration # 2
f =  4.731884325266611
||gradient(f(x))|| =  4.731884325266611
xk =  [-1.1752809   1.38067416]
delta =  2.5


iteration # 3
f =  4.731884325266611
||gradient(f(x))|| =  4.731884325266611
xk =  [-1.1752809   1.38067416]
delta =  0.625


iteration # 4
f =  4.000531912634553
||gradient(f(x))|| =  4.000531912634553
xk =  [-0.92751653  0.80688167]
delta =  0.625


iteration # 5
f =  3.1805633215706473
||gradient(f(x))|| =  3.1805633215706473
xk =  [-0.76250423  0.55418364]
delta =  0.625


iteration # 6
f =  2.776328005834224
||gradient(f(x))|| =  2.776328005834224
xk =  [-0.48907021  0.16442351]
delta =  0.625


iteration # 7
f =  1.955654029503801
||gradient(f(x))|| =  1.955654029503801
xk =  [-0.39573049  0.14789032]
delta =  0.625


iteration # 8
f =  1.955654029503801
||gradient(f(x))|| =  1.955654029503801
xk =  [-0.39573049  0.14789032]
delta =  0.1

(array([0.99999304, 0.99998604]), 23, array([0.00020747, 0.00058257]))