In [1]:
import numpy as np
import matplotlib.pyplot as plt
import Oliver as o

def from_x_to_matrix(x):
    """Unpack the coefficient vector x into a symmetric matrix A and a vector b.

    Layout of x: the upper triangle of A stored row by row (row j contributes
    the n-j entries A[j, j:]), followed by the n entries of b. Hence
    x.size == n*(n+1)/2 + n == n*(n+3)/2.

    Parameters
    ----------
    x : 1-D numpy array of length n*(n+3)/2.

    Returns
    -------
    (A, b) : A is a symmetric (n, n) float array; b is the trailing n entries
        of x (a view of x, not a copy).

    Raises
    ------
    ValueError
        If x.size is not of the form n*(n+3)/2, since the coefficients could
        then not be split unambiguously.
    """
    size = x.size
    # Positive root of n**2 + 3*n - 2*size = 0.
    n = int((-3 + np.sqrt(9 + 8 * size)) // 2)
    if n * (n + 3) // 2 != size:
        raise ValueError(
            "x has %d entries, which is not of the form n*(n+3)/2" % size)

    A = np.zeros((n, n))
    end = 0
    for j in range(n):
        start, end = end, end + n - j
        A[j, j:] = x[start:end]       # diagonal and upper part of row j
        A[j+1:, j] = A[j, j+1:]       # mirror into column j to keep A symmetric
    b = x[size - n:]                  # last n coefficients form the vector
    return A, b

# Calculate h for single point z
def hi(x, zi):
    """Evaluate h(z) = z.(A z) + b.z - 1 at one point zi.

    A and b are obtained from the coefficient vector x through
    from_x_to_matrix.
    """
    matrix, vector = from_x_to_matrix(x)
    quadratic_term = zi.dot(matrix.dot(zi))
    linear_term = vector.dot(zi)
    return quadratic_term + linear_term - 1

# Calculate residual r for single point z
def r(x, zi, wi):
    """Residual of one point: the weighted value wi * hi(x, zi), clipped below at 0."""
    weighted_h = wi * hi(x, zi)
    return np.maximum(weighted_h, 0)

#Calculate residual vector
def R(x, Z, W):
    """Residual vector: entry i is r(x, Z[i], W[i]) for each row of the 2-D array Z.

    Returns a float array with one entry per row of Z.
    """
    n_points, _ = Z.shape
    return np.array([r(x, Z[i], W[i]) for i in range(n_points)], dtype=float)

# Calculate objective function
def f(x, Z, W):
    """Objective function: the sum of the squared entries of the residual vector R(x, Z, W)."""
    residuals = R(x, Z, W)
    return np.sum(residuals ** 2)

# Calculate gradient of h (with respect to x) for single point z
def dhi(x, zi):
    """Gradient of h with respect to the coefficient vector, at one point zi.

    The result uses the same layout as the coefficient vector: for each row j
    of the upper triangle, first the diagonal term zi[j]**2, then the
    off-diagonal terms 2*zi[j]*zi[l] for l > j (the factor 2 is there because
    each off-diagonal coefficient appears twice in the symmetric matrix),
    and finally zi itself for the linear coefficients.

    Parameters
    ----------
    x : unused; h is linear in the coefficients so the gradient does not
        depend on them. Kept so the signature matches the other helpers.
    zi : 1-D numpy array of length n.

    Returns
    -------
    1-D float array of length n*(n+1)/2 + n.
    """
    n = zi.size
    k = n*(n+1)//2
    dh = np.zeros(k+n)
    start = 0
    for j in range(n):
        dh[start] = zi[j]**2
        # Slice zi by the point index (j+1 onwards), not by the offset into
        # dh; the two only coincide for the first row.
        dh[start+1:start+n-j] = 2 * zi[j] * zi[j+1:]
        start += n - j
    dh[k:] = zi
    return dh

# Calculate gradient of residual r for single point z
# ri is the (optional) precomputed residual for the given point; it is computed with r() when omitted
def dri(x, zi, wi, ri = None):
    """Gradient of the residual r with respect to x, at one point zi.

    Parameters
    ----------
    x : coefficient vector.
    zi : the point.
    wi : weight of the point.
    ri : optional precomputed residual for this point; when None it is
        computed as r(x, zi, wi).

    Returns
    -------
    wi * dhi(x, zi) where the residual is positive, and zeros otherwise.
    """
    # Identity test: `== None` is evaluated elementwise on numpy values and
    # cannot be used reliably as a condition.
    if ri is None:
        ri = r(x, zi, wi)
    return (ri > 0) * dhi(x, zi) * wi

# Calculate jacobian of residual vector R
def jacobi(x, Z, W, h = None):
    """Jacobian of the residual vector R with respect to x.

    Row i is dri(x, Z[i], W[i], ...), so the result has shape
    (number of rows of Z, x.size).

    Parameters
    ----------
    x : coefficient vector.
    Z : 2-D array with one point per row.
    W : weights, indexed like the rows of Z.
    h : optional precomputed values forwarded to dri as its `ri` argument.
        A sequence is indexed per row (row i receives h[i]); a scalar or
        None is forwarded unchanged.
        NOTE(review): the name suggests h values, but dri treats the value as
        the residual of the point - confirm what callers are meant to pass.
    """
    m, _ = Z.shape
    J = np.zeros((m, x.size))
    # Forwarding the whole sequence to every row would hand dri a vector
    # where it expects the value belonging to a single point.
    per_point = h is not None and np.ndim(h) > 0
    for i in range(m):
        J[i] = dri(x, Z[i], W[i], h[i] if per_point else h)
    return J

# Calculate gradient of objective function
def df(x, Z, W, h = None):
    """Gradient of the objective function: 2 * J^T R, with J = jacobi(x, Z, W, h) and R = R(x, Z, W)."""
    jac = jacobi(x, Z, W, h)
    residuals = R(x, Z, W)
    return 2 * jac.T.dot(residuals)

In [2]:
# Small hand-made test problem: five points with first coordinate 0 and
# second coordinate 0..4, all with weight 1.
# Alternative coefficient vector:
# x = np.array((1, 0, 0.25, 1, 0))
x = np.array([2, 3, 0.25, 3, 2])
Z = np.array([[0, 0], [0, 1], [0, 2], [0, 3], [0, 4]])
# Alternative point sets:
# Z = np.array((0,0, 1, 0, 2, 0, 3, 0, 4, 0)).reshape(5, 2)
# Z = np.array((0, 0, 1, 1, 2, 2, 3, 3, 4, 4)).reshape(5,2)
# Z = np.array((2, 0, 2, 1, 2, 2, 2, 3, 2, 4)).reshape(5, 2)
W = np.ones(5, dtype=int)
print('Z: \n', Z)
print('W: \n', W)
m, n = Z.shape

Z: 
 [[0 0]
 [0 1]
 [0 2]
 [0 3]
 [0 4]]
W: 
 [1 1 1 1 1]


In [3]:
# Random test problem: 5 two-dimensional points (standard normal draws scaled
# by 2), one weight per point drawn from {-1.0, 1.0}, and 5 random coefficients.
# No seed is set, so the values differ on every run.
Z = 2 * np.random.randn(5*2).reshape(5,2)
W = np.random.choice([-1.0, 1.0], 5)
x = np.random.randn(5)
print('Z: \n', Z)
print('W: \n', W)
# Show the (matrix, vector) pair that x unpacks to.
from_x_to_matrix(x)

Z: 
 [[ 1.60682176  0.7755585 ]
 [ 0.51502077  2.50721538]
 [-0.34413295  3.17218173]
 [ 0.87489856 -0.64510557]
 [-0.67903178 -2.48876823]]
W: 
 [-1.  1. -1. -1.  1.]


(array([[-1.61416428, -0.95499172],
        [-0.95499172, -0.51726041]]), array([ 2.06915579,  2.33912531]))

In [4]:
jacobi(x, Z, W)

array([[-2.58187618, -2.49236855, -0.60149099, -1.60682176, -0.7755585 ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.        ,  0.        , -0.        ,  0.        , -0.        ],
       [-0.76544749,  1.12880387, -0.4161612 , -0.87489856,  0.64510557],
       [ 0.        ,  0.        ,  0.        , -0.        , -0.        ]])

In [5]:
o.jacobi(Z, W, x, n , 2)

array([[-2.58187618, -2.49236855, -0.60149099, -1.60682176, -0.7755585 ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.76544749,  1.12880387, -0.4161612 , -0.87489856,  0.64510557],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]])

In [6]:
dhi(x, Z[2])

array([  0.11842749,  -2.18330454,  10.06273692,  -0.34413295,   3.17218173])

In [7]:
o.gradr(Z[2], W[2], x, n, 2)

array([  0.11842749,  -2.18330454,  10.06273692,  -0.34413295,   3.17218173])

In [8]:
R(x, Z, W)

array([ 2.71999805,  0.        ,  0.        ,  1.0715047 ,  0.        ])

In [9]:
o.residuals(Z, W, x, n, 2)

array([ 2.71999805,  0.        ,  0.        ,  1.0715047 ,  0.        ])

In [10]:
df(x, Z, W)

array([-15.68575752, -11.13943788,  -4.16394599, -10.61601997,  -2.8365679 ])

In [11]:
o.gradient(Z, W, x, n, 2)

array([-15.68575752, -11.13943788,  -4.16394599, -10.61601997,  -2.8365679 ])

In [12]:
r(x, Z[0], W[0])

2.719998050686498

In [13]:
hi(x, Z[0])

-2.719998050686498

In [14]:
R(x, Z, W)

array([ 2.71999805,  0.        ,  0.        ,  1.0715047 ,  0.        ])

In [15]:
f(x, Z, W)

8.5465117276260507

In [16]:
df(x, Z, W)

array([-15.68575752, -11.13943788,  -4.16394599, -10.61601997,  -2.8365679 ])

In [17]:
from_x_to_matrix(x)

(array([[-1.61416428, -0.95499172],
        [-0.95499172, -0.51726041]]), array([ 2.06915579,  2.33912531]))

In [18]:
# Finite-difference check of df against f on a random problem with 20 points.
# No seed is set, so the printed numbers differ on every run.
Z = 2 * np.random.randn(20*2).reshape(20,2)
W = np.random.choice([-1.0, 1.0], 20)

m, n = Z.shape
# Number of entries in the upper triangle of a symmetric n x n matrix.
k = n*(n+1)//2

x = np.random.randn(n+k)
# Random unit direction along which the directional derivative is tested.
p = np.random.randn(n+k)
p = p/np.linalg.norm(p)
f0 = f(x, Z, W)
# Analytic directional derivative: gradient dotted with p.
g = df(x, Z, W).dot(p)
if g == 0:
    # The relative error below would divide by zero; print the inputs instead.
    print("p: \n", p)
    print(df(x, Z, W))
    
else:
    print("g = %e" %g)
    # Step sizes 1e2, 1e1, ..., 1e-8.
    for ep in 10.0**np.arange(2, -9, -1):
        # Forward-difference approximation of the directional derivative.
        g_app = (f(x+ep*p, Z, W)-f0)/ep
        # Relative deviation from the analytic value.
        error = abs(g_app-g)/abs(g)
        print('ep = %e, error = %e, g_app = %e' % (ep,error, g_app))

g = -4.252855e+02
ep = 1.000000e+02, error = 7.755577e+01, g_app = 3.255806e+04
ep = 1.000000e+01, error = 5.395506e+00, g_app = 1.869345e+03
ep = 1.000000e+00, error = 2.093496e-01, g_app = -3.362522e+02
ep = 1.000000e-01, error = 2.126593e-02, g_app = -4.162414e+02
ep = 1.000000e-02, error = 2.126593e-03, g_app = -4.243811e+02
ep = 1.000000e-03, error = 2.126593e-04, g_app = -4.251951e+02
ep = 1.000000e-04, error = 2.126594e-05, g_app = -4.252765e+02
ep = 1.000000e-05, error = 2.126623e-06, g_app = -4.252846e+02
ep = 1.000000e-06, error = 2.127274e-07, g_app = -4.252854e+02
ep = 1.000000e-07, error = 2.212900e-08, g_app = -4.252855e+02
ep = 1.000000e-08, error = 4.886089e-08, g_app = -4.252855e+02


## Implementation 

In [19]:
def backtracking_line_search(f, gradf, p, x, Z, W, rho=0.5, c=0.05, alpha0=10, max_iter=200):
    """Backtracking line search for a step length satisfying the Armijo condition.

    Starting from alpha0, the step is multiplied by rho until
        f(x + alpha*p) <= f(x) + c * alpha * gradf(x).dot(p)
    holds, or until max_iter reductions have been made.

    Parameters
    ----------
    f, gradf : callables taking (x, Z, W) and returning the objective value
        and its gradient.
    p : search direction.
    x : current iterate.
    Z, W : passed through unchanged to f and gradf.
    rho : shrink factor applied to the step on each rejection.
    c : sufficient-decrease constant.
    alpha0 : initial trial step.
    max_iter : maximum number of reductions.

    Returns
    -------
    The accepted step length, or the last trial step if max_iter was reached.
    """
    # Both quantities are independent of alpha, so evaluate them once.
    f0 = f(x, Z, W)
    slope = gradf(x, Z, W).dot(p)

    alpha = alpha0
    phi = f(x + alpha * p, Z, W)
    it = 0
    # Strict inequality: a step that meets the condition with equality is
    # accepted. Otherwise a zero direction would exhaust every reduction.
    while phi > f0 + c * alpha * slope and it < max_iter:
        alpha = rho * alpha
        phi = f(x + alpha * p, Z, W)
        it += 1
    return alpha

In [26]:
def steepest_descent(f, grad, x0, Z, W, tol = 1e-3, max_iter = 3000):
    """Minimize f by steepest descent with a backtracking line search.

    Parameters
    ----------
    f, grad : callables taking (x, Z, W) and returning the objective value
        and its gradient.
    x0 : starting point.
    Z, W : passed through unchanged to f and grad.
    tol : iteration stops once the gradient norm is at or below this value.
    max_iter : maximum number of iterations.

    Returns
    -------
    The final iterate.

    Progress (iteration count, step length, objective value) is printed every
    100 iterations, and the final objective value and gradient at the end.
    """
    xk = x0
    # Use the gradient callable that was passed in, not a global one.
    g = grad(xk, Z, W)
    it = 0
    while np.linalg.norm(g) > tol and it < max_iter:
        p = -g
        alpha = backtracking_line_search(f, grad, p, xk, Z, W)
        xk = xk + alpha * p
        g = grad(xk, Z, W)
        it += 1
        if it % 100 == 0:
            print(it)
            print(alpha)
            print("f(x) =", f(xk, Z, W))
    print("f(x) =", f(xk, Z, W), "df(x) =" , g)

    # Return the iterate that was computed, not the notebook's global x.
    return xk

In [27]:
# Fresh random problem for the descent run: 20 two-dimensional points,
# weights drawn from {-1.0, 1.0}, and a random starting coefficient vector.
# No seed is set, so the run is not reproducible.
Z = 2 * np.random.randn(20*2).reshape(20,2)
W = np.random.choice([-1.0, 1.0], 20)
x = np.random.randn(5)

In [None]:
steepest_descent(f, df, x, Z, W)

100
0.0006103515625
f(x) = 0.242142771208
200
0.009765625
f(x) = 0.211941914779
300
0.0048828125
f(x) = 0.210002320448
400
0.0006103515625
f(x) = 0.209795361457


In [23]:
df(x+100*p, Z, W)

array([-18350.33368713, -32485.06108226, -22150.0780761 ,  -1981.04328085,
        -2306.44374992])

In [24]:
o.steepesDescent(x,Z,W,2, 2)

NameError: name 'N' is not defined

In [None]:
# Try the line search on a new random problem with a larger starting point.
Z = 2 * np.random.randn(20*2).reshape(20,2)
W = np.random.choice([-1.0, 1.0], 20)
x = np.random.randn(5)*3
# NOTE(review): p is not defined in this cell; it relies on a value left in
# the kernel by an earlier cell (the gradient-check cell assigns p, a random
# unit vector rather than a descent direction) - confirm this is intended.
backtracking_line_search(f, df, p, x, Z, W)

In [None]:
o.steepestDescent(x, Z, W, f, df)