# Model 2: Implementation

In [1]:
import numpy as np
import matplotlib as plt

In [42]:
def h(A, b, zi, wi):
    return zi.T @ A @ zi + b.T @ zi -1

# Residual for single point
def r(A, b, zi, wi):
    x = zi.T @ A @zi + b.T @ zi - 1
    return np.maximum(wi * x, 0)

# Residual vector for all points
def R(A, b, Z, W):
    R = np.zeros_like(W, dtype = float)
    for i in range(R.size):
#         print(r(A, b, Z[i], W[i]))
        R[i] = r(A, b, Z[i], W[i])
    
    return R

#Objective function
def f(x, Z, W):
    m, n = Z.shape
    A = np.zeros((n, n))
    b = np.zeros(n)
    update_A(A, x)
    update_b(b, x)
#     print(A)
#     print(b)
    return np.sum(R(A, b, Z, W)**2)

Se etter for de deriverte. Sørg for at gradienten følger opp riktig område.

In [43]:
U = np.array((1,2,3,4,5,6,7,8,9)).reshape(3,3)
W = np.array((1, -1, 1))
(U.T * (W == 1)).T

array([[1, 2, 3],
       [0, 0, 0],
       [7, 8, 9]])

In [171]:
# return partial derivatives of residual i, wrt. the matrix coefficients
def r_delA(A, b, zi, wi):
    n = b.size
    rdA = 2 * (zi * (zi * np.ones_like(A)).T)
    rdA = rdA - 1/2 * np.identity(n) * rdA
    return extract_a(rdA)

# Return vector of all partial derivatives of residual i
def del_r(A, b, zi, wi):
    n = b.size
    k = n*(n+1)//2
    dr = np.zeros(k+n)
    dr[:k] = r_delA(A, b, zi, wi)
    dr[k:] = zi
    return wi * dr

#Jacobian of residual vector
def jacobian_R(A, b, Z, W):
    m, n = Z.shape
    k = n*(n+1)//2
    J = np.zeros((m, k+n))
    print(A)
    for i in range(m):
        J[i] = del_r(A, b, Z[i], W[i])
    return (J.T * (R(A, b, Z, W) > 0) ).T

# Gradient of objective function
def df(x, Z, W):
    m, n = Z.shape
    A = np.zeros((n, n))
    b = np.zeros(n)
    update_A(A, x)
    update_b(b, x)
    print(A)
    return 4 * jacobian_R(A, b, Z, W).T @ R(A, b, Z, W)

In [172]:
# Insert coefficients in (nxn) matrix A, given (m+k) vector x
def update_A(A, x):
    n = A.shape[0]
    k = n*(n+1)//2    
    end = 0
    for j in range(n):
        start = end
        end = start + n-j
        A[j, j:] = x[start:end]
        A[j+1:,j] = A[j, j+1:]
    
    return A

# Insert coefficients in (n) centering vector given (n+k) vector x
def update_b(b, x):
    n = b.size
    b = x[-n:]
    return b

# Get the first (k) coefficients of (n+k) vector x, given an (nxn) matrix A
def extract_a(A):
    n = A.shape[0]
    k = n*(n+1)//2
    a = np.zeros(k)
    end = 0
    
    for j in range(n):
        start = end
        end = start + n-j
        a[start:end] = A[j, j:]
        
    return a

In [173]:
Z = np.array((0, 0.5, 4, 0)).reshape(2, 2)
W = np.array((-1, 1))
x = np.array((1, 0, 1, 0, 0))
A = np.zeros((2, 2))
b = np.zeros(2)
A = update_A(A, x)


jacobian_R(A, b, Z, W)

[[ 1.  0.]
 [ 0.  1.]]


array([[ -0.  ,  -0.  ,  -0.25,  -0.  ,  -0.5 ],
       [ 16.  ,   0.  ,   0.  ,   4.  ,   0.  ]])

In [174]:
Z

array([[ 0. ,  0.5],
       [ 4. ,  0. ]])

In [175]:
W

array([-1,  1])

In [176]:
A = update_A(A, x)
A

array([[ 1.,  0.],
       [ 0.,  1.]])

In [177]:
b = update_b(b, x)
b

array([0, 0])

In [178]:
R(A, b, Z, W)

array([  0.75,  15.  ])

In [179]:
f(x, Z, W)

225.5625

In [180]:
df(x, Z, W)

[[ 1.  0.]
 [ 0.  1.]]
[[ 1.  0.]
 [ 0.  1.]]


array([  9.60000000e+02,   0.00000000e+00,  -7.50000000e-01,
         2.40000000e+02,  -1.50000000e+00])

In [319]:
#Generate random values
#np.random.seed(0)
m = 10
n = 2
k = n*(n+1)//2
x = np.array((1, 0, 1, 0, 0))
# x = np.random.randn(k+n)
Z = 2*np.random.randn(n*m).reshape(m,n)
W = np.random.choice([-1, 1], m)
#W = np.array((-1, -1, -1))
p = np.random.randn(n+k)
p = p/np.linalg.norm(p)
f0 = f(x, Z, W)
g = df(x, Z, W).dot(p)

print(W)
print(f0)
print(g)
print(x, '\n')
for ep in 10.0**np.arange(3, -12, -1):
    g_app = ( f(x+ep*p, Z, W) - f0 )/ep
    error = abs(g_app-g)/abs(g)
    
    print('ep = %e, error = %e, g_app = %e' % (ep,error,g_app))

[[ 1.  0.]
 [ 0.  1.]]
[[ 1.  0.]
 [ 0.  1.]]
[-1  1  1 -1 -1  1  1 -1  1 -1]
474.461886798
223.404200132
[1 0 1 0 0] 

ep = 1.000000e+03, error = 1.340814e+02, g_app = 3.017775e+04
ep = 1.000000e+02, error = 1.313562e+01, g_app = 3.157957e+03
ep = 1.000000e+01, error = 1.019366e+00, g_app = 4.511349e+02
ep = 1.000000e+00, error = 3.556355e-01, g_app = 1.439537e+02
ep = 1.000000e-01, error = 5.537211e-01, g_app = 9.970057e+01
ep = 1.000000e-02, error = 5.735297e-01, g_app = 9.527526e+01
ep = 1.000000e-03, error = 5.755106e-01, g_app = 9.483272e+01
ep = 1.000000e-04, error = 5.757086e-01, g_app = 9.478847e+01
ep = 1.000000e-05, error = 5.757285e-01, g_app = 9.478405e+01
ep = 1.000000e-06, error = 5.757304e-01, g_app = 9.478360e+01
ep = 1.000000e-07, error = 5.757306e-01, g_app = 9.478356e+01
ep = 1.000000e-08, error = 5.757308e-01, g_app = 9.478353e+01
ep = 1.000000e-09, error = 5.757311e-01, g_app = 9.478345e+01
ep = 1.000000e-10, error = 5.757332e-01, g_app = 9.478299e+01
ep = 1.00000