In [1]:
import numpy as np
from numpy.linalg import norm
import scipy
import time 
import pandas as pd
from scipy.optimize import nnls

In [3]:
def mus(n,a=-1,b=1):
    mu = lambda a,b,k:(b**(k+1)-a**(k+1))/(k+1)
    res = [mu(a,b,k) for k in range(n)]
    return np.array(res)

def x_matrix(K,xs):
    return np.array([xs**k for k in range(K)])

def wxks(n,xs,ws):

    res = np.zeros(n)
    for k in range(n):
        res[k] = (xs**k)@ws
    return res

def f(point,a=-1,b=1):
    n = point.shape[0]

    xs = point[:n//2]
    ws = point[n//2:]
    
    Mus = mus(n,a,b)
    Wxks = wxks(n,xs,ws)
    return ((Mus-Wxks)**2).sum()

def grad_x(point,a=-1,b=1):
    n = point.shape[0]
    xs = point[:n//2]
    ws = point[n//2:]
    result = np.zeros(n)
    Ms = mus(n,a,b)
    Wxks = wxks(n,xs,ws) 
    for j in range(n):
        for k in range(n):
            term1 = Ms[k]
            term2 = (xs**k)@ws 
            if j <= n/2 - 1:
                if k == 0:
                    result[j] += 0
                else:
                    result[j] += -2*ws[j]*k*(xs[j])**(k-1)*(term1-term2)
            else:
                result[j] += 0
    return result

def grad_w(point,a=-1,b=1):
    n = point.shape[0]
    xs = point[:n//2]
    ws = point[n//2:]
    result = np.zeros(n)
    Ms = mus(n,a,b)
    Wxks = wxks(n,xs,ws) 
    for j in range(n):
        for k in range(n):
            term1 = Ms[k]
            term2 = (xs**k)@ws 
            if j <= n/2 - 1:
                result[j] += 0
            else:
                result[j] += -2*((xs[j-n//2])**k)*(term1-term2)
    return result   

def grad_f(point,a=-1,b=1):
    n = point.shape[0]
    xs = point[:n//2]
    ws = point[n//2:]
    result = np.zeros(n)
    Ms = mus(n,a,b)
    Wxks = wxks(n,xs,ws) 
    for j in range(n):
        for k in range(n):
            term1 = Ms[k]
            term2 = (xs**k)@ws 
            if j <= n/2 - 1:
                if k == 0:
                    result[j] += 0
                else:
                    result[j] += -2*ws[j]*k*(xs[j])**(k-1)*(term1-term2)
            else:
                result[j] += -2*((xs[j-n//2])**k)*(term1-term2)
    return result

In [60]:
def epsilon_inactive(x,epsilon,a=-1,b=1):
    n = x.shape[0]
    xs , ws = x[:n//2] , x[n//2:]
    term1 = ((xs<b-epsilon) & (xs>a+epsilon))
    term2 = ((ws>epsilon) & (ws<1e2 - epsilon))

    return np.diag(np.concatenate([term1,term2]))

def epsilon_active(x,epsilon,a=-1,b=1):
    n = x.shape[0]
    xs , ws = x[:n//2] , x[n//2:]
    term1 = ((xs>=b-epsilon) | (xs<=a+epsilon))
    term2 = ((ws<=epsilon) | (ws>=1e2 - epsilon))

    return np.diag(np.concatenate([term1,term2]))

In [83]:
def project(point,a=-1,b=1):
    n = point.shape[0]
    xs = point[:n//2]
    ws = point[n//2:]
    xs = np.where(xs>=a,xs,a)
    xs = np.where(xs<=b,xs,b)
    ws = np.where(ws>=0,ws,0)
    return np.concatenate([xs,ws],axis=0)

def project_line_search(point,p):
    alpha = 1
    pj_point = project(point + alpha*p)
    while f(pj_point) - f(point) > -1e-4/alpha*(norm(pj_point-point))**2:
        alpha *= 0.9
        if alpha <= 1e-10:
            break
        pj_point = project(point + alpha*p)
    return alpha

def line_search(grad,point,p):
    alpha = 1
    point_proposal = point + alpha*p
    while f(point_proposal) - f(point) > 1e-4*alpha*p.T@grad(point):
        alpha *= 0.9
        if alpha <= 1e-10:
            break
        point_proposal = point + alpha*p
    return alpha

def bfgsrec(n,sks,yks,H0,d):
    if n == 0:
        return H0@d
    rhok_inv = (yks[n-1]).T@sks[n-1]
    if rhok_inv == 0.:
            rhok = 1000.0
    else:
        rhok = 1. / rhok_inv 

    alpha = (sks[n-1]).T@d/rhok
    d -= alpha*yks[n-1]
    d = bfgsrec(n-1,sks,yks,H0,d)
    d += (alpha - ((yks[n-1]).T@d/((yks[n-1]).T@sks[n-1])))*sks[n-1]
    return d

def bfgsrecb(n,sk3,yk3,A0,d,pI):
    d = pI@d
    if n==0:
        return A0@d
    alpha = (sk3[n-1]).T@d/(yk3[n-1].T@sk3[n-1])
    d -= alpha*yk3[n-1]
    d = bfgsrec(n-1,sk3,yk3,A0,d)
    d += (alpha - (yk3[n-1].T@d/(yk3[n-1].T@sk3[n-1]))*sk3[n-1])*sk3[n-1]
    return pI@d

def bfgsoptb(x,tau_a=1e-10,tau_r=1e-10,grad=grad_f):
    ns,n=0,0
    pg0 = x-project(x-grad(x))
    pg = pg0
    epsilon = min(1,norm(pg))
    pA = epsilon_active(x,epsilon)
    pI = epsilon_inactive(x,epsilon)
    A0 = pI
    sks,yks = [0]*1000,[0]*1000
    while norm(pg) > tau_a+tau_r*norm(pg0):

        d = -grad(x)
        d = bfgsrecb(ns,sks,yks,A0,d,pI)
        d += pA@grad(x)
        lam = project_line_search(x,d)
        sks[ns] = pI@(x+lam*d)


        xp = x+lam*d
        y = grad(xp)-grad(x)
        x=xp

        yks[ns] = pI@y

        
        if yks[ns].T@sks[ns]>0:
            ns += 1
        else:
            ns = 0
        pg = x-project(x-grad(x))
        epsilon = min(1,norm(pg))
        pA = epsilon_active(x,epsilon)
        pI = epsilon_inactive(x,epsilon)

        n += 1
        if n >= 5000:
            break
    print(n)
    return x
    



In [84]:
n = 3
point = np.concatenate([np.linspace(-1,1,n),np.ones(n)])
bfgsoptb(point)

5


  result[j] += -2*ws[j]*k*(xs[j])**(k-1)*(term1-term2)
  result[j] += -2*((xs[j-n//2])**k)*(term1-term2)
  yks[ns] = pI@y
  d = pI@d
  d += pA@grad(x)
  return ((Mus-Wxks)**2).sum()


array([nan, nan, nan, nan, nan, nan])

In [42]:
def alternative_ls(xs,ws):
    n = xs.shape[0]
    point = np.concatenate([xs,ws])
    it = 0
    while True:
        it += 1
        point_proposed1 = bfgsoptb(point,grad=grad_w)
        point_proposed2 = bfgsoptb(point_proposed1,grad=grad_x)
        if norm(point_proposed2-point) <= 1e-6:
            break
        
        point = point_proposed2

    return point[:n] , point[n:]

In [43]:
xs = np.linspace(-1,1,30)
ws = np.ones(30)
t1 = time.time()
res = alternative_ls(xs,ws)
print(time.time()-t1)
print(res)


KeyboardInterrupt: 

In [None]:
a = -1
b = 1
N = []
W = []
T = []
for n in range(2,6):
    xs = np.linspace(a, b, n) 
    ws = np.ones(n)
    t1 = time.time()
    nodes,weights = alternative_ls(xs,ws)
    T.append(time.time()-t1)
    N.append(nodes)
    W.append(weights)

In [None]:
for i in range(len(N)):
    print("Nodes:{}\nWeights:{}\nTime:{}".format(N[i],W[i],T[i]))
    print('*'*100)

In [None]:
def bfgs(x0, grad,maxiter=None):
    """
    This function estimates the inverse of Hessian matrix and use a line search with wolfe condition
    """


    if maxiter is None:
        maxiter = 1000


    gfk = grad(x0)

    k = 0
    N = len(x0)
    I = np.eye(N, dtype=int)
    Hk = I
    r_0 = norm(x0 - project(x0 - grad(x0)))
    t_a = 1e-4
    t_r = 1e-4
    
    pg0 = x0 - project(x0 - grad(x0))
    pg = pg0
    xk = x0
    sks,yks = [],[]
    while k < maxiter:
        pk = bfgsrec(k,sks,yks,Hk,-gfk)
    #     pk = -np.dot(Hk, gfk)
        alpha_k = project_line_search(xk,pk)
        xkp1 = project(xk + alpha_k * pk)
        pI = epsilon_inactive(xk,min(1,norm(pg)))
        sk = pI@(xkp1 - xk)
        xk = xkp1
        gfkp1 = grad(xkp1)

        yk = pI@(gfkp1 - gfk)
        gfk = gfkp1
        sks.append(sk)
        yks.append(yk)
        k += 1
        pg = xk - project(xk-grad(xk))
        if (norm(pg) <= t_a + t_r*r_0):
            # print('Converges at {}th iteration.'.format(k))
            break


    #     rhok_inv = np.dot(yk, sk)
    #     # Avoid illegal divide
    #     if rhok_inv == 0.:
    #         rhok = 1000.0
    #     else:
    #         rhok = 1. / rhok_inv

    #     A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok
    #     A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok
    #     Hk = np.dot(A1, np.dot(pI@Hk@pI, A2)) + (rhok * sk[:, np.newaxis] *
    #                                              sk[np.newaxis, :])
    # # if k == maxiter:
    # #     print('After {} iters, we get a gradient with norm {}.'.format(k,norm(pg)))
    
    return xk