# Numerical Solution For Finding Langrange Multipliers


In [1]:
import numpy as np
np.set_printoptions(suppress=True, precision=5)


cs = []

for i in range(10):
    cs.append(np.random.randn(5,1))


In [44]:
np.set_printoptions(suppress=True, precision=5)
def calcM(ls,cs):
    """
    Find the covariance matrix, M, as the lambda weighted sum of c c^T
    """
    d = len(cs[0])
    M = np.zeros([d,d])
    ccTs = []
    for l,c in zip(ls,cs):        
        ccT = np.dot(c,c.T)
        #print c,ccT,l,M
        M = M + l*ccT       
        ccTs.append(ccT)
    return M

def L(ls,cs):
    """
    Find L = -log |M| + sum(lambda_i * (1-c^T M^-1 c))
    """
    M = calcM(ls,cs)
    Minv = np.linalg.inv(M)
    #print M
    #print np.linalg.det(M)
    t = 0
    for l,c in zip(ls,cs):        
        t+=l * (1-np.trace(np.dot(np.dot(c,c.T),Minv)))
    return -np.log(np.linalg.det(M)) + t

def dL_dl(ls,cs):
    """
    Find the gradient dL/dl_j = -Tr(M^-1 c_j c_j^T) + 1 - c^T M^-1 c
    """
    M = calcM(ls,cs)
    Minv = np.linalg.inv(M)

    grads = []
    for l,c in zip(ls,cs):
        grad = -np.trace(np.dot(Minv,np.dot(c,c.T))) + (1-np.trace(np.dot(np.dot(c,c.T),Minv)))
        grads.append(grad)
       # print grad.shape
    return np.array(grads)
    
    
def findLambdas(cs):
    """
    Gradient descent to find the lambda_is which maximise -log |sum(lambda_i c_i c_i^T)|
    
    Parameters:
        cs = list of column vectors (these are the gradients of df*/df_i)
    
    Returns:
        ls = vector of lambdas
    """
    
    #lambdas
    ls = np.ones(len(cs))
    M = calcM(ls,cs)
    ls /= np.sum(ls)
    ls *= np.linalg.matrix_rank(M)    


    #learning rate
    lr = 0.00001
    #gradient descent
    lsbefore = ls.copy()
    Nits = 1000
    for it in range(Nits): 
        delta_ls = dL_dl(ls,cs)*lr
        #print ls
        #print delta_ls.shape
        ls =  ls + delta_ls
        #print ls.shape
        
        
     #   ls[ls>1.0] = 1.0
     #   ls[ls<0.0] = 0.0
     #   ls /= np.sum(ls)        
     #   ls *= np.linalg.matrix_rank(M)
        
        #if (np.sum((lsbefore-ls)**2)<1e-10):
        #    print "Converged after %d iterations" % it
        #    break #reached ~maximum
        #lsbefore = ls.copy()
        if (it%10000) == 0:
            M = calcM(ls,cs)
            Minv = np.linalg.inv(M)
            print ls           
            for l,c in zip(ls,cs):
                print("%0.2f" % np.dot(np.dot(c.transpose(), Minv),c)[0,0]),
            print "-=-"
            
    if it==Nits-1:
        print "Ended before convergence"
    
    return ls

ls = findLambdas(cs)       
print "Lambdas:"
print ls

M = calcM(ls,cs)
Minv = np.linalg.pinv(M)
print "Solutions to c^T M^-1 c (should all be 0<=x<=1)"
for l,c in zip(ls,cs):
    print np.dot(np.dot(c.transpose(), Minv),c)
    

[ 0.50001  0.49999  0.5      0.49999  0.49998  0.49997  0.49998  0.49999
  0.5      0.49999]
0.22 0.79 0.61 1.01 1.59 1.91 1.67 1.05 0.29 0.87 -=-
Ended before convergence
Lambdas:
[ 0.50557  0.494    0.49773  0.48964  0.47762  0.47075  0.47595  0.48882
  0.50409  0.4925 ]
Solutions to c^T M^-1 c (should all be 0<=x<=1)
[[ 0.22346]]
[[ 0.806]]
[[ 0.61739]]
[[ 1.02786]]
[[ 1.65255]]
[[ 2.02102]]
[[ 1.74164]]
[[ 1.0726]]
[[ 0.29763]]
[[ 0.88186]]


In [45]:
M.shape

(5, 5)

In [46]:
np.linalg.matrix_rank(M)

5

In [47]:
ls=np.random.rand(len(ls))

In [48]:
d = 0.0001
for i in range(len(ls)):
    delta = np.zeros_like(ls)
    delta[i]+=d
    print (L(ls+delta,cs)-L(ls-delta,cs))/(2*d)

. . . . . . . . . . . . . . . . . . . . 0.699245544569
. . . . . . . . . . . . . . . . . . . . -1.31148380335
. . . . . . . . . . . . . . . . . . . . 0.512173945566
. . . . . . . . . . . . . . . . . . . . -0.0702817601539
. . . . . . . . . . . . . . . . . . . . 0.0770214148238
. . . . . . . . . . . . . . . . . . . . -4.04344099965
. . . . . . . . . . . . . . . . . . . . -3.07765476962
. . . . . . . . . . . . . . . . . . . . 0.103421497855
. . . . . . . . . . . . . . . . . . . . 0.742345523772
. . . . . . . . . . . . . . . . . . . . -1.65357109007


In [49]:
dL_dl(ls,cs)

array([ 0.39849, -3.62297,  0.02435, -1.14056, -0.84596, -9.08688,
       -7.15531, -0.79316,  0.48469, -4.30714])