# <div style="text-align: center">18.335/6.337 Final Project - The L-BFGS algorithm</div>
### <div style="text-align: center">Created by Yusu Liu and Simon Batzner</div>

## Import different benchmarking functions

In [30]:
include("testfns.jl")

easom (generic function with 1 method)

## L-BFGS

In [31]:
function lbfgs!(F, x0, maxIt, m, τgrad=1e-6, verbose = 0)
    
    # params
    # 
    # F:      function to be optimized
    # x0:     initial guess
    # maxIt:  maximum Iteration
    # m:      number of input differences and gradient differences to be stored
    # τgrad:  tolerance for norm of the slope


    # returns: 
    # x1:     optimized variable
    # f1:     function value at x1
    # k       iteration number

    k=0
    n=length(x0)
    Sm=zeros(n,m) # S_k=x_k+1-x_k
    Ym=zeros(n,m) # Y_k=g_k+1-g_k
    f0,g0=F(x0)
    
    # use the simplest line search to find step size
    α, f1, g1 = backtracking(F,-g0,x0)
    x1 = x0 - α.*g0
    
    # counter
    k = 1

    while true
        
        if k > maxIt
            #println("Maximum number of iterations reached: $(k-1)")
            break; 
        end
        
        gnorm = norm(g0)
        
        if gnorm < τgrad
            
            break; 
        end
        
        s0 = x1-x0
        y0 = g1-g0
        
        # println("y0=$y0")
        H0 = s0'*y0/(y0'*y0) # hessian diagonal satisfying secant condition

        # update Sm and Ym
        if k <= m
            Sm[:,k]=s0
            Ym[:,k]=y0
            p=-approxInvHess(g1,Sm[:,1:k],Ym[:,1:k],H0)
        # only keep m entries in Sm and Ym so purge the old ones
            
        elseif (k>m)
            Sm[:,1:(m-1)]=Sm[:,2:m]
            Ym[:,1:(m-1)]=Sm[:,2:m]
            Sm[:,m]=s0
            Ym[:,m]=y0
            p=-approxInvHess(g1,Sm,Ym,H0)
        end
        
        # new direction=p, find new step size
        α, fs, gs=backtracking(F,p,x1)
        
        # update for next iteration
        x0=x1
        g0=g1
        x1=x1+α.*p
        f1=fs
        g1=gs
        k=k+1
        
        if verbose == 1 
            println("Iteration: $k -- x = $x1")
        end
    end
    
    k=k-1
    return x1, f1, k
end

lbfgs! (generic function with 3 methods)

In [32]:
function backtracking(F,d,x,r=0.5,c=1e-4,nmax=100)
    
    # params
    # F: function to be optimized
    # x: variable
    # d: direction
    # r: factor by which to reduce step size at each iteration
    # c: parameter [0,1]
    # nmax: max iteration

    # return
    # α step size
    # fk1: function value at new x
    # gkk: gradient at new x

    #https://en.wikipedia.org/wiki/Backtracking_line_search
    α=1
    fk,gk=F(x)
    xx=x
    x=x+α*d
    fk1,gk1=F(x)
    n=1
    
    while fk1>fk+c*α*(gk'*d) && n < nmax
        n=n+1
        α=α*0.5
        x=xx+α*d
        fk1,gk1=F(x)
    end
    
    return α, fk1, gk1
end

backtracking (generic function with 4 methods)

In [33]:
function approxInvHess(g,S,Y,H0)

    # params: 
    # g: gradient nx1 vector
    # S: nxk matrixs storing S[i]=x[i+1]-x[i]
    # Y: nxk matrixs storing Y[i]=g[i+1]-g[i]
    # H0: initial hessian diagnol scalar

    # return:
    # p:  the approximate inverse hessian multiplied by the gradient g
    #     which is the new direction
    #
    # notation follows: https://en.wikipedia.org/wiki/Limited-memory_BFGS

    n,k=size(S)
    rho=zeros(k)
    for i=1:k
        rho[i]=1/(Y[:,i]'*S[:,i])
    end

    q=zeros(n,k+1)
    r=zeros(n,1)
    α=zeros(k,1)
    β=zeros(k,1)

    q[:,k+1]=g

    for i=k:-1:1
        α[i]=rho[i]*S[:,i]'*q[:,i+1]
        q[:,i]=q[:,i+1]-α[i]*Y[:,i]
    end

    z=H0*q[:,1]


    for i=1:k
        β[i]=rho[i]*Y[:,i]'*z
        z=z+S[:,i]*(α[i]-β[i])
    end

    p=z

    return p
end

approxInvHess (generic function with 1 method)

In [34]:
function test_range(a0, a1, b0, b1, step, tol)
    x1_opt = [1.0, 1.0]
    
    for a in a0:step:a1
        for b in b0:step:b1
            
            x0 = [a, b]
            x1, f1, k=lbfgs!(rosenbrock, x0, 100, 2); 
            
            if (norm(x1 - x1_opt)) < tol
                break;
            else
                println("======\nfailed for: x0 = [$a, $b]")
                println("x1 found was: $(x1)\n")
            end
            
        end
    end
end

test_range (generic function with 1 method)

In [35]:
# initial guess
x0=[10, 0.1] 
x1_opt = [1.0, 1.0]
m = 2

# define test function
function rosenbrock(x::Vector)
    F=(1-x[1])^2+100*(x[2]-x[1]^2)^2
    d=[-2*(1-x[1])-400*(x[2]-x[1]^2)*x[1],200 * (x[2] - x[1]^2)]
   return F, d
end

# scan over range
test_range(-2, 2, -2, 2, 0.1, 1e-6)

x0 = [1, 1.5]
x1, f1, k = lbfgs!(rosenbrock, x0, 100, m)

failed for: x0 = [-1.3, -2.0]
x1 found was: [NaN, NaN]

failed for: x0 = [-0.9, -2.0]
x1 found was: [NaN, NaN]

failed for: x0 = [-0.6, -2.0]
x1 found was: [1.0, 1.0]



([1.0, 1.0], 3.1739028310543608e-15, 25)

# Try different optimization test functions

In [36]:
lbfgs!(himmelblau, x0, 10, 2, 1e-6, 0)

([3.00034, 1.9907], 0.0014051682266910735, 10)

In [37]:
lbfgs!(booth, x0, 10, 2, 1e-6, 0)

([1.00001, 3.0], 1.0350538584755263e-10, 10)

In [38]:
lbfgs!(bohachevsky1, x0, 10, 2, 1e-6, 0)

([0.0122283, 0.0128619], 0.007683875360763537, 10)

In [39]:
lbfgs!(easom, x0, 10, 2, 1e-6, 0)

([1.305, 1.30499], -8.110223889894006e-5, 8)

# Timing

In [40]:
maxIt = 100
m = 2
tol = 1e-6
verbose = 0
range = 10
t = 0.0

funs = [rosenbrock, himmelblau, booth, bohachevsky1, easom]

for fun in funs
    for i in 1:range
        x0 = [rand(-1:0.1:1), rand(-1:0.1:1)]
        t += @elapsed lbfgs!(fun, x0, maxIt, m, tol, verbose)
    end
    
    println("\nTime elapsed for $(range) evaluations of $(fun): $t")
end


Time elapsed for 10 evaluations of rosenbrock: 0.009143528000000001

Time elapsed for 10 evaluations of himmelblau: 0.033537361

Time elapsed for 10 evaluations of booth: 0.039275296999999994

Time elapsed for 10 evaluations of bohachevsky1: 0.052118979999999995

Time elapsed for 10 evaluations of easom: 0.05443652199999999
