# HW5

Chi Po Choi

### Newton's Methods code:

In [8]:
# Modified Newton method
# Modify the eigenvalues to be positive
function BkFunInv(H, ε)
    D, V = eig(H)
    Dp = ifelse(D .> ε, D, ε)
    Dpinv = 1./Dp
    return V * Diagonal(Dpinv) * V'
end

# Back tracking method
function backTracking(obj, x, d, g)
    α = 1
    while (obj(x + α*d)[1] > obj(x)[1] + α * 1e-4 * dot(g, d))
       α = α * 0.5
       if α < 1e-6
           break
       end
    end
    return α
end

function wolfe(obj, x, d, g)
	t = 1
	alpha = 0
	beta = Inf
	c1 = 1e-4
	c2 = 0.9
	maxItr = 50
	for i = 1:maxItr
		if (obj(x + t*d)[1] > obj(x)[1] + t * c1 * dot(g, d))
			beta = t
			t = (alpha + beta)/2
		elseif dot(obj(x + t*d)[2], d) < c2 * dot(g, d)
			alpha = t
			if isinf(beta)
				t = 2*alpha
			else
				t = (alpha + beta)/2
			end
		else
			break
		end
	end
	return t
end



# Newton Method
function newtmin(obj, x0; maxIts=1000, optTol=1e-6, BkFlag = true, btFlag = true)
    # Minimize a function f using Newton’s method.
    # obj: a function that evaluates the objective value,
    # gradient, and Hessian at a point x, i.e.,
    # (f, g, H) = obj(x)
    # x0: starting point.
    # maxIts (optional): maximum number of iterations.
    # optTol (optional): optimality tolerance based on
    # ||grad(x)|| <= optTol*||grad(x0)||
    # BkFlag (optional): true for doing Modified Hessian
    # btFlag (optional): true for doing Back Tracking
    f0, g0, H0 = obj(x0)
    its = 0
    Opt = Float64[]
    xkp = x0
    for i in 1:maxIts
        xk = xkp
        fk, gk, Hk = obj(xk)
        opt = norm(gk, 2)
        push!(Opt, opt)
        #if opt < optTol*norm(g0)
        if opt < optTol
            break
        end
        if BkFlag == true
            Bkinv = BkFunInv(Hk, 0.01)
            dk = Bkinv * (- gk)
        else
            dk = Hk \ (- gk)
        end
        if btFlag == true
            αk = backTracking(obj, xk, dk, gk)
            #αk = wolfe(obj, xk, dk, gk)
        else
            αk = 1
        end
        xkp = xk + αk * dk
        its = its + 1
    end
    return xkp, its, Opt
end

# BFGS
function newtminBFGS(obj, x0; maxIts=1000, optTol=1e-6, btFlag=true)
    # Minimize a function f using BFGS Newton’s method.
    # obj: a function that evaluates the objective value,
    # gradient, and Hessian at a point x, i.e.,
    # (f, g, H) = obj(x)
    # x0: starting point.
    # maxIts (optional): maximum number of iterations.
    # optTol (optional): optimality tolerance based on
    # ||grad(x)|| <= optTol*||grad(x0)||
    # btFlag (optional): true for doing Back Tracking
    f0, g0, H0 = obj(x0)
    #D, V = eig(H0)
    #Hkp = V * Diagonal(map(x -> 1/max(x,1), D)) * V'
    Hkp = H0
    its = 0
    Opt = Float64[]
    xkp = copy(x0)
    for i in 1:maxIts
        xk = copy(xkp)
        Hk = copy(Hkp)
        gk = obj(xk)[2]
        opt = norm(gk, 2)
        push!(Opt, opt)
        #if opt < optTol*norm(g0)
        if opt < optTol
            break
        end
        dk = Hk * (- gk)
        if btFlag == true
            #αk = backTracking(obj, xk, dk, gk)
            αk = wolfe(obj, xk, dk, gk)
        else
            αk = 1
        end
        xkp = xk + αk * dk
        fkp, gkp, Hkp = obj(xkp)
        sk = xkp - xk
        yk = gkp - gk
        ρk = 1/(yk' * sk)[]
        Hkp = (eye(Hk) - ρk * sk * yk') * Hk * (eye(Hk) - ρk * yk * sk') + ρk * sk * sk'
        its = its + 1
    end
    return xkp, its
end

newtminBFGS (generic function with 1 method)

### The test problem

In [9]:
function prob(k)
	if k == 6
		x0 = [-1.2, 1]
		f(x::Vector) = (1-x[1])^2
		c(x::Vector) = [ 10*(x[2] - x[1])^2 ]
	elseif k == 7
		x0 = [2.0, 2.0]
		f(x::Vector) = log(1+x[1]^2) - x[2]
		c(x::Vector) = [(1 + x[1]^2)^2 + x[2]^2 - 4 ]
	elseif k == 8
		x0 = [2.0, 1.0]
		f(x::Vector) = -1
		c(x::Vector) = [ x[1]^2 + x[2]^2 - 25 , x[1]*x[2] - 9 ]
	elseif k == 9
		x0 = [0.0, 0.0]
		f(x::Vector) = sin(pi*x[1]/12) * cos(pi*x[2]/16)
		c(x::Vector) = [4*x[1] - 3*x[2]]
	else
		x0 = [123.0, 321.0]
		f(x::Vector) = 100
		c(x::Vector) = [ x[1] - 123, x[2] - 321 ]
	end

	return (x0, f, c)
end

prob (generic function with 1 method)

### Augmented Lagrangian Method

In [10]:
import ForwardDiff

function AugLag(myprob; newtonFlag = true, maxItr=30, optTolck = 1e-8, optTolKKT = 1e-8)

	x0 = myprob[1]
	rho0 = 10.0
	y0 = zeros(myprob[3](myprob[1]))
	
	rhok = rho0
	yk = y0
	xk = copy(x0)

	function obj(x0, myprob, y, rho)
		f(x) = myprob[2](x) - dot(y, myprob[3](x)) + rho / 2 * dot(myprob[3](x), myprob[3](x))
		g = ForwardDiff.gradient(f)
		h = ForwardDiff.jacobian(g)
        return (f(x0), g(x0), h(x0))
    end

    g0 = ForwardDiff.gradient(myprob[2])
    j = ForwardDiff.jacobian(myprob[3])

    rho_counter = 0

	counter = 0    

	for itr = 1:maxItr
		if newtonFlag == true
			xkp = newtmin(x -> obj(x, myprob, yk, rhok), xk, maxIts=1000, optTol = (1e-6 / 10^itr ) )[1]
		else 
			xkp = newtminBFGS(x -> obj(x, myprob, yk, rhok), xk, maxIts=1000, optTol = (1e-6 / 10^itr ), btFlag=true)[1]
		end

		if norm(myprob[3](xkp)) < 1e-4 * (1/2)^rho_counter
			rhokp = rhok
			ykp = yk - rhokp * myprob[3](xkp)
			rho_counter = rho_counter + 1
		else
			rhokp = 10 * rhok
			ykp = yk
		end

		rhok = rhokp
		yk = ykp
		xk = copy(xkp)

		counter = itr
		if (norm(myprob[3](xk)) < optTolck) & (norm( g0(xk) - j(xk)' * yk) < optTolKKT)
			break
		end

	end

	return xk, yk, rhok, counter
end

AugLag (generic function with 1 method)

### Performance of Augmented Lagrangian Method

In [12]:

xk = Array{Float64,1}[]
xkBFGS = Array{Float64,1}[]

yk = Array{Float64,1}[]
ykBFGS = Array{Float64,1}[]

rhok = Float64[]
rhokBFGS = Float64[]

counter = Int64[]
counterBFGS = Int64[]


for k = 1:4
	(x, y, rho, cnt) = AugLag(prob(k+5))
	(xb, yb, rhob, cntb)  = AugLag(prob(k+5), newtonFlag = false)

	push!(xk, x)
	push!(yk, y)
	push!(rhok, rho)
	push!(counter, cnt)

	push!(xkBFGS, xb)
	push!(ykBFGS, yb)
	push!(rhokBFGS, rhob)
	push!(counterBFGS, cntb)
end

@printf("Comparing the performance between Newton method and quasi-Newton method BFGS\n")
@printf("%6s %12s %12s\n", "Prob", "Newton Itr", "BFGS Itr")
for k = 1:4
    @printf("No.%3d %12d %12d\n", k+5, counter[k], counterBFGS[k])
end

Comparing the performance between Newton method and quasi-Newton method BFGS
  Prob   Newton Itr     BFGS Itr
No.  6            3            2
No.  7            5            5
No.  8            1            1
No.  9            4            4


We can see that, Newton method and BFGS perform similarly.