## Preparation

Import the necessary packages.

In [None]:
using ForwardDiff
using NBInclude
@nbinclude("Visualization.ipynb");

Define the Rosenbrock function.

In [None]:
"""
    rosenbrock(x)

Evaluate the Rosenbrock function at the point `x`.

The result is the sum, over `i = 1, …, length(x) - 1`, of
`100 * (x[i+1] - x[i]^2)^2 + (1 - x[i])^2`.
"""
function rosenbrock(x)
    last_pair = length(x) - 1
    terms = [100 * (x[k + 1] - x[k]^2)^2 + (1 - x[k])^2 for k in 1:last_pair]
    return sum(terms)
end

Define utility functions for logging the results of the optimization runs.

In [None]:
"""
    log(x, y, last::Bool=false)

Print one line per recorded iteration, showing the column `x[:, i]` and the
function value `y[i]` for each `i` up to `length(y)`.

When `last` is `true`, only the final iteration is printed.
"""
function log(x, y, last::Bool=false)
    total = length(y)
    # Either start at the very last entry or walk through all of them.
    first_shown = last ? total : 1
    for i in first_shown:total
        println("function value in iteration $i for x=$(x[:, i]) is $(y[i])")
    end
    return nothing
end

In [None]:
"""
    log(xy::Tuple, last::Bool=false)

Convenience method: take the first two entries of `xy` and forward them,
together with `last`, to the three-argument `log`.
"""
function log(xy::Tuple, last::Bool=false)
    points, results = xy[1], xy[2]
    return log(points, results, last)
end

In [None]:
# Iteration cap handed to the optimizers as `max_iters`.
ITERS = 10_000;
# Stopping threshold handed to the optimizers as `max_error`
# (the same value as the literal 10e-6).
ERROR = 1e-5;

## Gradient Descent

In [None]:
"""
    rosenbrock_gradient(x)

Return the gradient of `rosenbrock` at `x`, computed with `ForwardDiff.gradient`.
"""
function rosenbrock_gradient(x)
    return ForwardDiff.gradient(rosenbrock, x)
end

In [None]:
"""
    rosenbrock_gradient_descent(x, learningRate, max_iters, max_error=0)

Minimize `rosenbrock` with fixed-step gradient descent, starting from `x`.

# Arguments
- `x`: starting point; a vector of any length.
- `learningRate`: factor applied to the gradient in every update.
- `max_iters`: maximum number of iterations to run.
- `max_error`: stop as soon as the function value is below this threshold.

# Returns
A tuple `(arguments, values, iterations)` where column `i` of `arguments` is the
point evaluated in iteration `i`, `values[i]` is the function value at that
point, and `iterations` is the number of iterations recorded. When the threshold
is reached early, `arguments` and `values` are truncated to that many entries.
"""
function rosenbrock_gradient_descent(x, learningRate, max_iters, max_error=0)
    dimensions = length(x)
    arguments = zeros(dimensions, max_iters)
    values = zeros(max_iters)

    for i in 1:max_iters
        # Record every coordinate of the current point, whatever its dimension.
        arguments[:, i] = x
        values[i] = rosenbrock(x)

        # Check for convergence before stepping so no gradient is computed
        # for a point that will never be used.
        if values[i] < max_error
            return (arguments[:, 1:i], values[1:i], i)
        end

        # Rebinds the local `x`; the caller's vector is not modified.
        x -= learningRate * rosenbrock_gradient(x)
    end
    return (arguments, values, max_iters)
end

In [None]:
# Five iterations from a random 2-element start with learning rate 1;
# every iteration is printed.
rosenbrock_gradient_descent(rand(2), 1, 5) |> log

In [None]:
# Full run from a random 2-element start; the learning rate 1 / 512 is exactly
# 0.001953125. Only the last recorded iteration is printed.
(arg, val) = rosenbrock_gradient_descent(rand(2), 1 / 512, ITERS, ERROR)
log(arg, val, true)

In [None]:
# Round the recorded function values to 8 decimal digits before plotting.
y = round.(val; digits=8)
visualize_training_process(
    length(y),
    y,
    "The optimization process of 2-dimensional Rosenbrock function using Gradient Descent",
    "Iteration",
    "Rosenbrock function value",
)

In [None]:
# 100 evenly spaced axis points on [0, 1.5], and 20 evenly spaced values on
# [0, 100] materialized as a vector; both are passed to `contour_cost`.
ax = LinRange(0, 1.5, 100);
levels = collect(LinRange(0, 100, 20));

In [None]:
# Contour plot of the 2-D Rosenbrock function over `ax` × `ax`, together with
# the points recorded in `arg`.
# NOTE(review): the meaning of the `100` argument is defined by `contour_cost`,
# which is not defined in this notebook — confirm there.
contour_cost(
    ax,
    ax,
    (u, v) -> rosenbrock([u, v]),
    levels,
    arg,
    100,
    "The optimization process of 2-dimensional Rosenbrock using Gradient Descent",
    "x1",
    "x2",
)

## Newton's Method

In [None]:
"""
    rosenbrock_hessian(x)

Return the Hessian matrix of `rosenbrock` at `x`, computed with
`ForwardDiff.hessian`.
"""
function rosenbrock_hessian(x)
    return ForwardDiff.hessian(rosenbrock, x)
end

In [None]:
"""
    rosenbrock_newton(x, max_iters, max_error=0)

Minimize `rosenbrock` with Newton's method, starting from `x`.

# Arguments
- `x`: starting point; a vector of any length.
- `max_iters`: maximum number of iterations to run.
- `max_error`: stop as soon as the function value is below this threshold.

# Returns
A tuple `(arguments, values, iterations)` where column `i` of `arguments` is the
point evaluated in iteration `i`, `values[i]` is the function value at that
point, and `iterations` is the number of iterations recorded. When the threshold
is reached early, `arguments` and `values` are truncated to that many entries.
"""
function rosenbrock_newton(x, max_iters, max_error=0)
    dimensions = length(x)
    arguments = zeros(dimensions, max_iters)
    values = zeros(max_iters)

    for i in 1:max_iters
        # Record every coordinate of the current point, whatever its dimension.
        arguments[:, i] = x
        values[i] = rosenbrock(x)

        # Check for convergence before stepping so the Hessian is not
        # computed for a point that will never be used.
        if values[i] < max_error
            return (arguments[:, 1:i], values[1:i], i)
        end

        # Solve `H * step = g` directly instead of forming the inverse Hessian.
        # Rebinds the local `x`; the caller's vector is not modified.
        x -= rosenbrock_hessian(x) \ rosenbrock_gradient(x)
    end
    return (arguments, values, max_iters)
end
    

In [None]:
# Newton run from a random 2-element start; every recorded iteration is printed.
(arg, val) = rosenbrock_newton(rand(2), ITERS, ERROR)
log(arg, val, false)

In [None]:
# Round the recorded function values to 8 decimal digits before plotting.
y = round.(val; digits=8)
visualize_training_process(
    length(y),
    y,
    "The optimization process of 2-dimensional Rosenbrock function using Newton's Method",
    "Iteration",
    "Rosenbrock function value",
)

In [None]:
# 100 evenly spaced axis points on [0, 1.5], and 20 evenly spaced values on
# [0, 100] materialized as a vector; both are passed to `contour_cost`.
ax = LinRange(0, 1.5, 100);
levels = collect(LinRange(0, 100, 20));

In [None]:
# Contour plot of the 2-D Rosenbrock function over `ax` × `ax`, together with
# the points recorded in `arg`.
# NOTE(review): the meaning of the `1` argument is defined by `contour_cost`,
# which is not defined in this notebook — confirm there.
contour_cost(
    ax, ax,
    (x, y) -> rosenbrock( [x, y] ), 
    levels, arg, 1,
    "The optimization process of 2-dimensional Rosenbrock using Newton's Method",
    "x1", "x2",
)

## 4-dimensional functions

In [None]:
# Five iterations from the 4-element zero vector with learning rate 1;
# every iteration is printed.
rosenbrock_gradient_descent(zeros(4), 1, 5) |> log

In [None]:
# Full run from the 4-element zero vector with learning rate 0.001;
# only the last recorded iteration is printed.
(arg, val) = rosenbrock_gradient_descent(zeros(4), 0.001, ITERS, ERROR)
log(arg, val, true)

In [None]:
# Plot the (unrounded) function values recorded by the run above.
visualize_training_process(
    length(val),
    val,
    "The optimization process of 4-dimensional Rosenbrock function using Gradient Descent",
    "Iteration",
    "Rosenbrock function value",
)

In [None]:
# Newton run from a random 4-element start; every recorded iteration is printed.
(arg, val) = rosenbrock_newton(rand(4), ITERS, ERROR)
log(arg, val, false)

In [None]:
# Plot the function values recorded by the 4-dimensional Newton run above;
# the title names the 4-dimensional problem accordingly.
visualize_training_process(length(val), val, 
    "The optimization process of 4-dimensional Rosenbrock function using Newton's Method", 
    "Iteration", "Rosenbrock function value")