In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

## Example 1
\begin{equation}
f(x) = x^2 - 4x + 6
\end{equation}

In [None]:
def f(x):
    """Evaluate the Example 1 objective f(x) = x^2 - 4x + 6.

    Only arithmetic operators are used, so `x` may be a scalar or a NumPy
    array (the result is then computed element-wise).
    """
    squared_term = x**2
    linear_term = 4*x
    return squared_term - linear_term + 6

# Equivalent one-line form:
# f = lambda x: x**2 - 4*x + 6

In [None]:
# Sample the interval [-5, 5] with evenly spaced points (both ends included).
NumberOfPoints = 101
x = np.linspace(-5.0, 5.0, num=NumberOfPoints)

In [None]:
# Evaluate the objective on every sample point of the array x at once.
fx = f(x)

In [None]:
# Draw the sampled curve of f on an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(x, fx)
ax.grid()
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.set_title('plot of f(x)')
plt.show()

## Newton's Method

Starting from an initial point $x^{(0)}$, repeat the following update, where $\alpha$ is the step size (learning rate):
\begin{equation}
x^{(k+1)} = x^{(k)} - \alpha \nabla^2 f(x^{(k)})^{-1}\nabla f(x^{(k)})
\end{equation}

### TODO 1

$f(x) = x^2 - 4x + 6$의 Hessian function을 `hess_f`에 구현해보세요.

In [None]:
def grad_fx(x):
    """Return the first derivative f'(x) = 2x - 4 of f(x) = x^2 - 4x + 6."""
    doubled = 2*x
    return doubled - 4
def hess_f(x):
    # Exercise placeholder: replace `None` with the second derivative of
    # f(x) = x**2 - 4*x + 6. newton_descent divides the gradient by this
    # value, so it cannot run while None is returned.
    # TODO 1
    return None

### TODO 2

다음 식을 이용하여, `x1`을 구하는 코드를 작성하세요.

\begin{equation}
x^{(k+1)} = x^{(k)} - \alpha \nabla^2 f(x^{(k)})^{-1}\nabla f(x^{(k)})
\end{equation}

In [None]:
# Hand-written Newton iteration for Example 1, starting from x = 0.
x0 = 0.
MaxIter = 10
for i in range(MaxIter):
    # Exercise placeholder: compute the next iterate x1 from x0 using
    # grad_fx and hess_f according to the update formula above.
    # TODO 2
    x1 = None
    # Report the iteration index, the current point and its objective value.
    print(i, x0, f(x0))
    # Advance to the next iterate; while x1 is still None, f(x0) fails on
    # the following pass.
    x0 = x1

In [None]:
def newton_descent(func, grad_func, hess_fun, x0, learning_rate=1, MaxIter=10, verbose=True):
    """Run a fixed number of damped Newton steps on a one-variable function.

    Each step applies ``x <- x - learning_rate * grad_func(x) / hess_fun(x)``.

    Parameters
    ----------
    func : callable
        Objective; used for the verbose log and for the returned value.
    grad_func : callable
        First derivative of the objective.
    hess_fun : callable
        Second derivative of the objective.
    x0 : float
        Starting point.
    learning_rate : float, optional
        Multiplier applied to the Newton step.
    MaxIter : int, optional
        Number of steps to perform.
    verbose : bool, optional
        When true, print the index, point and objective value before each step
        is applied.

    Returns
    -------
    tuple
        ``(x_last, func(x_last), paths)`` where ``paths`` is the list of
        iterates produced by the steps (the starting point is not included).
    """
    current = x0
    paths = []
    for step in range(MaxIter):
        # Derivatives are evaluated before logging, matching the log's
        # "state before the update" meaning.
        newton_step = learning_rate * grad_func(current) / hess_fun(current)
        candidate = current - newton_step
        if verbose:
            print('{0:03d} : {1:4.3f}, {2:4.2E}'.format(step, current, func(current)))
        current = candidate
        paths.append(candidate)
    return current, func(current), paths

In [None]:
# Run Newton's method on Example 1 from x = 0.0 with the default step size
# and iteration count; `paths` holds the iterates produced by each step.
xopt, fopt, paths = newton_descent(f, grad_fx, hess_f, 0.0)

In [None]:
# Zoom in on [0.5, 2.5] and overlay the Newton iterates on the curve of f.
x = np.linspace(0.5, 2.5, num=1000)
paths = np.array(paths)

fig, ax = plt.subplots()
ax.plot(x, f(x))
ax.grid()
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.set_title('plot of f(x)')

# Iterates drawn as connected markers on the same axes.
ax.plot(paths, f(paths), 'o-')
plt.show()

In [None]:
# Objective value at each recorded iterate. Only y-values are passed, so the
# horizontal axis is the position in `paths` (the iteration index), not x.
plt.plot(f(paths), 'o-')
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

In [None]:
from scipy.optimize import minimize
# Same Example 1 objective, restated as a lambda for the SciPy call.
f = lambda x: x**2 - 4*x + 6
# Reset the starting point: the hand-written Newton loop above reassigns x0.
x0 = 0.
# Last expression of the cell, so the returned result object is displayed.
minimize(f, x0)

In [None]:
# Keep the result object and print the minimizer (res.x) and minimum value (res.fun).
res = minimize(f, x0)
print(res.x, res.fun)

## (Additional) non-convex function
\begin{equation}
\min_x x \sin(x)
\end{equation}
1. Define $f(x)$ : loss function
1. Define $\nabla f(x)$ : gradient of $f$
1. Define $\nabla^2 f(x)$ : Hessian (second derivative) of $f$
1. Tune parameters : `learning_rate, x0, MaxIter`

### TODO 3

$f(x) = x\sin(x)$의 Hessian function을 `hess_f`에 구현해보세요.

In [None]:
# Plotting grid for the non-convex example.
x = np.linspace(-2, 8, 501)
# Objective f(x) = x * sin(x); this rebinds the name `f` used by earlier cells.
f = lambda x : x * np.sin(x)
# First derivative by the product rule: sin(x) + x * cos(x).
grad_f = lambda x: np.sin(x) + x * np.cos(x)
# Exercise placeholder: replace `None` with the second derivative of f.
# newton_descent divides by this value, so it cannot run until this is done.
# TODO 3
hess_f = lambda x: None

In [None]:
# Evaluate the current objective on the grid and draw its curve.
fx = f(x)
fig, ax = plt.subplots()
ax.plot(x, fx)
ax.grid()
plt.show()

In [None]:
# Alternative settings to experiment with are kept commented out.
# x0 = 5.7
# MaxIter = 10
# learning_rate = 1.0
x0 = 5.0
MaxIter = 1000
learning_rate = 0.00250
# x0 = -0.2
# MaxIter = 50
# learning_rate = 0.1250

# Pass grad_f, the gradient of x*sin(x) defined alongside f. grad_fx is the
# gradient of the Example 1 quadratic and would drive the iteration with the
# wrong derivative.
xopt, fopt, paths = newton_descent(f, grad_f, hess_f, x0,
                                   MaxIter=MaxIter, learning_rate=learning_rate, verbose=False)
# Convert the list of iterates to an array so f can be evaluated on all of them.
paths = np.array(paths)

# Objective curve with the Newton iterates overlaid.
plt.plot(x, f(x))
plt.grid()
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('plot of f(x)')

plt.plot(paths, f(paths), 'o-')
plt.show()

# Objective value per recorded iterate; the horizontal axis is the iteration
# index because only y-values are passed to plot.
plt.plot(f(paths))
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

## Example 02
\begin{equation}
f(x, y) = (x-2)^2 + (y-2)^2
\end{equation}

Contour

In [None]:
# Lower bound, upper bound and spacing of the evaluation grid along each axis.
xmin, xmax, xstep = -4.0, 4.0, .25
ymin, ymax, ystep = -4.0, 4.0, .25

In [None]:
# Build 2-D coordinate grids. The stop value is padded by one step because
# np.arange excludes its stop; the intent is for xmax / ymax to be included.
x, y = np.meshgrid(np.arange(xmin, xmax + xstep, xstep), np.arange(ymin, ymax + ystep, ystep))

In [None]:
def f(x, y):
    """Example 02 objective: f(x, y) = (x - 2)^2 + (y - 2)^2."""
    return (x-2)**2 + (y-2)**2

In [None]:
# Objective evaluated at every grid point.
z = f(x, y)

In [None]:
# Point (2, 2), where both squared terms of f(x, y) vanish.
minima = np.array([2., 2.])

In [None]:
# Objective value at that point, shown as the cell output.
f(*minima)

In [None]:
# Column-vector form (shape (2, 1)) of the minimum, passed to surf below.
minima_ = minima.reshape(-1, 1)

In [None]:
from visualize import surf

In [None]:
# surf comes from the project-local `visualize` module; presumably it draws
# the surface of f over the grid and marks the minimum — confirm in visualize.py.
surf(f, x, y, minima=minima_)

In [None]:
from visualize import contour_with_quiver

In [None]:
def grad_f_x(x, y):
    """Partial derivative of (x - 2)^2 + (y - 2)^2 with respect to x."""
    return 2 * (x-2)


def grad_f_y(x, y):
    """Partial derivative of (x - 2)^2 + (y - 2)^2 with respect to y."""
    return 2 * (y-2)

In [None]:
def hessian_f(x, y):
    """Hessian of (x - 2)^2 + (y - 2)^2: the constant matrix [[2, 0], [0, 2]]."""
    return np.array([[2.0, 0.0],
                     [0.0, 2.0]])

### TODO 4

다음 식을 이용하여, `x1`을 구하는 코드를 작성하세요.

\begin{equation}
x^{(k+1)} = x^{(k)} - \alpha \nabla^2 f(x^{(k)})^{-1}\nabla f(x^{(k)})
\end{equation}

In [None]:
# Starting point, iteration budget and step size for the hand-written 2-D Newton loop.
x0 = np.array([-2., -2.])
MaxIter = 10
learning_rate = 1
for i in range(MaxIter):
    # Gradient vector and Hessian matrix evaluated at the current iterate.
    grad = np.array([grad_f_x(*x0), grad_f_y(*x0)])
    hess = hessian_f(*x0)
    # Exercise placeholder: compute the next iterate x1 from x0,
    # learning_rate, hess and grad according to the update formula above.
    # TODO 4
    x1 = None
    fval = f(*x0)
    print(i, x0, fval)
    # Advance to the next iterate; while x1 is still None, unpacking *x0
    # fails on the following pass.
    x0 = x1

In [None]:
def newton_descent_2d(func, gradx, grady, hessian, x0, MaxIter=10, learning_rate=1, verbose=True):
    """Minimize a two-variable function with damped Newton iterations.

    Each iteration solves ``hessian(x, y) @ step = gradient`` and moves to
    ``x - learning_rate * step``.

    Parameters
    ----------
    func : callable
        Objective, called as ``func(x, y)``.
    gradx, grady : callable
        Partial derivatives, each called as ``g(x, y)``.
    hessian : callable
        Called as ``hessian(x, y)``; must return a 2x2 array.
    x0 : array-like of length 2
        Starting point. It is not modified.
    MaxIter : int, optional
        Number of Newton steps to perform (0 returns the starting point).
    learning_rate : float, optional
        Multiplier applied to the Newton step.
    verbose : bool, optional
        When true, print the index, current point and its objective value
        before each step.

    Returns
    -------
    tuple
        ``(x_last, func(*x_last), paths, fval_paths)``. ``paths`` has shape
        ``(2, MaxIter + 1)`` with one column per iterate (start included), and
        ``fval_paths[k]`` is the objective value at column ``k`` of ``paths``.
    """
    # Use the callables passed in, not notebook globals, so the routine
    # optimizes whatever problem the caller supplies.
    x0 = np.asarray(x0, dtype=float)
    fval = func(*x0)
    paths = [x0]
    fval_paths = [fval]
    for i in range(MaxIter):
        if verbose:
            print(i, x0, fval)
        grad = np.array([gradx(*x0), grady(*x0)])
        hess = hessian(*x0)
        # Solve H * step = grad instead of forming the inverse explicitly.
        x0 = x0 - learning_rate * np.linalg.solve(hess, grad)
        # Record the value at the new iterate so fval_paths stays aligned
        # with paths and the returned fval belongs to the returned point.
        fval = func(*x0)
        paths.append(x0)
        fval_paths.append(fval)
    # One column per iterate; plain ndarray transpose replaces np.matrix.
    paths = np.array(paths).T
    fval_paths = np.array(fval_paths)
    return x0, fval, paths, fval_paths

In [None]:
# Reset the starting point (the hand-written loop above reassigns x0) and run
# the 2-D Newton routine with its default iteration count and step size.
x0 = np.array([-2., -2.])
xopt, fopt, paths, fval_paths = newton_descent_2d(f, grad_f_x, grad_f_y, hessian_f, x0)

In [None]:
from visualize import contour_with_path

In [None]:
# contour_with_path comes from the project-local `visualize` module; presumably
# it draws contours of f with the iterate path overlaid — confirm in visualize.py.
# The minimum is passed as a (2, 1) column vector.
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))

## Example 03
\begin{equation}
f(x, y) = 3(x-2)^2 + (y-2)^2
\end{equation}

In [None]:
def f(x, y):
    """Example 03 objective: f(x, y) = 3(x - 2)^2 + (y - 2)^2."""
    return 3*(x-2)**2 + (y-2)**2

### TODO 5

$f(x, y) = 3(x-2)^2 + (y-2)^2$의 Hessian function을 `hessian_f`에 구현해보세요.

In [None]:
# Partial derivatives of f(x, y) = 3(x-2)^2 + (y-2)^2 with respect to x and y.
grad_f_x = lambda x, y: 6 * (x-2)
grad_f_y = lambda x, y: 2 * (y-2)

# Exercise placeholder: this must become a callable hessian_f(x, y) returning
# the 2x2 Hessian, because newton_descent_2d calls hessian(*x0).
# TODO 5
hessian_f = None

In [None]:
# Run the 2-D Newton routine on Example 03, reusing the x0 set in an earlier cell.
xopt, fopt, paths, fval_paths = newton_descent_2d(f, grad_f_x, grad_f_y, hessian_f, x0)

In [None]:
# Show the Example 03 iterate path with the project-local contour helper;
# the minimum (2, 2) is passed as a column vector.
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))

### Steepest Descent vs. Newton Method

In [None]:
# steepest_descent_2d comes from the project-local `numerical_optimizers`
# module; presumably it performs plain gradient steps — confirm in that module.
from numerical_optimizers import steepest_descent_2d
# First comparison run: step size 1 from (-3, -3).
x0 = np.array([-3., -3.])
learning_rate = 1
xopt, fopt, paths, fval_paths = steepest_descent_2d(f, grad_f_x, grad_f_y, x0, 
                                                    learning_rate=learning_rate)
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))

In [None]:
# Repeated import keeps this cell runnable on its own.
from numerical_optimizers import steepest_descent_2d
# Second comparison run: same start point, smaller step size of 0.25.
x0 = np.array([-3., -3.])
learning_rate = 0.25
xopt, fopt, paths, fval_paths = steepest_descent_2d(f, grad_f_x, grad_f_y, x0, 
                                                    learning_rate=learning_rate)
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))

In [None]:
# Newton's method on Example 03 with the full step (learning_rate = 1.0),
# started from (-2, -3), for comparison with the steepest-descent runs.
x0 = np.array([-2., -3.])
learning_rate = 1.0
xopt, fopt, paths, fval_paths = newton_descent_2d(f, grad_f_x, grad_f_y, hessian_f, x0,
                                                 learning_rate=learning_rate)
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))

In [None]:
# Same Newton run with a damped step (learning_rate = 0.5), so each iteration
# applies only half of the Newton step.
x0 = np.array([-2., -3.])
learning_rate = 0.5
xopt, fopt, paths, fval_paths = newton_descent_2d(f, grad_f_x, grad_f_y, hessian_f, x0,
                                                 learning_rate=learning_rate)
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))