In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

## Example 1
\begin{equation}
f(x) = x^2 - 4x + 6
\end{equation}

In [None]:
def f(x):
    """Evaluate the quadratic x**2 - 4*x + 6 at ``x``.

    Only arithmetic operators are applied, so ``x`` may be a plain number or
    a NumPy array (in which case the result is computed element-wise).
    """
    square_term = x**2
    linear_term = 4*x
    return square_term - linear_term + 6

# f = lambda x: x**2 - 4*x + 6

$f(x)$의 그래프를 그려보기 위해 `np.linspace`를 사용하여 `-5`부터 `5`까지의 범위에서 `NumberOfPoints`개의 점을 같은 간격으로 뽑습니다.

In [None]:
# Sample the interval [-5, 5] at NumberOfPoints equally spaced locations
# (both end points are included by np.linspace).
NumberOfPoints = 101
x = np.linspace(start=-5., stop=5, num=NumberOfPoints)
print(x)

In [None]:
# Evaluate f at every sample point in one call; f only uses arithmetic
# operators, so the array x is processed element-wise.
fx = f(x)
print(fx)

In [None]:
# Draw the sampled curve of f with a grid, axis labels and a title.
plt.plot(x, fx)
plt.title('plot of f(x)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.show()

## 첫번째 시도 : 그냥 다 해보기(Brute Force)
모든 점을 다 계산한 후 그중에 가장 작은 값을 찾습니다.

In [None]:
# Brute force: locate the index of the smallest sampled value and read the
# corresponding x back from the sample grid.
xid = fx.argmin()
xopt = x[xid]
print(xopt, f(xopt))

In [None]:
# Same curve as before, with the brute-force minimiser marked by a red cross.
plt.plot(x, fx)
plt.plot(xopt, f(xopt), 'xr')

plt.title('plot of f(x)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.show()

In [None]:
def visualize(x, func):
    """Plot ``func`` evaluated at ``x`` and show the figure.

    Args:
        x: values for the horizontal axis; passed to ``func`` as a whole.
        func: callable evaluated once as ``func(x)`` to get the vertical values.

    The axis labels and the title are fixed to 'x', 'f(x)' and 'plot of f(x)'.
    """
    y_values = func(x)
    plt.plot(x, y_values)
    plt.title('plot of f(x)')
    plt.xlabel('x')
    plt.ylabel('f(x)')
    plt.grid()
    plt.show()

In [None]:
# Reproduce the earlier figure through the helper: visualize evaluates f(x) itself.
visualize(x, f)

## 두번째 시도 : Steepest Descent Method

With initial $x^{(0)}$, calculate the following equation :
\begin{equation}
x^{(k+1)} = x^{(k)} - \alpha \nabla f(x^{(k)})
\end{equation}

In [None]:
def grad_fx(x):
    """Return 2*x - 4, the derivative of x**2 - 4*x + 6, evaluated at ``x``.

    Works for scalars and NumPy arrays alike because only arithmetic is used.
    """
    doubled = 2*x
    return doubled - 4

### TODO 1
다음 식을 이용하여, `x1`을 구하는 코드를 작성하세요.

$$
x^{(k+1)} = x^{(k)} - \alpha \nabla f(x^{(k)})
$$

In [None]:
# Manual steepest-descent loop starting from x0 = 0 with a fixed step size.
x0 = 0.
MaxIter = 100
learning_rate = 0.01
for i in range(MaxIter):
    # TODO 1
    # Exercise placeholder: replace None with the update rule from the
    # markdown above. While x1 is None, the f(x1) call on the next line
    # raises TypeError and the loop stops at its first iteration.
    x1 = None
    print(i, x1, f(x1))
    # The new iterate becomes the starting point of the next step.
    x0 = x1

In [None]:
def steepest_descent(func, grad_func, x0, learning_rate=0.01, MaxIter=10, verbose=True):
    """Run a fixed number of steepest-descent steps on a one-variable function.

    Each step applies ``x <- x - learning_rate * grad_func(x)``.

    Args:
        func: objective; called on the iterates for reporting and on the
            final point for the returned value.
        grad_func: derivative of the objective.
        x0: starting point.
        learning_rate: step size multiplying the gradient.
        MaxIter: number of update steps (there is no convergence test).
        verbose: when true, print the step index, the new iterate and its
            objective value after every step.

    Returns:
        Tuple ``(x, func(x), paths)`` where ``x`` is the last iterate and
        ``paths`` lists the iterate after each step. The starting point is
        not part of ``paths``; with ``MaxIter=0`` the list is empty.
    """
    current = x0
    history = []
    for step in range(MaxIter):
        current = current - learning_rate * grad_func(current)
        if verbose:
            print('{0:03d} : {1:4.3f}, {2:4.2E}'.format(step, current, func(current)))
        history.append(current)
    return current, func(current), history

### TODO 2

`steepest_descent`를 사용하여, 시작 지점은 `0.0`, `learning_rate`은 `1.2`로 Steepest Descent Method를 실행하세요.

In [None]:
# TODO 2
# Exercise placeholder: the three None arguments (objective, gradient and
# learning rate) must be replaced before this cell can run.
# NOTE(review): x0 is whatever the previous loop cell left behind, which is
# not necessarily the 0.0 start point the instructions ask for - confirm.
xopt, fopt, paths = steepest_descent(None, None, x0, learning_rate=None)

In [None]:
# Overlay the visited iterates on the curve of f over [0.5, 2.5].
# Note that this rebinds the notebook-level names x and paths.
x = np.linspace(0.5, 2.5, 1000)
paths = np.array(paths)

plt.plot(x, f(x))
plt.plot(paths, f(paths), 'o-')

plt.title('plot of f(x)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.show()

In [None]:
# Objective value at each stored iterate. plt.plot receives a single
# sequence, so the horizontal axis is the iteration index rather than x.
plt.plot(f(paths), 'o-')
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

In [None]:
# With learning_rate=1 the update x - (2*x - 4) simplifies to 4 - x, so from
# x0 = 1.0 the iterates alternate between 3 and 1 and never approach x = 2.
xopt, fopt, paths = steepest_descent(f, grad_fx, 1.0, learning_rate=1)

In [None]:
# Overlay the visited iterates on the curve of f over [0.5, 3.5].
# Note that this rebinds the notebook-level names x and paths.
x = np.linspace(0.5, 3.5, 1000)
paths = np.array(paths)

plt.plot(x, f(x))
plt.plot(paths, f(paths), 'o-')

plt.title('plot of f(x)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.show()

In [None]:
# Objective value at each stored iterate. plt.plot receives a single
# sequence, so the horizontal axis is the iteration index rather than x.
plt.plot(f(paths))
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

### TODO 3

`steepest_descent`를 사용하여, 시작 지점은 `1.0`, `learning_rate`은 `0.001`로 Steepest Descent Method를 실행하세요.

In [None]:
# TODO 3
# Exercise placeholder: replace the four None values (objective, gradient,
# start point and learning rate) as described in the instructions above.
# As written the call fails inside steepest_descent because None is not callable.
xopt, fopt, paths = steepest_descent(None, None, None, learning_rate=None)

In [None]:
# Overlay the visited iterates on the curve of f over [0.5, 3.5].
# Note that this rebinds the notebook-level names x and paths.
x = np.linspace(0.5, 3.5, 1000)
paths = np.array(paths)

plt.plot(x, f(x))
plt.plot(paths, f(paths), 'o-')

plt.title('plot of f(x)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.show()

In [None]:
# Objective value at each stored iterate. plt.plot receives a single
# sequence, so the horizontal axis is the iteration index rather than x.
plt.plot(f(paths))
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

In [None]:
# With learning_rate=0.9 the update is x - 0.9*(2*x - 4) = 3.6 - 0.8*x, so the
# distance to x = 2 is multiplied by -0.8 each step: the iterates jump from
# one side of the minimum to the other while getting closer.
xopt, fopt, paths = steepest_descent(f, grad_fx, 3.0, learning_rate=0.9)

In [None]:
# Overlay the visited iterates on the curve of f over [0.5, 3.5].
# Note that this rebinds the notebook-level names x and paths.
x = np.linspace(0.5, 3.5, 1000)
paths = np.array(paths)

plt.plot(x, f(x))
plt.plot(paths, f(paths), 'o-')

plt.title('plot of f(x)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.show()

In [None]:
# Objective value at each stored iterate. plt.plot receives a single
# sequence, so the horizontal axis is the iteration index rather than x.
plt.plot(f(paths))
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

### TODO 4

`steepest_descent`를 사용하여, 시작 지점은 `3.0`, `learning_rate`은 `1.1`로 Steepest Descent Method를 실행하세요.

In [None]:
# TODO 4
# Exercise placeholder: steepest_descent needs at least func, grad_func and x0,
# so this one-argument call raises TypeError until the arguments described in
# the instructions above are supplied.
xopt, fopt, paths = steepest_descent(None)

In [None]:
# Overlay the visited iterates on the curve of f over the wider range [-2, 10].
# Note that this rebinds the notebook-level names x and paths.
x = np.linspace(-2, 10, 1000)
paths = np.array(paths)

plt.plot(x, f(x))
plt.plot(paths, f(paths), 'o-')

plt.title('plot of f(x)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.show()

In [None]:
# Objective value at each stored iterate. plt.plot receives a single
# sequence, so the horizontal axis is the iteration index rather than x.
plt.plot(f(paths))
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

In [None]:
from scipy.optimize import minimize

In [None]:
def f(x):
    """Quadratic objective x**2 - 4*x + 6 handed to scipy's minimizer."""
    return x**2 - 4*x + 6


# Initial guess for the solver; the cell displays the result object it returns.
x0 = 0.
minimize(f, x0)

In [None]:
# Keep the result object this time and print the solution (res.x) together
# with the objective value reported for it (res.fun).
res = minimize(f, x0)
print(res.x, res.fun)

## (Additional) non-convex function
\begin{equation}
\min_x x \sin(x)
\end{equation}

1. Define $f(x)$
1. Define $\nabla f(x)$
1. Tune parameters $x_0$, $\alpha$, `MaxIter`
1. Call `steepest_descent(,,,)`

In [None]:
# 501 equally spaced sample points covering [-2, 8].
x = np.linspace(-2, 8, 501)


def f(x):
    """Non-convex objective x * sin(x); element-wise for NumPy arrays."""
    return x * np.sin(x)


# Function values on the sample grid, used for the plot below.
fx = f(x)

In [None]:
# Quick look at x*sin(x) over the sampled range before running the optimiser.
plt.plot(x, fx)
plt.grid()
plt.show()

### TODO 5

`grad_f`라는 이름으로 $x \sin(x)$의 Gradient 함수를 구현하세요.

In [None]:
# TODO 5
# Exercise placeholder: bind grad_f to a callable that returns the derivative
# of x * sin(x). Later cells pass grad_f to steepest_descent, which calls it
# on every step, so those cells fail while grad_f is still None.
grad_f = None

### TODO 6

`steepest_descent`를 사용하여, 시작 지점은 `1.5`, `learning_rate`은 `0.5`로 Steepest Descent Method를 실행하세요.

In [None]:
# TODO 6
# Exercise placeholder: fill in the start point, the gradient function and the
# learning rate given in the instructions above; the call cannot run while
# they are None.
x0 = None
xopt, fopt, paths = steepest_descent(f, None, x0, learning_rate=None)

In [None]:
# Overlay the visited iterates on the curve of f, reusing the sample grid x
# that was created for this function earlier.
paths = np.array(paths)

plt.plot(x, f(x))
plt.plot(paths, f(paths), 'o-')

plt.title('plot of f(x)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid()
plt.show()

In [None]:
# Objective value at each stored iterate. plt.plot receives a single
# sequence, so the horizontal axis is the iteration index rather than x.
plt.plot(f(paths))
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

### TODO 7

`steepest_descent`를 사용하여, 시작 지점은 `7.7`, `learning_rate`은 `0.125`로 Steepest Descent Method를 실행하세요. 최대 Iteration 횟수는 `100`으로 넣어주세요

In [None]:
# TODO 7
# Exercise placeholder: set the start point, the iteration count and the
# learning rate from the instructions above. While MaxIter is None,
# range(MaxIter) inside steepest_descent raises TypeError.

x0 = None
MaxIter = None
learning_rate = None

# verbose=False suppresses the per-step printout; the plots below show the run.
xopt, fopt, paths = steepest_descent(f, grad_f, x0, learning_rate=learning_rate, \
                                     MaxIter=MaxIter, verbose=False)
# First figure: the visited iterates drawn on top of the curve of f.
paths = np.array(paths)
plt.plot(x,f(x))
plt.grid()
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('plot of f(x)')

plt.plot(paths, f(paths), 'o-')
plt.show()

# Second figure: objective value per stored iterate. plt.plot receives a
# single sequence, so the horizontal axis is the iteration index rather than x.
plt.plot(f(paths))
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

In [None]:
# Same experiment from a different start point and with a much larger step.
# NOTE: relies on grad_f having been implemented in the TODO 5 cell.
x0 = -1.0
MaxIter = 100
learning_rate = 1.0

xopt, fopt, paths = steepest_descent(f, grad_f, x0, learning_rate=learning_rate, MaxIter=MaxIter, verbose=False)

# First figure: the visited iterates drawn on top of the curve of f.
paths = np.array(paths)
plt.plot(x,f(x))
plt.grid()
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('plot of f(x)')

plt.plot(paths, f(paths), 'o-')
plt.show()

# Second figure: objective value per stored iterate. plt.plot receives a
# single sequence, so the horizontal axis is the iteration index rather than x.
plt.plot(f(paths))
plt.grid()
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('plot of cost')
plt.show()

## Example 02
\begin{equation}
f(x, y) = (x-2)^2 + (y-2)^2
\end{equation}

Contour

In [None]:
# Both axes of the plotting grid share the same bounds and spacing.
xmin = ymin = -4.0
xmax = ymax = 4.0
xstep = ystep = .25

In [None]:
# Build the 2-D evaluation grid. np.arange excludes its stop value, so one
# extra step is added to the stop to keep xmax / ymax on the grid.
x, y = np.meshgrid(np.arange(xmin, xmax + xstep, xstep), np.arange(ymin, ymax + ystep, ystep))

In [None]:
def f(x, y):
    """Two-variable objective (x-2)**2 + (y-2)**2; element-wise for arrays."""
    return (x-2)**2 + (y-2)**2

In [None]:
# Evaluate f on the whole meshgrid in one call (element-wise over x and y).
z = f(x, y)

In [None]:
# Minimiser of (x-2)**2 + (y-2)**2: both squared terms vanish at (2, 2).
minima = np.array([2., 2.])

In [None]:
# Unpack the point into (x, y); the cell displays the function value there.
f(*minima)

In [None]:
# Column-vector view of the minimiser, shape (2, 1), passed as the `minima`
# argument of the plotting helpers below.
minima_ = minima.reshape(-1, 1)

In [None]:
from visualize import surf

In [None]:
# surf comes from the local `visualize` module (not shown in this notebook);
# presumably it draws f over the grid as a surface and marks `minima` - confirm.
surf(f, x, y, minima=minima_)

In [None]:
from visualize import contour_with_quiver

In [None]:
def grad_f_x(x, y):
    """Partial derivative of (x-2)**2 + (y-2)**2 with respect to x."""
    return 2 * (x-2)


def grad_f_y(x, y):
    """Partial derivative of (x-2)**2 + (y-2)**2 with respect to y."""
    return 2 * (y-2)

In [None]:
# contour_with_quiver comes from the local `visualize` module (not shown here);
# presumably it draws contours of f with the gradient field as arrows - confirm.
contour_with_quiver(f, x, y, grad_f_x, grad_f_y, minima=minima_)

### TODO 8

다음 식을 이용하여, `x1`을 구하는 코드를 작성하세요.

$$
x^{(k+1)} = x^{(k)} - \alpha \nabla f(x^{(k)})
$$

In [None]:
# Manual 2-D steepest-descent loop starting from (-2, -2).
x0 = np.array([-2., -2.])
MaxIter = 10
learning_rate = .25
for i in range(MaxIter):
    # TODO 8
    # Gradient vector at the current point, built from the two partial derivatives.
    grad = np.array([grad_f_x(*x0), grad_f_y(*x0)])
    # Exercise placeholder: replace None with the update rule from the
    # markdown above. While x1 is None, f(*x1) on the next line raises TypeError.
    x1 = None
    fval = f(*x1)
    print(i, x1, fval)
    # The new iterate becomes the starting point of the next step.
    x0 = x1

In [None]:
def steepest_descent_twod(func, gradx, grady, x0, MaxIter=10, learning_rate=0.25, verbose=True):
    """Run a fixed number of steepest-descent steps on a two-variable function.

    Each step applies ``x <- x - learning_rate * [gradx(*x), grady(*x)]``.

    Args:
        func: objective, called as ``func(x, y)``.
        gradx: partial derivative with respect to x, called as ``gradx(x, y)``.
        grady: partial derivative with respect to y, called as ``grady(x, y)``.
        x0: starting point, a length-2 array-like ``[x, y]``.
        MaxIter: number of update steps (there is no convergence test).
        learning_rate: step size multiplying the gradient.
        verbose: when true, print the step index, the new point and its
            objective value after every step.

    Returns:
        Tuple ``(x, fval, paths, fval_paths)``:
        ``x`` is the last point, ``fval`` its objective value, ``paths`` an
        array of shape ``(2, MaxIter + 1)`` whose columns are the visited
        points (starting point included) and ``fval_paths`` the objective
        values at those points.
    """
    # Use the callables that were passed in; the previous version silently
    # used the notebook globals f / grad_f_x / grad_f_y instead.
    x0 = np.asarray(x0, dtype=float)
    paths = [x0]
    # Initialised here so a MaxIter=0 call still has a value to return.
    fval = func(*x0)
    fval_paths = [fval]
    for i in range(MaxIter):
        grad = np.array([gradx(*x0), grady(*x0)])
        x1 = x0 - learning_rate * grad
        fval = func(*x1)
        if verbose:
            print(i, x1, fval)
        x0 = x1
        paths.append(x0)
        fval_paths.append(fval)
    # One column per visited point; a plain ndarray transpose replaces the
    # former round trip through np.matrix.
    paths = np.array(paths).T
    fval_paths = np.array(fval_paths)
    return (x0, fval, paths, fval_paths)

In [None]:
# Reset the start point (the manual loop above overwrote x0) and run the
# packaged version with its default MaxIter and learning_rate.
x0 = np.array([-2., -2.])
xopt, fopt, paths, fval_paths = steepest_descent_twod(f, grad_f_x, grad_f_y, x0)

In [None]:
from visualize import contour_with_path

In [None]:
# contour_with_path comes from the local `visualize` module (not shown here);
# presumably it draws contours of f with the descent path on top - confirm.
# The minimiser (2, 2) is passed as a 2x1 column.
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))

## Example 03
\begin{equation}
f(x, y) = 3(x-2)^2 + (y-2)^2
\end{equation}

In [None]:
def f(x, y):
    """Two-variable objective 3*(x-2)**2 + (y-2)**2; element-wise for arrays."""
    return 3*(x-2)**2 + (y-2)**2

In [None]:
def grad_f_x(x, y):
    """Partial derivative of 3*(x-2)**2 + (y-2)**2 with respect to x."""
    return 6 * (x-2)


def grad_f_y(x, y):
    """Partial derivative of 3*(x-2)**2 + (y-2)**2 with respect to y."""
    return 2 * (y-2)

In [None]:
from numerical_optimizers import steepest_descent_2d
# steepest_descent_2d is imported from the local numerical_optimizers module
# (not shown here); this call assumes it returns the same four values as
# steepest_descent_twod above - confirm.
# x0 is reused from an earlier cell; it is not redefined in this example.
xopt, fopt, paths, fval_paths = steepest_descent_2d(f, grad_f_x, grad_f_y, x0)

In [None]:
# Draw the new objective and the descent path with the helpers from the local
# `visualize` module; minima_ and the grid x, y are reused from Example 02.
surf(f, x, y, minima=minima_)
# contour_with_quiver(f, x, y, grad_f_x, grad_f_y, minima=minima_)
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))

## Example 04
\begin{equation}
f(x, y) = 3(x-2)^2 + (y-2)^4
\end{equation}

In [None]:
# Objective with a quartic term in y.
f = lambda x,y : 3*(x-2)**2 + (y-2)**4

# TODO 9
# Exercise placeholder: grad_f_y currently returns None and must be replaced
# by the partial derivative of f with respect to y before the optimiser call
# below can produce a meaningful path.
grad_f_x = lambda x, y: 6 * (x-2)
grad_f_y = lambda x, y: None

x0 = np.array([-2., -2.])
learning_rate = 0.01

# steepest_descent_2d comes from the local numerical_optimizers module (not shown here).
xopt, fopt, paths, fval_paths = steepest_descent_2d(f, grad_f_x, grad_f_y, x0, 
                                                    learning_rate=learning_rate, MaxIter=200, verbose=False)

In [None]:
# Draw the quartic objective and the descent path with the helpers from the
# local `visualize` module; minima_ and the grid x, y are reused from Example 02.
surf(f, x, y, minima=minima_)
# contour_with_quiver(f, x, y, grad_f_x, grad_f_y, minima=minima_)
contour_with_path(f, x, y, paths, minima=np.array([[2],[2]]))

## Example 05
\begin{equation}
f(x,y) = \sin(\pi x)\sin(\pi y)
\end{equation}

In [None]:
# Finer grid for the oscillating objective: same bounds as before, spacing 1/16.
xmin = ymin = -4.0
xmax = ymax = 4.0
xstep = ystep = .0625

# np.arange excludes its stop value, so one extra step keeps xmax / ymax on the grid.
x, y = np.meshgrid(
    np.arange(xmin, xmax + xstep, xstep),
    np.arange(ymin, ymax + ystep, ystep),
)

In [None]:
def f(x, y):
    """Objective sin(pi*x) * sin(pi*y); element-wise for NumPy arrays."""
    return np.sin(np.pi*x) * np.sin(np.pi*y)


def grad_f_x(x, y):
    """Partial derivative of sin(pi*x) * sin(pi*y) with respect to x."""
    return np.pi*np.cos(np.pi*x) * np.sin(np.pi*y)


def grad_f_y(x, y):
    """Partial derivative of sin(pi*x) * sin(pi*y) with respect to y."""
    return np.pi * np.sin(np.pi*x) * np.cos(np.pi*y)


learning_rate = 0.01
# x0 is not set in this cell: the run starts from whatever an earlier cell left in x0.
xopt, fopt, paths, fval_paths = steepest_descent_2d(
    f, grad_f_x, grad_f_y, x0,
    learning_rate=learning_rate, MaxIter=200, verbose=False,
)

In [None]:
# Plot with the helpers from the local `visualize` module (not shown here).
# No `minima` is passed in this example; contour levels are set explicitly to
# ten values spanning [-1, 1].
# NOTE(review): the meaning of norm=None depends on the helpers - confirm.
surf(f, x, y, norm=None)
# contour_with_quiver(f, x, y, grad_f_x, grad_f_y, minima=minima_)
contour_with_path(f, x, y, paths, norm=None, level=np.linspace(-1, 1, 10))

In [None]:
# Restart slightly off the origin, at (0, 0.01), keeping the current learning_rate.
x0 = np.array([0, 0.01])
xopt, fopt, paths, fval_paths = steepest_descent_2d(f, grad_f_x, grad_f_y, x0, 
                                                    learning_rate=learning_rate, MaxIter=200, verbose=False)
contour_with_path(f, x, y, paths, norm=None, level=np.linspace(-1, 1, 10))
# Bare tuple as the last expression: the cell displays the final point and value.
xopt, fopt

In [None]:
# Same experiment with the start point mirrored to the other side of the x-axis, (0, -0.01).
x0 = np.array([0, -0.01])
xopt, fopt, paths, fval_paths = steepest_descent_2d(f, grad_f_x, grad_f_y, x0, 
                                                    learning_rate=learning_rate, MaxIter=200, verbose=False)
contour_with_path(f, x, y, paths, norm=None, level=np.linspace(-1, 1, 10))
print(xopt, fopt)

In [None]:
# Start further from the origin, at (0, 0.249), and use a larger step size
# (0.025 instead of 0.01) for the same 200 iterations.
x0 = np.array([0, 0.249])
learning_rate = 0.025
xopt, fopt, paths, fval_paths = steepest_descent_2d(f, grad_f_x, grad_f_y, x0, 
                                                    learning_rate=learning_rate, MaxIter=200, verbose=False)
contour_with_path(f, x, y, paths, norm=None, level=np.linspace(-1, 1, 10))
print(xopt, fopt)