***1. Gradijentni spust***

In [1]:
import numpy as np

In [2]:
def f(x):
    """Evaluate the quadratic objective 0.5 * (x[0]^2 + 10 * x[1]^2).

    ``x`` is indexed at positions 0 and 1; any further entries are ignored.
    """
    first_term = x[0]**2
    second_term = 10*x[1]**2
    return 0.5*(first_term + second_term)

In [3]:
def gradient(x):
    """Return the vector [x[0], 10 * x[1]] as a numpy array.

    This is the gradient of the quadratic 0.5 * (x[0]^2 + 10 * x[1]^2).
    """
    partial_0 = x[0]
    partial_1 = 10*x[1]
    return np.array([partial_0, partial_1])

In [7]:
def gradient_descent(f, gradient, x0, alpha, eps, max_iter):
    """Minimize ``f`` with fixed-step gradient descent.

    Every iteration computes ``x_new = x - alpha * gradient(x)`` and stops as
    soon as the objective value changes by less than ``eps`` between two
    consecutive iterates.

    Parameters
    ----------
    f : callable
        Objective function, called as ``f(x)``.
    gradient : callable
        Gradient of the objective, called as ``gradient(x)``.
    x0 : numpy.ndarray
        Starting point. It is only read, never updated in place here.
    alpha : float
        Step size.
    eps : float
        Tolerance on ``|f(x_new) - f(x)|`` used as the stopping criterion.
    max_iter : int
        Maximum number of iterations to run.

    Returns
    -------
    dict
        ``'converged'`` -- True only if the stopping criterion was met before
        the iteration budget ran out.
        ``'num_iter'`` -- zero-based index of the last iteration executed
        (0 when no iteration ran).
        ``'x'`` -- the last iterate computed (``x0`` when no iteration ran).
    """
    x = x0
    # Defaults cover the case where the loop body never runs (max_iter <= 0).
    x_new = x0
    num_iter = 0
    # Convergence is recorded explicitly: the loop index can never equal
    # max_iter, so it cannot be used to tell the two exit paths apart.
    converged = False

    for i in range(max_iter):
        num_iter = i
        x_new = x - alpha*gradient(x)

        if np.abs(f(x_new) - f(x)) < eps:
            converged = True
            break

        x = x_new

    result = {}
    result['converged'] = converged
    result['num_iter'] = num_iter
    result['x'] = x_new

    return result

In [5]:
# Shared settings for every optimizer run in this notebook.
x0 = np.array((3, 5))   # starting point
alpha = 1e-1            # step size
eps = 1e-3              # tolerance on the change of f between iterates
max_iter = 1_000        # iteration budget

In [8]:
# Plain gradient descent on f, started from x0.
gradient_descent(
    f=f,
    gradient=gradient,
    x0=x0,
    alpha=alpha,
    eps=eps,
    max_iter=max_iter,
)

{'converged': True, 'num_iter': 33, 'x': array([0.08343852, 0.        ])}

***2. Inercija***

In [13]:
def momentum(f, gradient, x0, alpha, eps, max_iter, beta):
    """Minimize ``f`` with gradient descent with momentum.

    The step ``d`` is a decaying accumulation of past gradient steps:
    ``d = beta * d + alpha * gradient(x)``, and the iterate is updated as
    ``x_new = x - d``. Iteration stops as soon as the objective value changes
    by less than ``eps`` between two consecutive iterates.

    Parameters
    ----------
    f : callable
        Objective function, called as ``f(x)``.
    gradient : callable
        Gradient of the objective, called as ``gradient(x)``.
    x0 : numpy.ndarray
        Starting point. It is only read, never updated in place here.
    alpha : float
        Step size applied to the current gradient.
    eps : float
        Tolerance on ``|f(x_new) - f(x)|`` used as the stopping criterion.
    max_iter : int
        Maximum number of iterations to run.
    beta : float
        Weight given to the previous step ``d``.

    Returns
    -------
    dict
        ``'converged'`` -- True only if the stopping criterion was met before
        the iteration budget ran out.
        ``'num_iter'`` -- zero-based index of the last iteration executed
        (0 when no iteration ran).
        ``'x'`` -- the last iterate computed (``x0`` when no iteration ran).
    """
    x = x0
    d = 0  # accumulated (averaged) step; starts with no history

    # Defaults cover the case where the loop body never runs (max_iter <= 0).
    x_new = x0
    num_iter = 0
    # Convergence is recorded explicitly: the loop index can never equal
    # max_iter, so it cannot be used to tell the two exit paths apart.
    converged = False

    for i in range(max_iter):
        num_iter = i
        d = beta*d + alpha*gradient(x)
        x_new = x - d

        if np.abs(f(x_new) - f(x)) < eps:
            converged = True
            break

        x = x_new

    result = {}
    result['converged'] = converged
    result['num_iter'] = num_iter
    result['x'] = x_new

    return result

In [14]:
# Gradient descent with momentum on f, started from x0.
momentum(
    f=f,
    gradient=gradient,
    x0=x0,
    alpha=alpha,
    eps=eps,
    max_iter=max_iter,
    beta=0.9,
)

{'converged': True, 'num_iter': 91, 'x': array([-0.00569491,  0.02442859])}

***3. Nesterovljev ubrzani gradijentni spust***

In [15]:
def nesterov(f, gradient, x0, alpha, eps, max_iter, beta):
    """Minimize ``f`` with Nesterov accelerated gradient descent.

    Like momentum, but the gradient is evaluated at the look-ahead point
    ``x - beta * d`` rather than at ``x``:
    ``d = beta * d + alpha * gradient(x - beta * d)``, then ``x_new = x - d``.
    Iteration stops as soon as the objective value changes by less than
    ``eps`` between two consecutive iterates.

    Parameters
    ----------
    f : callable
        Objective function, called as ``f(x)``.
    gradient : callable
        Gradient of the objective, called as ``gradient(point)``.
    x0 : numpy.ndarray
        Starting point. It is only read, never updated in place here.
    alpha : float
        Step size applied to the look-ahead gradient.
    eps : float
        Tolerance on ``|f(x_new) - f(x)|`` used as the stopping criterion.
    max_iter : int
        Maximum number of iterations to run.
    beta : float
        Weight given to the previous step ``d``.

    Returns
    -------
    dict
        ``'converged'`` -- True only if the stopping criterion was met before
        the iteration budget ran out.
        ``'num_iter'`` -- zero-based index of the last iteration executed
        (0 when no iteration ran).
        ``'x'`` -- the last iterate computed (``x0`` when no iteration ran).
    """
    x = x0
    d = 0  # accumulated step; starts with no history

    # Defaults cover the case where the loop body never runs (max_iter <= 0).
    x_new = x0
    num_iter = 0
    # Convergence is recorded explicitly: the loop index can never equal
    # max_iter, so it cannot be used to tell the two exit paths apart.
    converged = False

    for i in range(max_iter):
        num_iter = i
        # Gradient is taken where the previous step alone would carry us.
        d = beta*d + alpha*gradient(x - beta*d)
        x_new = x - d

        if np.abs(f(x_new) - f(x)) < eps:
            converged = True
            break

        x = x_new

    result = {}
    result['converged'] = converged
    result['num_iter'] = num_iter
    result['x'] = x_new

    return result

In [16]:
# Nesterov accelerated gradient descent on f, started from x0.
nesterov(
    f=f,
    gradient=gradient,
    x0=x0,
    alpha=alpha,
    eps=eps,
    max_iter=max_iter,
    beta=0.9,
)

{'converged': True, 'num_iter': 25, 'x': array([-0.04738745,  0.        ])}

***4. Adam***

In [17]:
def adam(f, gradient, x0, alpha, eps, max_iter, beta1, beta2, delta):
    """Minimize ``f`` with the Adam update rule.

    Keeps exponentially weighted averages of the gradient (``m``) and of its
    element-wise square (``v``), divides each by ``1 - beta**i`` to correct
    the bias from the zero initialization, and steps by
    ``alpha * m_hat / (sqrt(v_hat) + delta)``. Iteration stops as soon as the
    objective value changes by less than ``eps`` between two consecutive
    iterates.

    Parameters
    ----------
    f : callable
        Objective function, called as ``f(x)``.
    gradient : callable
        Gradient of the objective, called as ``gradient(x)``; the result must
        support element-wise ``**``.
    x0 : numpy.ndarray
        Starting point. It is only read, never updated in place here.
    alpha : float
        Step size.
    eps : float
        Tolerance on ``|f(x_new) - f(x)|`` used as the stopping criterion.
    max_iter : int
        Maximum number of update steps; exactly this many are allowed.
    beta1 : float
        Averaging weight for the first moment.
    beta2 : float
        Averaging weight for the second moment.
    delta : float
        Small constant added to the denominator to avoid division by zero.

    Returns
    -------
    dict
        ``'converged'`` -- True only if the stopping criterion was met before
        the iteration budget ran out.
        ``'num_iter'`` -- one-based index of the last update step executed
        (0 when no step ran).
        ``'x'`` -- the last iterate computed (``x0`` when no step ran).
    """
    # First moment estimate (running mean of the gradient).
    m = 0
    # Second moment estimate (running mean of the squared gradient).
    v = 0

    x = x0
    # Defaults cover the case where the loop body never runs (max_iter <= 0).
    x_new = x0
    num_iter = 0
    # Convergence is recorded explicitly instead of being inferred from the
    # loop index, which cannot distinguish a break from an exhausted budget.
    converged = False

    # The step counter is one-based because the bias correction divides by
    # 1 - beta**i, which would be zero for i == 0. The upper bound is
    # max_iter + 1 so that max_iter steps are actually performed.
    for i in range(1, max_iter + 1):
        num_iter = i
        g = gradient(x)  # evaluated once and reused for both moments

        m = beta1*m + (1-beta1)*g
        v = beta2*v + (1-beta2)*g**2

        m_hat = m / (1-beta1**i)
        v_hat = v / (1-beta2**i)

        x_new = x - alpha*(m_hat / (np.sqrt(v_hat) + delta))

        if np.abs(f(x_new) - f(x)) < eps:
            converged = True
            break

        x = x_new

    result = {}
    result['converged'] = converged
    result['num_iter'] = num_iter
    result['x'] = x_new

    return result

In [18]:
# Adam on f, started from x0.
adam(
    f=f,
    gradient=gradient,
    x0=x0,
    alpha=alpha,
    eps=eps,
    max_iter=max_iter,
    beta1=0.9,
    beta2=0.999,
    delta=1e-7,
)

{'converged': True, 'num_iter': 86, 'x': array([0.01802498, 0.00438169])}