In [1]:
import numpy as np
import pandas as pd
import sympy as sp
import itertools as it
import math
from sympy.abc import x, y
from sympy import Matrix
import random

# don't show warnings in Jupyter (e.g. log = inf)
import warnings
warnings.filterwarnings('ignore')

In [2]:
%%capture
%run Lab02.ipynb

In [3]:
f1 = 100 * (y - x ** 2)  ** 2 + (1 - x) ** 2
f1_x0 = pt(-1.9, 2.0)
f1_xmin = pt(1, 1)
f1_min = 0

f2 = (x - 4) ** 2 + 4 * (y - 2) ** 2
f2_x0 = pt(0.1, 0.3)
f2_xmin = pt(4, 2)
f2_min = 0

f3 = (x - 2) ** 2 + (y + 3) ** 2
f3_x0 = pt(0, 0)
f3_xmin = pt(2, -3)
f3_min = 0

f4 = (x - 3) ** 2 + y ** 2
f4_x0 = pt(0, 0)
f4_xmin = pt(3, 0)
f4_min = 0

In [4]:
from sympy.tensor.array import derive_by_array

def lambdify(sp_fun):
    return sp.lambdify([(x, y)], sp_fun)

@counted
def eval_lambda(f, pt):
    return f(pt)

@counted
def evalf(f, pt):
    return sp.lambdify([(x,y)], f)(pt)

@counted
def gradient(f):
    xy = (x, y)
    return lambda pt: evalf(derive_by_array(f, xy), pt)

@counted
def hessian(f):
    xy = (x, y)
    return lambda pt: evalf(derive_by_array(derive_by_array(f, xy), xy), pt)

## Optimizacijske funkcije

In [5]:
@counted
def optimize_move_callable(f, d, epsilon):
    l, r = unimodal(f, d)
    return golden_section(f, l, r, epsilon)

@counted
def no_optimization(*args):
    return 1.0

In [6]:
def reset():
    hessian.called = 0
    gradient.called = 0
    evalf.called = 0
    optimize_move_callable.called = 0
    no_optimization.called = 0

# Gradijentni spust

In [7]:
def gradient_descent(f, x0, epsilon = 1e-6, max_iter = 1000,
                     optimize_move_callable = lambda *x: 1.0):
    gradient_callable = gradient(f)
    gradient_vector = gradient_callable(x0)
    hessian_callable = hessian(f)
    hessian_vector = hessian_callable(x0)
    x = Point(x0)
    i = 0
    
    while np.linalg.norm(gradient_vector) > epsilon and i < max_iter:
        move_vector = np.dot(np.linalg.inv(hessian_vector), gradient_vector)
        vector_mul = optimize_move_callable(
            lambda d: evalf(f, x - d * move_vector), 1, epsilon)
        x -= vector_mul * move_vector
        
        gradient_vector = gradient_callable(x)
        hessian_vector = hessian_callable(x)
        
        i += 1
        
    return x

gradient_descent(f1, f1_x0)

Point([ 1.,  1.])

In [8]:
gradient_descent(f1, f1_x0, optimize_move_callable=optimize_move_callable)

Point([ 1.,  1.])

# Newton Raphson

In [9]:
def newton_raphson(f, x0, e = 1e-6, optimize_move_callable=lambda *x: 1.0, max_iter=1000):
    x = Point(x0)

    gradient_callable = gradient(f)
    hessian_callable = hessian(f)
    gradient_vector = gradient_callable(x)
    
    i = 0
    while norm(gradient_vector) > e and i < max_iter:
        i += 1
        
        gradient_vector = np.dot(np.linalg.inv(hessian_callable(x)), 
                                 gradient_callable(x)).reshape(-1)
        x -= gradient_vector * optimize_move_callable(
            lambda d: evalf(f, x - d * gradient_vector), 1, e)
        
    return x

In [10]:
newton_raphson(f1, f1_x0)

Point([ 1.,  1.])

# Box

In [11]:
def box(f, x0, gs, xd, xg, alpha=1.3, epsilon1 = 1e-6, epsilon2 = 1e-6, max_iter=1000):
    
    xc = Point(x0)
    n = len(x0)
    h, h2 = -1, -2
    
    reflect = lambda xc, xh: xc + alpha * (xc - xh)
    x = np.ones([2 * n, n])
    
    for t in range(2 * n):
        for i in range(n):
            x[t][i] = xd[i] + random.random() * (xg[i] - xd[i])
        while any(g(x[t]) < 0 for g in gs):
            x[t] = 0.5 * (x[t] + xc)
        
        xc = np.mean(x, axis=0)
            
    i = 0
    while i < max_iter:
        i += 1
        x = sorted(x, key=lambda x: f(x))
        
        xc = np.mean(x[:h], axis=0)
        xr = reflect(xc, x[h])
        
        for i in range(n):
            if xr[i] < xd[i]:
                xr[i] = xd[i]
            elif xr[i] > xg[i]:
                xr[i] = xg[i]
                
        while any(g(xr) < 0 for g in gs):
            xr = 0.5 * (xr + xc)
            
        if f(xr) > f(x[h2]):
            xr = 0.5 * (xr + xc)
            
        x[h] = xr
        
        if norm(x[h] - xc) <= epsilon1 or abs(f(x[h]) - f(xc)) < epsilon2:
            break
            
    return Point(xc)

# Transformacije 

In [12]:
from sympy import log

In [13]:
def u(f, gs, hs):
    return lambda t: f \
        - 1 / t * sum(log(g) for g in gs) \
        + t * sum(h ** 2 for h in hs)

def transform(f, x0, gs=[], hs=[], t=1.0, epsilon=10e-6):
    f_modified = u(f, gs, hs)
    x = Point(x0)
    
    while True:
        x_old = Point(x)
        f_modified_t = f_modified(t)
        x = hooke_jeeves(lambda point: evalf(f_modified_t, point), x)
        t *= 10

        if all(abs(xi1 - xi2) < epsilon for xi1, xi2 in zip(x_old, x)):
            return x, int(math.log(t, 10) + 1)

In [14]:
transform(f1, f1_x0, gs=[y - x, 2 - x, y + 100, 100 - y, x + 100, 100 - x, ])

(Point([ 0.01021347,  0.01021385]), 8)

# 1.
Primijenite postupak gradijentnog spusta na funkciju 3, uz i bez određivanja optimalnog iznosa
koraka. Što možete zaključiti iz rezultata?

Obje metode daju jednako dobar rezultat.

In [15]:
gradient_descent(f3, f3_x0)

Point([ 2., -3.])

In [16]:
gradient_descent(f3, f3_x0, optimize_move_callable=optimize_move_callable)

Point([ 2., -3.])

In [17]:
f3_xmin

Point([ 2., -3.])

In [18]:
reset()

# 2.
Primijenite postupak gradijentnog spusta i Newton-Raphsonov postupak na funkcije 1 i 2 s
određivanjem optimalnog iznosa koraka. Kako se Newton-Raphsonov postupak ponaša na ovim
funkcijama? Ispišite broj izračuna funkcije, gradijenta i Hesseove matrice. 

In [19]:
fs = [f1, f2]
xs = [f1_x0, f2_x0]
opts = [no_optimization, optimize_move_callable]
search_fs = [newton_raphson, gradient_descent]

for search_f, optimize_callable, (f_min, x0) in it.product(search_fs, opts, zip(fs, x0s)):
    reset()
    result = search_f(
        f_min, x0, optimize_move_callable=optimize_callable)
    
    print(search_f.__name__, 
          "\n(" + optimize_callable.__name__, ",", str(f_min) + ")", "\n"
          "Result:", result)
    print(optimize_callable.called, "iterations\n")

newton_raphson 
(no_optimization , (-x + 1)**2 + 100*(-x**2 + y)**2) 
Result: [ 1.  1.]
6 iterations

newton_raphson 
(no_optimization , (x - 4)**2 + 4*(y - 2)**2) 
Result: [ 4.  2.]
2 iterations

newton_raphson 
(optimize_move_callable , (-x + 1)**2 + 100*(-x**2 + y)**2) 
Result: [ 1.  1.]
19 iterations

newton_raphson 
(optimize_move_callable , (x - 4)**2 + 4*(y - 2)**2) 
Result: [ 4.  2.]
3 iterations

gradient_descent 
(no_optimization , (-x + 1)**2 + 100*(-x**2 + y)**2) 
Result: [ 1.  1.]
5 iterations

gradient_descent 
(no_optimization , (x - 4)**2 + 4*(y - 2)**2) 
Result: [ 4.  2.]
1 iterations

gradient_descent 
(optimize_move_callable , (-x + 1)**2 + 100*(-x**2 + y)**2) 
Result: [ 1.  1.]
19 iterations

gradient_descent 
(optimize_move_callable , (x - 4)**2 + 4*(y - 2)**2) 
Result: [ 4.  2.]
2 iterations



# 3.
Primijenite postupak po Boxu na funkcije 1 i 2 uz implicitna ograničenja: (x2-x1 >= 0), (2-x1 >= 0) i
eksplicitna ograničenja prema kojima su sve varijable u intervalu [-100, 100]. Mijenja li se položaj
optimuma uz nametnuta ograničenja?

S obzirom da za f1 zadovoljava uvjete, optimum se ne mijenja. Za f2, x (=4) ne zadovoljava jednadzbu 2 - x >= 0 pa je pomaknut u tocku 2.

In [20]:
gs = [y - x, 2 - x, y + 100, 100 - y, x + 100, 100 - x, ]
gs = [lambdify(g) for g in gs]

In [21]:
box(lambdify(f1), f1_x0, gs, Point([-100, -100]), Point([100, 100]), max_iter=10000)

Point([ 1.00133642,  1.00273114])

In [22]:
f1_xmin

Point([ 1.,  1.])

In [23]:
box(lambdify(f2), f2_x0, gs, Point([-100, -100]), Point([100, 100]), max_iter=10000)

Point([ 1.9999993 ,  2.00093146])

In [24]:
f2_xmin

Point([ 4.,  2.])

# 4.
Primijenite postupak transformacije u problem bez ograničenja na funkcije 1 i 2 s ograničenjima iz
prethodnog zadatka (zanemarite eksplicitna ograničenja). Novodobiveni problem optimizacije bez
ograničenja minimizirajte koristeći postupak Hooke-Jeeves ili postupak simpleksa po Nelderu i
Meadu. Može li se uz zadanu početnu točku pronaći optimalno rješenje problema s ograničenjima?
Ako ne, probajte odabrati početnu točku iz koje je moguće pronaći rješenje.

In [27]:
# eksplicitna ogranicenja zanemarena
gs = [y - x, 2 - x, ]

In [28]:
reset()
sol, n = transform(f1, f1_x0, gs=gs)

In [29]:
print(sol)
print(n)

[ 0.01021347  0.01021385]
8


In [30]:
reset()
sol, n = transform(f2, f2_x0, gs=gs)

In [31]:
print(sol)
print(n)

[ 1.99999962  2.00111656]
8


# 5.
Za funkciju 4 s ograničenjima (3-x1-x2>=0), (3+1.5*x1-x2>=0) i (x2-1=0) probajte pronaći
minimum koristeći postupak transformacije u problem bez ograničenja (također koristite HookeJeeves
ili postupak simpleksa po Nelderu i Meadu za minimizaciju). Probajte kao početnu točku
postaviti neku točku koja ne zadovoljava ograničenja nejednakosti (primjerice točku (5,5)) te
pomoću postupka pronalaženja unutarnje točke odredite drugu točku koja zadovoljava ograničenja
nejednakosti te ju iskoristite kao početnu točku za postupak minimizacije.

In [32]:
gs=[3 - x - y, 3 + 1.5 * x - y]
hs=[(y - 1) ** 2]

In [33]:
sol, n = transform(f4, f4_x0, gs=gs, hs=hs)

In [34]:
sol

Point([ 2.0213089 ,  0.97868919])

In [35]:
n

8

In [36]:
f4_xmin

Point([ 3.,  0.])