# Exercise 1 b)

In [26]:
# optimization algorithm to minimize unconstrained function
from scipy.optimize import newton
from math import sqrt
import pandas as pd
pd.options.plotting.backend = 'plotly'
import numpy as np
import matplotlib as plt

import plotly.express as px
import cufflinks as cf
import plotly.offline
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

Function to minimize:<br>

$f(x) = x_1^2 + x_2^2 + 16x_3^2$ <br>
$\nabla f(x) = (2x_1, 8x_2, 32x_3)^T$

constraint: <br>

$h(x) = x_1x_2 - 1$



In [27]:
# function to minimize
f = lambda x: x[0]**2 + 4*x[1]**2 + 16*x[2]**2
# its gradient/jacobian
JacF = lambda x: [2*x[0], 8*x[1], 32*x[2]]
# norm of its gradient/jacobian
normJacF = lambda x: np.linalg.norm(JacF(x))
# constraint
h = lambda x: x[0] * x[1] - 1

# Penalty Method

$p(x) = \frac{1}{2}h(x)^Th(x) = ||h(x)||_2^2 = \frac{1}{2}(x_1x_2 - 1)^T (x_1x_2 - 1)$

$P(x,\mu) = f(x) + \mu p(x) =  x_1^2 + 4x_2^2 + 16x_3^2 + \frac{\mu}{2}(x_1x_2 - 1)^T (x_1x_2 - 1)$

In [28]:
# feasibility penalization function
p = lambda x: 0.5 * h(x)**2
# Merit function
P = lambda mu: lambda x: f(x) + mu*p(x)
JacP = lambda mu: lambda x: np.array([2*x[0] + mu*(x[0]*x[1]**2 -x[1]), 8*x[1] + mu*(x[1]*x[0]**2 -x[0]), 32*x[2]])

## minimize

In [50]:
data = pd.DataFrame(columns=['iteration','x', 'f(x)',"||f'(x)||",'P(x)','p(x)','h(x)','mu', 'mu*h(x)']).set_index('iteration')

# initial conditions
x_start = [20,30,15]
mu_start = 5 # if we change to 2 for example, the method fails to converge

i=0 # iteration
x = x_start
mu = mu_start
mu_update_coef = 15

# start optimization loop
max_iter = 100
while i <= max_iter:
    data.loc[i] = [x, f(x), normJacF(x), P(mu)(x), p(x), h(x), mu, mu*h(x)]
    i += 1 
    x = newton(JacP(mu), x, maxiter=5000)
    mu += mu_update_coef
data.tail(3)

Unnamed: 0_level_0,x,f(x),||f'(x)||,P(x),p(x),h(x),mu,mu*h(x)
iteration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
98,"[1.4112937163339696, 0.7056628387620372, 0.0]",3.98359,6.311612,3.991879,8e-06,-0.00410247,985,-4.04093
99,"[1.4113216598858518, 0.7056782374910909, 0.0]",3.98376,6.311747,3.991961,8e-06,-0.00406102,995,-4.04071
100,"[1.4113462227415576, 0.7056950357153815, 0.0]",3.98392,6.311889,3.992041,8e-06,-0.00401998,1005,-4.04008


In [48]:
data['h(x)'][1:].iplot()

## Plotting 

Values to pay attention to 

In [42]:
x_star1 = (sqrt(2),1/sqrt(2),0) # minimum 1
x_star2 = (-sqrt(2),-1/sqrt(2),0) # minimum 1
print(f"{x_star1=}")
print(f"{x_star2=}")
print(f"{f(x_star1)=}")
print(f"{f(x_star2)=}")
print("Lagrangian multiplier: 4") # lagrange mult. associated with the problem

x_star1=(1.4142135623730951, 0.7071067811865475, 0)
x_star2=(-1.4142135623730951, -0.7071067811865475, 0)
f(x_star1)=4.0
f(x_star2)=4.0
Lagrangian multiplier: 4


How is the convergence of f(x) to the minimum?

In [43]:
def save_plotly_fig_x_pure(fig, x_start,mu_start,mu_update):
    fig.write_image(f"media/Ex1_pure_xk_plot__initial_condition_x={x_start}_mu={mu_start}_mu_update_rule=muX{mu_update}.png")
    
def save_plotly_fig_muh_pure(fig, x_start,mu_start,mu_update):
    fig.write_image(f"media/Ex1_pure_muh_plot__initial_condition_x={x_start}_mu={mu_start}_mu_update_rule=muX{mu_update}.png")

In [44]:
layout = dict(title_text='log10 ||x_k - x_min1||', title_x=0.5, xaxis_title='iterations', yaxis_title='value')
diff = data.x.apply(lambda x: np.array(x) - np.array(x_star1))
diff_mod = diff.apply(np.linalg.norm)
diff_mod_log = diff_mod.apply(np.log)

fig = px.scatter(diff_mod_log)
fig.update_layout(layout) # add titles

save_plotly_fig_x_pure(fig,x_start,mu_start,mu_update_coef)
fig.show()

Conclusions:

After 20 iterations the method achieves 5 decimal points of precision of the minimum estimate!

Now let's look at the term $\mu \cdot h(x)$ and check if it converges to the lagrange multiplier associated with this problem (which is equal to -4).

In [45]:
# title information
layout = dict(title_text='behaviour of mu * h(x)', title_x=0.5, xaxis_title='iterations', yaxis_title='value')
# horizontal line with true value for lambda
lagrange_multiplier_line = dict(type= 'line', y0= -4, y1= -4, 
                                x0= 0, x1= 35,
                                line=dict(color="Red",width=4))

fig = px.scatter(data['mu*h(x)'][1:])
fig.update_layout(layout) # add titles
fig.update_layout(shapes=[lagrange_multiplier_line]) # add horizontal line with true value

save_plotly_fig_muh_pure(fig, x_start, mu_start, mu_update_coef)
fig.show()

Conclusions:

From the 1st iteration, the method achieves 8 decimals of precision and stays very close to that for the following iterations. It is unnable to converge to -4.

# Augmented Lagrangian Method

$
\begin{equation}
\begin{aligned}
L(x,\lambda,\mu) &= f(x) + \lambda^Th(x) + \frac{\mu}{2}h(x)^Th(x) \\
&= x_1^2 + x_2^2 + 16x_3^2 + \lambda^T(x_1x_2 - 1) + \frac{\mu}{2} (x_1x_2 - 1)^T (x_1x_2 - 1)
\end{aligned}
\end{equation}
$

In [14]:
# feasibility penalization function
p = lambda x: 1/2 * h(x)**2
# Merit function
L = lambda mu,lamb: lambda x: f(x) + lamb*h(x) + mu*p(x)
JacL = lambda mu,lamb: lambda x: np.array([2*x[0] + (lamb*x[1]) + mu*(x[0]*x[1]**2 -x[1]), 
                                           8*x[1] + (lamb*x[0]) + mu*(x[1]*x[0]**2 -x[0]), 32*x[2]])
normJacL = lambda mu,lamb: lambda x: np.linalg.norm(JacL(mu,lamb)(x))

## minimize

In [16]:
data = pd.DataFrame(columns=['iteration','x', 'f(x)',"||f'(x)||",'L(x)',"||L'(x)||",'p(x)','h(x)','mu','lambda']).set_index('iteration')

# initial conditions
j=0 # iteration
x = [20,30,15]
mu = 5 # se o mu começar em 2 já não dá! Experimentar isto e tentar explicar no relatório.
lamb = 2

# start optimization loop
max_iter = 25
while j <= max_iter: 
    data.loc[j] = [x, f(x), normJacF(x), L(mu,lamb)(x),normJacL(mu,lamb)(x), p(x), h(x), mu, lamb]
    x = newton(JacL(mu,lamb), x, maxiter=10000, disp=False)
    j += 1
    mu *= 2
    lamb = lamb + mu * h(x)
data.tail(5)

Unnamed: 0_level_0,x,f(x),||f'(x)||,L(x),||L'(x)||,p(x),h(x),mu,lambda
iteration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
21,"[1.4141514748448758, 0.7071372884251028, 0.0]",4,6.324718,4.000006,18.916137,2.892986e-13,-7.60656e-07,10485760,-7.98809
22,"[1.4141521432495332, 0.7071377610262188, 0.0]",4,6.324722,4.000003,18.916618,7.232466e-14,3.80328e-07,20971520,-0.0120328
23,"[1.4141518090929701, 0.7071375247028442, 0.0]",4,6.32472,4.000002,18.916141,1.808116e-14,-1.90164e-07,41943040,-7.98809
24,"[1.4141519761940942, 0.7071376428531198, 0.0]",4,6.324721,4.000001,18.916615,4.520291e-15,9.5082e-08,83886080,-0.0120329
25,"[1.4141518926549506, 0.7071375837722766, 0.0]",4,6.324721,4.0,18.916142,1.130073e-15,-4.7541e-08,167772160,-7.98809


## Plotting

In [61]:
layout = dict(title_text='log10 ||x_k - x_min1||', title_x=0.5, xaxis_title='iterations', yaxis_title='value')
diff = data.x.apply(lambda x: np.array(x) - np.array(x_star1))
diff_mod = diff.apply(np.linalg.norm)
diff_mod_log = diff_mod.apply(np.log)

fig = px.scatter(diff_mod_log)
fig.update_layout(layout) # add titles

Conclusions:

In 20 iterations, the method achieves 5 decimals of precision, which is practically the same result of the previous method.

How does the lambda estimate behave over the iterations?

In [59]:
# horizontal line of correct lagrange multiplier
lagrange_multiplier_line = dict(type= 'line', y0= -4, y1= -4, x0= 0, x1= 35,line=dict(color="Red",width=4))
# title

fig = px.scatter(data,x=dataAug.index,y='lambda')
fig.update_layout(shapes=[lagrange_multiplier_line]) # add horizontal line
fig.update_layout(title={'text': "lambda estimate over the iterations",'x':0.5}) # add title
