# Exercise 3

In [15]:
# optimization algorithm to minimize unconstrained function
from scipy.optimize import minimize
import pandas as pd
import numpy as np
import matplotlib as plt
from scipy.optimize import newton

import plotly.express as px
import cufflinks as cf
import plotly.offline
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

Function we want to minimize

$f(x) = x + y$ 

$\nabla f(x) = (1, 1)^T$

constraints:
<br>
$h_1(x) = x_1x_2 - 1 \\
h_2(x) = x1^2 + 2x_2^2 + x_3^2 - 1$


In [2]:
# function to minimize
f = lambda x: x[0] + x[1]
# its gradient/jacobian
JacF = lambda x: [1, 1]
# norm of its gradient/jacobian
normJacF = lambda x: np.linalg.norm(JacF(x))
# constraint
h1 = lambda x: x[0] + x[1] + x[2] - 1
h2 = lambda x: x[0]**2 + 2*x[1]**2 + x[2]**2 - 1

Minimum of the problem

In [3]:
x_min = (0,0,1)
print(f"{f(x_min)=}")
lambda1_lagrange = -1
lambda2_lagrange = 1/2

f(x_min)=0


# Penalty Method

$
\begin{equation}
\begin{aligned}
p(x) &= \frac{1}{2}||h(x)||_2^2 = \frac{1}{2}h_{1}(x)^Th_{1}(x) + \frac{1}{2}h_{2}(x)^Th_{2}(x) \\
&= \frac{1}{2}(x + y + z - 1)^T (x + y + z - 1) + \frac{1}{2}(x^{2} + 2y^{2} + z^{2} - 1)^T(x^{2} + 2y^{2} + z^{2} - 1)
\end{aligned}
\end{equation}
$

$
\begin{equation}
\begin{aligned}
P(x,\mu) &= f(x) + \mu p(x) \\ 
&= x + y + \frac{\mu}{2} \Big[(x + y + z - 1)^T (x + y + z - 1) + (x^{2} + 2y^{2} + z^{2} - 1)^T(x^{2} + 2y^{2} + z^{2} - 1)\Big]
\end{aligned}
\end{equation}
$

In [4]:
# feasibility penalization function
p = lambda x: 1/2 *( h1(x)**2 + h2(x)**2 )
# Merit function
P = lambda mu: lambda x: f(x) + mu*p(x)
JacP = lambda mu: lambda x: np.array([2*mu*x[0]*(x[0]**2 +2*x[1]**2 +x[2]**2 -1 ) + mu *(x[0]+x[1]+x[2]-1) +1 ,
                                     4*mu*x[1]*(x[0]**2 +2*x[1]**2 +x[2]**2 -1 ) + mu *(x[0]+x[1]+x[2]-1) +1,
                                     mu*(2*x[2]*(x[0]**2 +2*x[1]**2 +x[2]**2 -1) +x[0]+x[1]+x[2]-1)])

## Minimization algorithm

In [5]:
data = pd.DataFrame(columns=['iteration','x', 'f(x)',"||f'(x)||",'P(x)','p(x)','h1(x)','h2(x)','mu', 'mu*h1(x)', 'mu*h2(x)']).set_index('iteration')

# initial conditions
i=0 # iteration
x = [20,30,15]
mu = 5

# start optimization loop
max_iter=25
while i <= max_iter:
    data.loc[i] = [x,f(x),normJacF(x),P(mu)(x),p(x),h1(x), h2(x), mu, mu*h1(x), mu*h2(x)]
    x = newton(JacP(mu), x, maxiter=10000,disp=False)
    mu *= 2
    i += 1 
data.tail(3)

Unnamed: 0_level_0,x,f(x),||f'(x)||,P(x),p(x),h1(x),h2(x),mu,mu*h1(x),mu*h2(x)
iteration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
23,"[-0.00022274182941286162, 0.000222751062286358...",9.23287e-09,1.414214,8.273864e-08,1.752514e-15,-4.75707e-08,3.52428e-08,41943040,-1.99526,1.47819
24,"[-0.00022276789112124593, 0.000222777011530264...",9.12041e-09,1.414214,1.522958e-07,1.706783e-15,-5.22613e-08,2.61214e-08,83886080,-4.38399,2.19122
25,"[-0.0001959114339332982, 0.0001959551149161763...",4.3681e-08,1.414214,5.693536e-08,7.900224e-17,-1.19335e-08,3.94906e-09,167772160,-2.00212,0.662542


In [6]:
# start optimization loop
# Run this cell again to get more iterates
i += 1 
result = newton(JacP(mu), x, maxiter=10000,disp=False)
x = result
data.loc[i] = [x, f(x), normJacF(x), P(mu)(x), p(x), h1(x), h2(x), mu, mu*h1(x), mu*h2(x)]
mu *= 2
data.tail(3)

Unnamed: 0_level_0,x,f(x),||f'(x)||,P(x),p(x),h1(x),h2(x),mu,mu*h1(x),mu*h2(x)
iteration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
24,"[-0.00022276789112124593, 0.000222777011530264...",9.12041e-09,1.414214,1.522958e-07,1.706783e-15,-5.22613e-08,2.61214e-08,83886080,-4.38399,2.19122
25,"[-0.0001959114339332982, 0.0001959551149161763...",4.3681e-08,1.414214,5.693536e-08,7.900224e-17,-1.19335e-08,3.94906e-09,167772160,-2.00212,0.662542
27,"[-0.0001959251939405155, 0.0001959688293712601...",4.36354e-08,1.414214,6.979834e-08,7.797156e-17,-1.11701e-08,5.58323e-09,335544320,-3.74806,1.87342


## Plots

How is the convergence of x to $x_{min}$?

In [12]:
layout = dict(title_text='log10 ||x_k - x_min||', title_x=0.5, xaxis_title='iterations', yaxis_title='value')
diff = data.x.apply(lambda x: np.array(x) - np.array(x_min))
diff_mod = diff.apply(np.linalg.norm)
diff_mod_log = diff_mod.apply(np.log10)

diff_mod_log.iplot(mode='markers',xTitle='iterations',yTitle='||x - x_min||',layout=layout)

Conclusions:

The method converges generally the more iterations we use. There are few jumps. Maybe the Hessian isn't Semi-PD in those places.

# Augmented Lagrangian Method

$
\begin{equation}
\begin{aligned}
L(x,\lambda,\mu) &= f(x) + \lambda^Th(x) + \mu \cdot p(x)\\
\end{aligned}
\end{equation}
$

In [8]:
# feasibility penalization function
p = lambda x: 1/2 * (h1(x)**2 + h2(x)**2)
# Merit function
L = lambda mu,lamb1,lamb2: lambda x: f(x) + lamb1*h1(x) + lamb2*h2(x) + mu*p(x)

JacL = lambda mu,lamb1,lamb2: lambda x: np.array([
    1 + lamb1 + 2*lamb2*x[0] + 2*mu*x[0]*(x[0]**2 +2*x[1]**2 +x[2]**2 -1 ) + mu *(x[0]+x[1]+x[2]-1),
    1 + lamb1 + 4*lamb2*x[1] + 4*mu*x[1]*(x[0]**2 +2*x[1]**2 +x[2]**2 -1 ) + mu *(x[0]+x[1]+x[2]-1),
    lamb1 + 2*lamb2*x[2] + mu*(2*x[2]*(x[0]**2 +2*x[1]**2 +x[2]**2 -1) +x[0]+x[1]+x[2]-1)])

## Minimization algorithm

In [9]:
data = pd.DataFrame(columns=['iteration','x', 'f(x)',"||f'(x)||",'L(x)','p(x)','h1(x)', 'h2(x)','mu','lambda1','lambda2']).set_index('iteration')
# initial condition
i=0 # iteration
w = [20,30,15]
mu = 2
lamb1 = 0
lamb2 = 0

#begin optimization
max_iter=35
while i <= max_iter:
    data.loc[i] = [x, f(x), normJacF(x), L(mu,lamb1,lamb2)(x), p(x), h1(x), h2(x), mu, lamb1,lamb2]
    x = newton(JacL(mu,lamb1,lamb2), x, maxiter=20000,disp=False)    
    mu *=2
    lamb1 = lamb1 + mu * h1(x)
    lamb2 = lamb2 + mu * h2(x)
    i +=1
data.tail(3)

Unnamed: 0_level_0,x,f(x),||f'(x)||,L(x),p(x),h1(x),h2(x),mu,lambda1,lambda2
iteration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
33,"[-7.11070496350141e-05, 7.113803126070706e-05,...",3.098163e-08,1.414214,1e-05,2.872063e-16,2.394175e-08,1.097708e-09,17179869184,213.335,76.5685
34,"[-7.107533112066351e-05, 7.110634411974134e-05...",3.1013e-08,1.414214,2.9e-05,2.650914e-16,1.447934e-08,-1.790339e-08,34359738368,710.841,-538.587
35,"[-0.00010422228827890625, 0.000104210219500454...",-1.206878e-08,1.414214,4.5e-05,3.313149e-16,-2.072754e-08,1.52643e-08,68719476736,-713.545,510.367


## Plots

Convergence of x

In [16]:
layout = dict(title_text='log10 ||x_k - x_min||', title_x=0.5, xaxis_title='iterations', yaxis_title='value')
diff = data.x.apply(lambda x: np.array(x) - np.array(x_min))
diff_mod = diff.apply(np.linalg.norm)
diff_mod_log = diff_mod.apply(np.log10)

diff_mod_log.iplot(mode='markers',xTitle='iterations',yTitle='||x - x_min||',layout=layout)

Behaviour of mu*h(x)

In [19]:
layout = dict(title_text='lambda1 estimate over the iterations', title_x=0.5,xaxis_title='iterations', yaxis_title='mu1 * h1(x)')

# horizontal line of correct lagrange multiplier
lagrange_multiplier_line = dict(type= 'line', y0= lambda1_lagrange, y1= lambda1_lagrange, 
                                x0= 0, x1= max_iter,
                                line=dict(color="Red",width=4))
# title

fig = px.scatter(data,x=data.index,y='lambda1')
fig.update_layout(shapes=[lagrange_multiplier_line]) # add horizontal line
fig.update_layout(layout) # add titles


Conclusions. The method converges very well to the first lagrangian from the start. But starts exploding after iteration 28

In [20]:
layout = dict(title_text='lambda2 estimate over the iterations', title_x=0.5,xaxis_title='iterations', yaxis_title='mu1 * h1(x)')

# horizontal line of correct lagrange multiplier
lagrange_multiplier_line = dict(type= 'line', y0= lambda2_lagrange, y1= lambda2_lagrange, 
                                x0= 0, x1= max_iter,
                                line=dict(color="Red",width=4))
# title

fig = px.scatter(data,x=data.index,y='lambda2')
fig.update_layout(shapes=[lagrange_multiplier_line]) # add horizontal line
fig.update_layout(layout) # add titles

