In [1]:
import numpy as np
import random
import pandas as pd

Consider the function $f({\bf x}) = 5x_{0}^{2} + x_{1}^{2} + 4x_{0}x_{1} - 14x_{0} - 6x_{1} +20$.

Define
$$
{\bf x}=
\left[\begin{array}{cc}
x_{0}\\
x_{1}
\end{array}\right],
Q=
\left[\begin{array}{cc}
10&4\\
4&2
\end{array}\right]
\hbox{ and }
{\bf q}=
\left[\begin{array}{cc}
-14\\
-6
\end{array}\right].
$$

Then $f({\bf x})=\displaystyle \frac{1}{2}{\bf x}^tQ{\bf x}+{\bf q}^t {\bf x}+20$. 
<hr>

In [2]:
def functionValue(x,Q,q,f0):
    y=(1/2)*x.transpose().dot(Q).dot(x)+q.transpose().dot(x)+f0
    return y

Since
$$
\frac{\partial f}{\partial x_0}=10x_0+4x_1-14
$$
and 
$$
\frac{\partial f}{\partial x_1}=2x_1+4x_0-6,
$$
we obtain the graident $\nabla f(x)$ of $f(x)$ is 
$$
\nabla f(x)=\left[\begin{array}{c} 10x_0+4x_1-14 \\ 2x_1+4x_0-6 \end{array}\right].
$$
<hr>

In [3]:
def functionGradient(x,Q,q):
    d=Q.dot(x)+q
    return d

From the definition of gradients, we consider
$$f({\bf x}) = f({\bf\bar{x}}) + \nabla f({\bf\bar{x}})^{t}({\bf x}-{\bf\bar{x}}) + \| {\bf x}-{\bf\bar{x}} \| \alpha ({\bf\bar{x}},{\bf x}-{\bf\bar{x}}),$$
The computation is given as follows:
$$f({\bf x}) - f({\bf\bar{x}}) = \frac{1}{2}{\bf x}^tQ{\bf x}+{\bf q}^t{\bf x} - \left(\frac{1}{2}{\bf\bar{x}}^tQ{\bf\bar{x}}+{\bf q}^t{\bf\bar{x}}\right)$$
$$
= \frac{1}{2}{\bf x}^tQ{\bf x}+{\bf q}^t({\bf x}-{\bf\bar{x}}) -{\bf\bar{x}}^tQ{\bf\bar{x}} +\frac{1}{2}{\bf\bar{x}}^tQ{\bf\bar{x}}+ {\bf\bar{x}}^tQ{\bf x}-{\bf\bar{x}}^tQ{\bf x}
$$
$$
= {\bf\bar{x}}^t Q({\bf x}-{\bf\bar{x}}) +{\bf q}^t({\bf x}-{\bf\bar{x}})+\frac{1}{2}{\bf x}^tQ{\bf x}  +\frac{1}{2}{\bf\bar{x}}^tQ{\bf\bar{x}}-{\bf\bar{x}}^tQ{\bf x}
$$
$$
= (Q{\bf\bar{x}})^t({\bf x}-{\bf\bar{x}}) +{\bf q}^t({\bf x}-{\bf\bar{x}})+\frac{1}{2}{\bf x}^tQ({\bf x}-{\bf\bar{x}})  -\frac{1}{2}{\bf\bar{x}}^tQ({\bf x} -{\bf\bar{x}})
$$
$$
= (Q{\bf\bar{x}}+{\bf q})^t({\bf x}-{\bf\bar{x}}) +\frac{1}{2}({\bf x}-{\bf\bar{x}})^tQ({\bf x}-{\bf\bar{x}})
$$

Since
$$
\frac{\partial f}{\partial x_0}=10x_0+4x_1-14
$$
and 
$$
\frac{\partial f}{\partial x_1}=2x_1+4x_0-6,
$$
we obtain the graident $\nabla f(x)$ of $f(x)$ is 
$$
\nabla f(x_0,x_1)=\left[\begin{array}{c} 10x_0+4x_1-14 \\ 2x_1+4x_0-6 \end{array}\right].
$$
Moreover, 
$$
\nabla f(x_0,x_1)=Q\left[\begin{array}{c}x_0 \\ x_1 \end{array}\right]+q
$$
where 
$$
Q=\left[\begin{array}{cc}10 &４\\4 & 2\end{array}\right], 
q=\left[\begin{array}{cc}-14\\-6\end{array}\right].
$$
At the same time, the function $f(x_0,x_1) = 5x_{0}^{2} + x_{1}^{2} + 4x_{0}x_{1} - 14x_{0} - 6x_{1} +20$ can be rewritten as 
$$
f(x_0,x_1)=\frac{1}{2}\left[\begin{array}{cc}x_0 & x_1 \end{array}\right]
Q\left[\begin{array}{c}x_0 \\ x_1 \end{array}\right]+q^t\left[\begin{array}{c}x_0 \\ x_1 \end{array}\right]
$$
i.e.
$$
f({\bf x})=\frac{1}{2}{\bf x}^t Q{\bf x}+q^t{\bf x}\quad and\quad \nabla f({\bf x})=Q{\bf x}+q 
$$
where ${\bf x}=\left[\begin{array}{c}x_0 \\ x_1 \end{array}\right]$.
<hr>

In [4]:
Q=np.zeros((2,2))
Q[0,0]=10
Q[0,1]=4
Q[1,0]=4
Q[1,1]=2
print(Q)

[[10.  4.]
 [ 4.  2.]]


In [5]:
q=np.zeros((2,1))
q[0,0]=-14
q[1,0]=-6
print(q)

[[-14.]
 [ -6.]]


In [6]:
x = np.zeros((2,1))
x[0,0]=2
x[1,0]=2
f0=20
print(functionValue(x,Q,q,f0))
print(functionValue(x,Q,q,f0)[0,0])
print(functionGradient(x,Q,q))
print(functionGradient(x,Q,q)[0,0])
print(functionGradient(x,Q,q)[1,0])


[[20.]]
20.0
[[14.]
 [ 6.]]
14.0
6.0


A natural consequence of this is the following algorithm, called the steepest
descent algorithm.
    
Step 0: Given ${\bf x}^{0}$, set $k:=0$

Step 1: ${\bf d}^{k}:= -\nabla f({\bf x}^{k})$. If ${\bf d}^{k}=0$, then stop.

Step 2: Solve $\displaystyle \min_{\alpha >0} f({\bf x}^{k} + \alpha {\bf d}^{k})$ for the step size $\alpha^{k}$, perhaps chosen by an exact or inexact line search.

Step 3: Set ${\bf x}^{k+1} \leftarrow {\bf x}^{k} + \alpha^{k} {\bf d}^{k}$, $k \leftarrow k+1$.Go to Step 1.

Note from Step 2 and the fact that ${\bf d}^{k} = - \nabla f({\bf x}^{k})$
is a descent direction, it follows that $f({\bf x}^{k+1}) < f({\bf x}^{k}).$
<hr>

<hr>
Step 0: Given ${\bf x}^{0}$, set $k:=0$

<hr>
Step 1: $d^{k}:= -\nabla f(x^{k})$.

<hr>
Step 2: Solve $\displaystyle \min_{\alpha >0} f({\bf x}^{k} + \alpha {\bf d}^{k})$ for the step size $\alpha^{k}$, perhaps chosen by an exact or inexact line search.

$$
\begin{array}{rcl}
f({\bf x}+\alpha {\bf d}) &= &5(x_{0}+\alpha d_{0})^{2} + (x_{1}+\alpha d_{1})^{2} + 4(x_{0}+\alpha d_{0})(x_{1}+\alpha d_{1}) - 14(x_{0}+\alpha d_{0}) - 6(x_{1}+\alpha d_{1}) +20\\
& = & (5d_{0}^{2}+d_{1}^{2}+4d_{0}d_{1}) \alpha^2 +(10x_{0}d_{0}+2x_{1}d_{1}+4x_{1}d_{0}+4x_{0}d_{1}-14d_{0}-6d_{1})\alpha +5x_{0}^{2} + x_{1}^{2} + 4x_{0}x_{1} - 14x_{0} - 6x_{1} +20\\
\end{array}
$$
Hence 
$$
\frac{d}{d \alpha} f({\bf x}+\alpha {\bf d})=2(5d_{0}^{2}+d_{1}^{2}+4d_{0}d_{1}) \alpha +10x_{0}d_{0}+2x_{1}d_{1}+4x_{1}d_{0}+4x_{0}d_{1}-14d_{0}-6d_{1}
$$
and the minimum solution of $\displaystyle \min_{\alpha >0} f({\bf x}^{k} + \alpha {\bf d}^{k})$ is given by
$$
\alpha^{*}=-\frac{10x_{0}d_{0}+2x_{1}d_{1}+4x_{1}d_{0}+4x_{0}d_{1}-14d_{0}-6d_{1}}{2(5d_{0}^{2}+d_{1}^{2}+4d_{0}d_{1})}
$$

In [7]:
def determineStepSize(x,Q,q,d):
    alpha = 0
    alpha = alpha+10*x[0,0]*d[0,0]
    alpha = alpha+2*x[1,0]*d[1,0]
    alpha = alpha+4*x[1,0]*d[0,0]
    alpha = alpha+4*x[0,0]*d[1,0]
    alpha = alpha-14*d[0,0]
    alpha = alpha-6*d[1,0]
    alpha = -alpha/(2*(5*d[0,0]**2+d[1,0]**2+4*d[0,0]*d[1,0]))
    return alpha


In [8]:
x = np.zeros((2,1))
x[0,0]=2
x[1,0]=2
f0=20
d=-functionGradient(x,Q,q)
print(determineStepSize(x,Q,q,d))
########################
print(functionValue(x,Q,q,f0))
alpha=determineStepSize(x,Q,q,d)
print(functionValue(x+alpha*d,Q,q,f0))

0.08579881656804733
[[20.]]
[[10.04733728]]


In [9]:
x = np.zeros((2,1))
x[0,0]=2
x[1,0]=2
f0=20
########################
d=-functionGradient(x,Q,q)
print(determineStepSize(x,Q,q,d))
print(functionValue(x,Q,q,f0))
alpha=determineStepSize(x,Q,q,d)
print(functionValue(x+alpha*d,Q,q,f0))
x=x+alpha*d
#######################

0.08579881656804733
[[20.]]
[[10.04733728]]


In [10]:
x = np.zeros((2,1))
x[0,0]=2
x[1,0]=2
f0=20
########################
d=-functionGradient(x,Q,q)
print(determineStepSize(x,Q,q,d))
print(functionValue(x,Q,q,f0))
alpha=determineStepSize(x,Q,q,d)
print(functionValue(x+alpha*d,Q,q,f0))
x=x+alpha*d
#######################
d=-functionGradient(x,Q,q)
print(determineStepSize(x,Q,q,d))
print(functionValue(x,Q,q,f0))
alpha=determineStepSize(x,Q,q,d)
print(functionValue(x+alpha*d,Q,q,f0))
x=x+alpha*d
#######################

0.08579881656804733
[[20.]]
[[10.04733728]]
2.9000000000000163
[[10.04733728]]
[[10.00022408]]


In [11]:
x = np.zeros((2,1))
x[0,0]=2
x[1,0]=2
f0=20
l=1
########################
while l<=10:
    d=-functionGradient(x,Q,q)
    print(determineStepSize(x,Q,q,d))
    print(functionValue(x,Q,q,f0))
    alpha=determineStepSize(x,Q,q,d)
    print(functionValue(x+alpha*d,Q,q,f0))
    x=x+alpha*d
    l=l+1
#######################

0.08579881656804733
[[20.]]
[[10.04733728]]
2.9000000000000163
[[10.04733728]]
[[10.00022408]]
0.08579881656804594
[[10.00022408]]
[[10.00000106]]
2.8999999999986015
[[10.00000106]]
[[10.00000001]]
0.08579881656808273
[[10.00000001]]
[[10.]]
2.900000000154647
[[10.]]
[[10.]]
0.0857988165560287
[[10.]]
[[10.]]
2.899999982672918
[[10.]]
[[10.]]
0.08579883434584973
[[10.]]
[[10.]]
2.899962905574441
[[10.]]
[[10.]]


In [12]:
x = np.zeros((2,1))
x[0,0]=2
x[1,0]=2
f0=20
l=1
########################
while l<=10:
    d=-functionGradient(x,Q,q)
    #https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html
    print(np.linalg.norm(d))
    print(determineStepSize(x,Q,q,d))
    print(functionValue(x,Q,q,f0))
    alpha=determineStepSize(x,Q,q,d)
    print(functionValue(x+alpha*d,Q,q,f0))
    x=x+alpha*d
    l=l+1
#######################

15.231546211727817
0.08579881656804733
[[20.]]
[[10.04733728]]
0.18025498475417512
2.9000000000000163
[[10.04733728]]
[[10.00022408]]
0.0721019939016559
0.08579881656804594
[[10.00022408]]
[[10.00000106]]
0.0008532780343383944
2.8999999999986015
[[10.00000106]]
[[10.00000001]]
0.0003413112137674292
0.08579881656808273
[[10.00000001]]
[[10.]]
4.0391859621014965e-06
2.900000000154647
[[10.]]
[[10.]]
1.6156744048765226e-06
0.0857988165560287
[[10.]]
[[10.]]
1.9120406180458515e-08
2.899999982672918
[[10.]]
[[10.]]
7.648157294098356e-09
0.08579883434584973
[[10.]]
[[10.]]
9.051082968651244e-11
2.899962905574441
[[10.]]
[[10.]]


In [13]:
x = np.zeros((2,1))
x[0,0]=2
x[1,0]=2
f0=20
l=1
d=-functionGradient(x,Q,q)
########################
while l<=10 and np.linalg.norm(d)>1e-6:
    d=-functionGradient(x,Q,q)
    #https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html
    print(np.linalg.norm(d))
    print(determineStepSize(x,Q,q,d))
    print(functionValue(x,Q,q,f0))
    alpha=determineStepSize(x,Q,q,d)
    print(functionValue(x+alpha*d,Q,q,f0))
    x=x+alpha*d
    l=l+1
#######################

15.231546211727817
0.08579881656804733
[[20.]]
[[10.04733728]]
0.18025498475417512
2.9000000000000163
[[10.04733728]]
[[10.00022408]]
0.0721019939016559
0.08579881656804594
[[10.00022408]]
[[10.00000106]]
0.0008532780343383944
2.8999999999986015
[[10.00000106]]
[[10.00000001]]
0.0003413112137674292
0.08579881656808273
[[10.00000001]]
[[10.]]
4.0391859621014965e-06
2.900000000154647
[[10.]]
[[10.]]
1.6156744048765226e-06
0.0857988165560287
[[10.]]
[[10.]]
1.9120406180458515e-08
2.899999982672918
[[10.]]
[[10.]]


In [14]:
Q=np.zeros((2,2))
Q[0,0]=10
Q[0,1]=4
Q[1,0]=4
Q[1,1]=2
######################
q=np.zeros((2,1))
q[0,0]=-14
q[1,0]=-6
#######################
f0=20
#############################
x = np.zeros((2,1))
x[0,0]=2
x[1,0]=2
k=0
d=-functionGradient(x,Q,q)
while np.linalg.norm(d)>1e-6 and k<=100:
    print(k)
    d=-functionGradient(x,Q,q)
    #https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html
    print(np.linalg.norm(d))
    print(determineStepSize(x,Q,q,d))
    print(functionValue(x,Q,q,f0))
    alpha=determineStepSize(x,Q,q,d)
    print(functionValue(x+alpha*d,Q,q,f0))
    x=x+alpha*d
    k=k+1
print("final step:") 
print(functionValue(x,Q,q,f0))

0
15.231546211727817
0.08579881656804733
[[20.]]
[[10.04733728]]
1
0.18025498475417512
2.9000000000000163
[[10.04733728]]
[[10.00022408]]
2
0.0721019939016559
0.08579881656804594
[[10.00022408]]
[[10.00000106]]
3
0.0008532780343383944
2.8999999999986015
[[10.00000106]]
[[10.00000001]]
4
0.0003413112137674292
0.08579881656808273
[[10.00000001]]
[[10.]]
5
4.0391859621014965e-06
2.900000000154647
[[10.]]
[[10.]]
6
1.6156744048765226e-06
0.0857988165560287
[[10.]]
[[10.]]
7
1.9120406180458515e-08
2.899999982672918
[[10.]]
[[10.]]
final step:
[[10.]]
