\documentclass{article}
% Include macros here
\input{macros}
\usepackage{fancyhdr}
%\include{macros}
\usepackage{pifont}
% Number of problem sheet
\newcounter{problemSheetNumber}
\setcounter{problemSheetNumber}{2}
\newcommand{\matlabprob}{\ding{100} \ }
\newcommand{\examprob}{\ding{80} \ }
%\setcounter{section}{\theproblemSheetNumber}
%\renewcommand{\theparagraph}{(\thesection.\arabic{paragraph})}
\newcounter{problems}
\setcounter{problems}{3}
%\setlength{\parindent}{0cm}
\renewcommand{\problem}[1]{\paragraph{(\theproblemSheetNumber.\theproblems)}\addtocounter{problems}{1}\label{#1}}
\renewcommand{\solution}[1]{\paragraph{Solution (\theproblemSheetNumber.\theproblems)}\addtocounter{problems}{1}\label{#1}}
\pagestyle{fancy}
\lhead{MATH36061}
\chead{Convex Optimization}
\rhead{October 9, 2016}
\begin{document}
\begin{center}
{\Large {\bf Solutions to Part B of Problem Sheet \theproblemSheetNumber}}
\end{center}
\solution{pr:1} In order to apply Newton's method, we need to know the derivatives of $f$. These are given by
\begin{equation*}
f'(x) = 6(x-1)^5, \quad f''(x) = 30(x-1)^4.
\end{equation*}
We implement the function $f(x)$ and its derivatives in Python as follows
\begin{ipythonnb}
def f(x):
    return (x-1)**6

def df(x):
    return 6*(x-1)**5

def ddf(x):
    return 30*(x-1)**4
\end{ipythonnb}
\begin{figure}[h!]
\centering
\includegraphics[width=0.4\textwidth]{images/flat_cropped.pdf}
\end{figure}
Newton's method is then the iteration
\begin{equation*}
x_{k+1} = x_k - \frac{f'(x_k)}{f''(x_k)} = x_k - \frac{1}{5}(x_k-1) = 0.8x_k+0.2.
\end{equation*}
To run Newton's method by hand, we choose a starting point $x_0$ and then compute $x_1, x_2$, etc. For example, if we start with $x_0=2$, we get the iterates $x_1=1.8, x_2 = 1.64$, and so on. Eventually the iterates will approach $1$, but the process is slow.
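This recursion can also be checked with a few lines of plain Python. The short sketch below (not part of the original notebook) prints the first few iterates starting from $x_0=2$ and illustrates how slowly the error decreases.
\begin{ipythonnb}
# Iterate x_{k+1} = 0.8*x_k + 0.2 starting from x_0 = 2
x = 2.0
for k in range(5):
    x = 0.8*x + 0.2
    print(k+1, x)
# iterates: 1.8, 1.64, 1.512, 1.4096, 1.32768 (up to rounding);
# the error x_k - 1 shrinks only by the constant factor 0.8 in each step
\end{ipythonnb}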
We then run Newton's method, implemented as follows, with the three stopping criteria and with tolerance $\e=10^{-6}$.
\begin{ipythonnb}
import numpy as np
import numpy.linalg as la

def newton(f, df, ddf, x0, tol, maxiter=100):
    """
    Newton's method
    """
    x, xold = x0, x0+2*tol*np.ones(x0.shape)
    iter = 0
    while ( la.norm(x-xold) > tol ) and ( iter < maxiter ):
        # Newton step: solve f''(x) z = f'(x) and update
        grad = df(x)
        hess = ddf(x)
        z = la.solve(hess, grad)[0]
        xold = x
        x = x - z
        iter += 1
    return x, iter
\end{ipythonnb}
The results are:
\begin{itemize}
\item[(a)] Iterations: 67, Solution found: $\overline{x}=1.0000$;
\item[(b)] Iterations: 25, Solution found: $\overline{x}=1.0425$;
\item[(c)] Iterations: 100, Solution found: $\overline{x}=1.0000$.
\end{itemize}
While (b) terminated in the fewest iterations, it also produced the least accurate solution. Stopping criterion (a) is reasonable: in view of the quadratic convergence, the inequality
\begin{equation*}
\norm{\vct{x}_{k+1}-\vct{x}_k}\geq \norm{\vct{x}_k-\vct{x}^*}-\norm{\vct{x}_{k+1}-\vct{x}^*} \geq \norm{\vct{x}_k-\vct{x}^*}-M\norm{\vct{x}_{k}-\vct{x}^*}^2
\end{equation*}
shows that the difference of successive iterates gives a good bound on the error. Criterion (b) is problematic: the function can have a very flat slope while still being far away from the minimizer, as this example shows.
This issue is related to the conditioning of the function $f$. Criterion (c) always has the same running time, and 100 iterations would normally be more than enough for Newton's method; however, it is a very pessimistic worst-case bound and performs far more iterations than necessary here.
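For comparison, only the stopping test changes between the criteria. A minimal sketch of the variant using criterion (b), which stops once $|f'(x_k)|\leq\e$, is given below; the name \texttt{newton\_grad} is not from the original notebook, and the same conventions as in the implementation above are assumed.
\begin{ipythonnb}
def newton_grad(f, df, ddf, x0, tol, maxiter=100):
    """
    Newton's method with stopping criterion (b): stop once the
    gradient norm falls below tol (sketch for comparison only)
    """
    x = x0
    iter = 0
    while ( la.norm(df(x)) > tol ) and ( iter < maxiter ):
        # Newton step: solve f''(x) z = f'(x) and update
        z = la.solve(ddf(x), df(x))[0]
        x = x - z
        iter += 1
    return x, iter
\end{ipythonnb}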
\solution{pr:2} The code is reproduced below. We first define the three methods, then the function and its derivatives, then set the parameters and run the methods; finally, the results are plotted.
The code uses the first and second derivatives of the function $f$, which are somewhat lengthy. They are given by
\begin{equation*}
\nabla f(\vct{x}) = \begin{pmatrix}
e^{x_1+3x_2-0.1} + e^{x_1-3x_2-0.1} - e^{-x_1-0.1}\\
3e^{x_1+3x_2-0.1} - 3e^{x_1-3x_2-0.1}
\end{pmatrix},
\end{equation*}
and
\begin{equation*}
\nabla^2 f(\vct{x}) = \begin{pmatrix}
e^{x_1+3x_2-0.1} + e^{x_1-3x_2-0.1} + e^{-x_1-0.1} & 3e^{x_1+3x_2-0.1} - 3e^{x_1-3x_2-0.1}\\
3e^{x_1+3x_2-0.1} - 3e^{x_1-3x_2-0.1} & 9e^{x_1+3x_2-0.1} + 9e^{x_1-3x_2-0.1}
\end{pmatrix}.
\end{equation*}
\begin{ipythonnb}
def newton(f, df, ddf, x0, tol, maxiter=100):
    """
    Newton's method with stopping criterion (a)
    """
    x = np.vstack((x0+2*tol*np.ones(x0.shape), x0)).transpose()
    i = 1
    while ( la.norm(x[:,i]-x[:,i-1]) > tol ) and ( i < maxiter ):
        grad = df(x[:,i])
        hess = ddf(x[:,i])
        z = la.solve(hess, grad)
        xnew = x[:,i] - z
        x = np.concatenate((x, xnew.reshape((len(x0), 1))), axis=1)
        i += 1
    return x[:,1:]

def graddesc_bt(f, df, ddf, x0, tol, maxiter=100, rho=0.5, c=0.1):
    """
    Gradient descent with backtracking
    """
    x = np.vstack((x0+2*tol*np.ones(x0.shape), x0)).transpose()
    i = 1
    while ( la.norm(x[:,i]-x[:,i-1]) > tol ) and ( i < maxiter ):
        p = -df(x[:,i])
        # Start backtracking
        alpha = 1
        xnew = x[:,i] + alpha*p
        while (f(xnew) >= f(x[:,i]) + alpha*c*np.dot(p, df(x[:,i]))):
            alpha = alpha*rho
            xnew = x[:,i] + alpha*p
        x = np.concatenate((x, xnew.reshape((len(x0), 1))), axis=1)
        i += 1
    return x[:,1:]

def graddesc_co(f, df, ddf, x0, tol, maxiter=100):
    """
    Gradient descent with constant step length
    """
    alpha = 0.1
    x = np.vstack((x0+2*tol*np.ones(x0.shape), x0)).transpose()
    i = 1
    while ( la.norm(x[:,i]-x[:,i-1]) > tol ) and ( i < maxiter ):
        r = df(x[:,i])
        xnew = x[:,i] - alpha*r
        x = np.concatenate((x, xnew.reshape((len(x0), 1))), axis=1)
        i += 1
    return x[:,1:]
\end{ipythonnb}
\begin{ipythonnb}
def f(x):
    return np.exp(x[0]+3*x[1]-0.1) + np.exp(x[0]-3*x[1]-0.1) + np.exp(-x[0]-0.1)

def df(x):
    return np.array([np.exp(x[0]+3*x[1]-0.1)+np.exp(x[0]-3*x[1]-0.1)-np.exp(-x[0]-0.1),
                     3*np.exp(x[0]+3*x[1]-0.1)-3*np.exp(x[0]-3*x[1]-0.1)])

def ddf(x):
    return np.array([[np.exp(x[0]+3*x[1]-0.1)+np.exp(x[0]-3*x[1]-0.1)+np.exp(-x[0]-0.1),
                      3*np.exp(x[0]+3*x[1]-0.1)-3*np.exp(x[0]-3*x[1]-0.1)],
                     [3*np.exp(x[0]+3*x[1]-0.1)-3*np.exp(x[0]-3*x[1]-0.1),
                      9*np.exp(x[0]+3*x[1]-0.1)+9*np.exp(x[0]-3*x[1]-0.1)]])
\end{ipythonnb}
\begin{ipythonnb}
tol = 1e-6
x0 = np.array([-1.,0.7])
# Run the three methods
xbt = graddesc_bt(f, df, ddf, x0, tol)
xco = graddesc_co(f, df, ddf, x0, tol)
xn = newton(f, df, ddf, x0, tol)
\end{ipythonnb}
\begin{ipythonnb}
xvals = np.array([[np.linspace(-4,-0.5,20)], [np.zeros(20)]])
yvals = list(reversed(f(xvals)[0]))
# Create a meshgrid and a contour plot
xx = np.linspace(-2.5,1,100)
yy = np.linspace(-0.8,0.8,100)
X, Y = np.meshgrid(xx, yy)
# The construction inside looks odd: we want to transform the set of input pairs given
# by the meshgrid into a 2 x n array of values that we can apply f to (calling f on such
# an array will apply the function f to each column)
Z = f(np.dstack((X,Y)).reshape((X.size, 2)).transpose())
# the result of applying f is a long list, but we want a matrix
Z = Z.reshape(X.shape)
%matplotlib inline
cmap = plt.cm.get_cmap("coolwarm")
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
ax[0].contour(X, Y, Z, yvals, cmap = cmap)
ax[0].plot(xco[0,:], xco[1,:], color='black', linewidth=3)
ax[1].contour(X, Y, Z, yvals, cmap = cmap)
ax[1].plot(xbt[0,:], xbt[1,:], color='black', linewidth=3)
ax[2].contour(X, Y, Z, yvals, cmap = cmap)
ax[2].plot(xn[0,:], xn[1,:], color='black', linewidth=3)
plt.show()
\end{ipythonnb}
\begin{figure}[h!]
\centering
\includegraphics[width=0.85\textwidth]{images/threemethods.png}
\end{figure}
Gradient descent with constant step length takes 44 iterations, gradient descent with backtracking takes 28 iterations, and Newton's method takes only 6.
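These counts can be read off from the arrays returned by the three functions: each array stores the starting point followed by one column per iteration. Assuming the variables from the cells above, a small sketch is
\begin{ipythonnb}
# Number of iterations = number of stored columns minus one (the starting point)
for name, xs in [("constant step", xco), ("backtracking", xbt), ("Newton", xn)]:
    print(name, xs.shape[1] - 1)
\end{ipythonnb}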
\solution{pr:3}
\begin{itemize}
\item[(a)] The verification follows from applying the definition of convexity.
\item[(b)] The function is minimized using Newton's method with starting point $(a,b)=(0.5,0.5)$ and tolerance $10^{-8}$; a sketch of the code is given after this list. The number of iterations is 6 and the solution is $(a,b) = (1.724, -4.434)$. Plotting the probability of passing the exam as a function of the number of preparation hours, with these parameters $(a,b)$, gives the curve in Figure~\ref{fig:log}.
\begin{figure}[h!]
\centering
\includegraphics[width=0.6\textwidth]{images/logcurve.png}
\caption{Probability of passing exam as function of preparation}\label{fig:log}
\end{figure}
\end{itemize}
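The following is a minimal sketch of the computation in (b). It assumes the standard logistic model $p(t) = 1/(1+e^{-(at+b)})$ for the probability of passing after $t$ hours of preparation and minimizes the corresponding negative log-likelihood with the two-dimensional \texttt{newton} function defined above; the arrays \texttt{t} and \texttt{y} are hypothetical placeholders and should be replaced by the data from the problem sheet.
\begin{ipythonnb}
# Sketch only: the logistic model and the placeholder data below are assumptions,
# not part of the original notebook; substitute the data from the problem sheet.
t = np.array([0., 1., 2., 3., 4., 5.])   # hypothetical hours of preparation
y = np.array([0., 0., 1., 0., 1., 1.])   # hypothetical pass (1) / fail (0) outcomes

def sigmoid(u):
    return 1.0/(1.0 + np.exp(-u))

def f(x):
    # negative log-likelihood of the logistic model with parameters x = (a, b)
    p = sigmoid(x[0]*t + x[1])
    return -np.sum(y*np.log(p) + (1-y)*np.log(1-p))

def df(x):
    p = sigmoid(x[0]*t + x[1])
    return np.array([np.sum((p-y)*t), np.sum(p-y)])

def ddf(x):
    p = sigmoid(x[0]*t + x[1])
    w = p*(1-p)
    return np.array([[np.sum(w*t*t), np.sum(w*t)],
                     [np.sum(w*t),   np.sum(w)]])

xs = newton(f, df, ddf, np.array([0.5, 0.5]), 1e-8)
print(xs[:, -1])   # estimated parameters (a, b)
\end{ipythonnb}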
\end{document}