-
Notifications
You must be signed in to change notification settings - Fork 3
/
so1a.tex
173 lines (158 loc) · 9.95 KB
/
so1a.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
\documentclass{article}
% Include macros here
\input{macros}
\usepackage{fancyhdr}
%\include{macros}
\usepackage{pifont}
% Number of problem sheet
\newcounter{problemSheetNumber}
\setcounter{problemSheetNumber}{1}
\newcommand{\matlabprob}{\ding{100} \ }
\newcommand{\examprob}{\ding{80} \ }
%\setcounter{section}{\theproblemSheetNumber}
%\renewcommand{\theparagraph}{(\thesection.\arabic{paragraph})}
\newcounter{problems}
\setcounter{problems}{0}
%\setlength{\parindent}{0cm}
\renewcommand{\problem}[1]{\paragraph{(\theproblemSheetNumber.\theproblems)}\addtocounter{problems}{1}\label{#1}}
\renewcommand{\solution}[1]{\paragraph{Solution (\theproblemSheetNumber.\theproblems)}\addtocounter{problems}{1}\label{#1}}
\pagestyle{fancy}
\lhead{MATH36061}
\chead{Convex Optimization}
\rhead{October 6, 2016}
\begin{document}
\begin{center}
{\Large {\bf Solutions to Part A of Problem Sheet \theproblemSheetNumber}}
\end{center}
\solution{pr:1}
\begin{itemize}
\item[(a)] The function $f(x)=x^4$ has a strict minimum at $x=0$, but the second derivative satisfies $f''(0)=0$.
\item[(b)] We construct a function that has a strict minimizer $x^*$, but such that every open neighourhood $U$ of $x^*$ contains other local minimizers. One such function is
\begin{equation*}
f(x) = \begin{cases} x^4 (\cos(1/x)+2) & \text{ for } x\neq 0\\
0 & \text{ for } x=0.
\end{cases}
\end{equation*}
\begin{figure}[h!]
\centering
\includegraphics[width=0.5\textwidth]{images/strictmin_cropped.pdf}
\end{figure}
We explain the construction of this function:
\begin{enumerate}
\item Start out with $g(x) = \cos(1/x)+2$ for $x\neq 0$ and $g(0)=1$. This function has minimizers $x_0=0$ and $x_k = 1/(\pi (2k+1)$ for $k\geq 0$, with values $g(x_k)=1$ at all minimizers. Therefore, any open interval around $0$ contains (infinitely many) local minimizers $x_k$ other than $x_0=0$.
\item Multipy $x^4$ to the function: $f(x) = x^4g(x)$. This ensures that $f(0)=0$ and $f(x)>0$ for $x\neq 0$. There are still local minima in every neighbourhood of $0$. To see this, compute the derivative
\begin{equation}\label{eq:der}\tag{1}
f'(x) = x^2(4x\cos(1/x)+\sin(1/x)+8x).
\end{equation}
Set $z_m=1/(\pi/2+m\pi)$ for $m>0$. Since $\sin(1/z_m)=\sin(\pi/2+m\pi)= 1$ for $m$ even and $-1$ for $m$ odd, and for $m$ sufficiently large the contribution of the other terms is negligible (as the $z_m$ become arbitrary small), the derivative~\eqref{eq:der} changes signs between successive $z_m$. Since $f'(x)$ is continuous, it has roots between any $z_m$ and $z_{m+1}$ for large enough $m$, and these correspond to maxima and minima of $f$.
\end{enumerate}
The function is in $C^2(\R)$. For $x\neq 0$ this is clear, and to verify this at $x=0$, one shows that the right and left limits as $x\to 0$ of $f'(x)$ and $f''(x)$ coincide (they are in fact $0$).
Note the subtle point that one minimizer $x^*$ can have local minimizers that are arbitrary close: while each open interval $I$ surrounding $x^*$ has another local minimizer $\tilde{x}$, every such $\tilde{x}$ has an interval $\tilde{I}$ surrounding it where this $\tilde{x}$ is the only minimizer!
\end{itemize}
\solution{pr:2} We want to show that the function
\begin{equation}\label{eq:quad}\tag{2}
f(\vct{x}) = \frac{1}{2}\vct{x}^{\trans}\mtx{A}\vct{x}+\vct{b}^{\trans}\vct{x}+c
\end{equation}
is convex if and only $\mtx{A}$ is positive semidefinite.
To see this, we compute the partial derivatives and the Hessian of $f$. The parts $\vct{b}^{\trans}\vct{x}$ and $c$ disappear when computing second derivatives. The function $\vct{x}^{\trans}\mtx{A}\vct{x}$ can be written as
\begin{equation*}
\vct{x}^{\trans}\mtx{A}\vct{x} = \sum_{i=1}^m\sum_{j=1}^n a_{ij}x_ix_j,
\end{equation*}
so that the first derivative is
\begin{equation*}
\frac{\partial f}{\partial x_i} = \frac{1}{2} \sum_{j\neq i} (a_{ij}+a_{ji}) x_j + a_{ii}x_i+b_i = \sum_{j=1}^n a_{ij}x_j+b_i,
\end{equation*}
where we used the symmetry of $\mtx{A}$ (i.e., $a_{ij}=a_{ji}$). The gradient and Hessian are therefore just given by
\begin{equation*}
\nabla f(\vct{x}) = \mtx{A}\vct{x}+\vct{b}, \quad \nabla^2f(\vct{x}) = \mtx{A}.
\end{equation*}
An interesting special case is when the quadratic function~\eqref{eq:quad} arises in the form
\begin{equation}\label{eq:least}\tag{3}
f(\vct{x}) = \frac{1}{2}\norm{\mtx{A}\vct{x}-\vct{b}}_2^2.
\end{equation}
The quadratic system then has the form
\begin{equation}\label{eq:normquad}\tag{4}
\norm{\mtx{A}\vct{x}-\vct{b}}_2^2 = (\mtx{A}\vct{x}-\vct{b})^{\trans}(\vct{A}\vct{x}-\vct{b}) = \vct{x}^{\trans}\mtx{A}^{\trans}\mtx{A}\vct{x}-2\vct{b}^{\trans}\mtx{A}\vct{x}+\norm{\vct{b}}_2^2.
\end{equation}
The matrix $\mtx{A}^{\trans}\mtx{A}$ is always symmetric and positive semidefinite:
\begin{equation*}
(\mtx{A}^{\trans}\mtx{A})^{\trans} = \mtx{A}^{\trans}(\mtx{A}^{\trans})^{\trans} = \mtx{A}^{\trans}\mtx{A}, \quad \text{ and } \quad \vct{x}^{\trans}\mtx{A}^\trans\mtx{A}\vct{x} = \norm{\mtx{A}\vct{x}}_2^2>0 \text{ if and only if } \vct{x}\neq \zerovct,
\end{equation*}
so that the function~\eqref{eq:least} is convex. From~\eqref{eq:normquad} we also see that the derivative of~\eqref{eq:least} is
\begin{equation*}
\mtx{A}^{\trans}(\mtx{A}\vct{x}-\vct{b}).
\end{equation*}
Note that the matrix $\mtx{A}$ in~\eqref{eq:quad} is not the same as the matrix $\mtx{A}$ in~\eqref{eq:least}!
\solution{pr:1} The general procedure is as follows: we first make an educated guess as to whether the function could be convex or not. If we think it is not convex, then it is enough to find a {\em counterexample}: find points in $S$ for which the line segment joining them is not completely contained in $S$. If we think it is convex, then we can show that for any two points the line segment joining them is in $S$.
\begin{itemize}
\item[(a)] This set is not convex: take $\vct{x}=(1,0,0)^{\trans}$ and $\vct{y}=(-1,0,0)^{\trans}$, then $\frac{1}{2}\vct{x}+\frac{1}{2}\vct{y}=\zerovct \not\in S$.
\item[(b)] This set is convex: if $\vct{x},\vct{y}\in S$, then $1\leq x_1-x_2<2$ and $1\leq y_1-y_2<2$, and
\begin{equation*}
\lambda x_1+(1-\lambda)y_1-\lambda x_2-(1-\lambda)y_2 = \lambda (x_1-x_2)+(1-\lambda)(y_1-y_2)<\lambda 2+(1-\lambda)2 = 2,
\end{equation*}
with the same argument giving the lower bound.
\item[(c)] This set is convex. In fact, $S$ is the unit ball of the $1$-norm
\begin{equation*}
\norm{\vct{x}}_1 = \sum_{i=1}^n |x_i|.
\end{equation*}
Given $\vct{x},\vct{y}\in S$,
\begin{equation*}
\norm{\lambda \vct{x}+(1-\lambda)\vct{y}}_1 \leq \lambda \norm{\vct{x}}_1+(1-\lambda)\norm{\vct{y}}_1 \leq \lambda \cdot 1+(1-\lambda)\cdot 1 = 1.
\end{equation*}
\item[(d)] This set is convex. Here, one needs to show that convex combinations preserve symmetry and positive definiteness of a matrix. The symmetry is clear. As for the positive definiteness, let $\vct{x}\neq \zerovct$ be given. Then
\begin{equation*}
\vct{x}^{\trans}(\lambda \mtx{A}+(1-\lambda)\mtx{B})\vct{x} = \lambda \vct{x}^{\trans}\mtx{A}\vct{x}+(1-\lambda)\vct{x}^{\trans}\mtx{B}\vct{x} \geq 0,
\end{equation*}
which shows that positive definiteness is also preserved.
\end{itemize}
\solution{pr:2}
\begin{itemize}
\item[(a)] This function is not convex. There are various ways of deriving this. For example, one can verify that the Hessian, or second derivative, is $-1/x^2$, which is not positive semidefinite.
Alternatively, one can also prove the statement using a pedestrian approach. We have to show that there are points $y\neq x$ and $\lambda \in [0,1]$ such that
\begin{equation*}
\log(\lambda x+(1-\lambda)y) > \lambda \log(x)+(1-\lambda)\log(y).
\end{equation*}
Let's choose $y=1$. Then what needs to be shown is that for the points $\vct{p}_1=(1,0)$ and $\vct{p}_2=(x,\log(x))$,
the line joining $\vct{p}_1$ and $\vct{p}_2$ lies {\em below} the curve $(t,\log(t))$ between $1$ and $x$. The line is given by the equation
\begin{equation*}
\ell(t) = \frac{\log(x)}{x-1}(t-1).
\end{equation*}
Evaluating this, for example, at $x=2$ and $t=1.5$, one sees that $\ell(t)>\log(t)$, which is enough evidence that $\log(t)$ is not convex.
With a little more effort one can deduce that the function is actually concave.
\item[(b)] The function $f(x)=x^4$ is convex, as we will verify using Theorem 2.10. First, note that the derivative $4x^3$ is an increasing function with $x$.
Given two points $(x,x^4)$ and $(y,y^4)$ with $y>x$, the line connecting them has slope $(y^4-x^4)/(y-x)$. By the mean value theorem, there exists a $z\in (x,y)$ such that
\begin{equation*}
\frac{y^4-x^4}{y-x} = f'(z) = 4z^3 \geq 4x^3.
\end{equation*}
Rearranging this inequality, we get
\begin{equation*}
f(y)-f(x) = y^4-x^4 \geq 4x^3(y-x) = f'(x)(y-x),
\end{equation*}
which is precisely the criterium for convexity in Theorem 2.10(1).
\item[(c)] Using Theorem 2.10(2), we compute the Hessian as
\begin{equation*}
\nabla^2f(\vct{x}) = \begin{pmatrix}
0 & 1\\
1 & 0
\end{pmatrix}.
\end{equation*}
This matrix is positive semidefinite on $\R^2_{++}$, since for all $\vct{x}\in \R^2_{++}$ we have
\begin{equation*}
\vct{x}^{\trans}\nabla^2f(\vct{x})\vct{x} = 2x_1x_2>0.
\end{equation*}
It follows that the function $f(\vct{x})=x_1x_2$ is convex.
\item[(d)] The Hessian matrix of $f(\vct{x})=x_1/x_2$ is
\begin{equation*}
\nabla^2f(\vct{x}) = \begin{pmatrix}
0 & -\frac{1}{x_2^2}\\
-\frac{1}{x_2^2} & 2\frac{x_1}{x_2^3}
\end{pmatrix}.
\end{equation*}
This matrix is not positive semidefinite for all valid values of $\vct{x}$ (take for example $\vct{x}=(1,1)^{\trans}$, which leads to a negative eigenvalue).
\item[(e)] The function $e^x-1$ is convex, as is easily seen using Theorem 2.10(2) by computing the second derivative.
\item[(f)] The function $f(\vct{x})=\max_i x_i$ is convex. Here, we can't use the criteria from Theorem 2.10 since the function is not differentiable, so we have to verify convexity directly:
\begin{equation*}
\max_i \lambda x_i+(1-\lambda)y_i \leq \lambda \max_i x_i+(1-\lambda) \max_i y_i.
\end{equation*}
\end{itemize}
\end{document}