problems/so1a.tex

\documentclass{article}
% Include macros here
\input{macros}
\usepackage{fancyhdr}
%\include{macros}
\usepackage{pifont}

% Number of problem sheet
\newcounter{problemSheetNumber}
\setcounter{problemSheetNumber}{1}
\newcommand{\matlabprob}{\ding{100} \ }
\newcommand{\examprob}{\ding{80} \ }
%\setcounter{section}{\theproblemSheetNumber}  
%\renewcommand{\theparagraph}{(\thesection.\arabic{paragraph})}
\newcounter{problems}
\setcounter{problems}{0}
%\setlength{\parindent}{0cm}
\renewcommand{\problem}[1]{\paragraph{(\theproblemSheetNumber.\theproblems)}\addtocounter{problems}{1}\label{#1}}
\renewcommand{\solution}[1]{\paragraph{Solution (\theproblemSheetNumber.\theproblems)}\addtocounter{problems}{1}\label{#1}}

\pagestyle{fancy}
\lhead{MATH36061}
\chead{Convex Optimization}
\rhead{October 6, 2016}

\begin{document} 
\begin{center}
{\Large {\bf Solutions to Part A of Problem Sheet \theproblemSheetNumber}}
\end{center}

\solution{pr:1}
\begin{itemize}
 \item[(a)] The function $f(x)=x^4$ has a strict minimum at $x=0$, but the second derivative satisfies $f''(0)=0$. 
 \item[(b)] We construct a function that has a strict minimizer $x^*$, but such that every open neighourhood $U$ of $x^*$ contains other local minimizers. One such function is
 \begin{equation*}
  f(x) = \begin{cases} x^4 (\cos(1/x)+2) & \text{ for } x\neq 0\\
          0 & \text{ for } x=0.
         \end{cases}
 \end{equation*}

\begin{figure}[h!]
\centering
 \includegraphics[width=0.5\textwidth]{images/strictmin_cropped.pdf}
\end{figure}
We explain the construction of this function:
\begin{enumerate}
 \item Start out with $g(x) = \cos(1/x)+2$ for $x\neq 0$ and $g(0)=1$. This function has minimizers $x_0=0$ and $x_k = 1/(\pi (2k+1)$ for $k\geq 0$, with values $g(x_k)=1$ at all minimizers. Therefore, any open interval around $0$ contains (infinitely many) local minimizers $x_k$ other than $x_0=0$. 
 \item Multipy $x^4$ to the function: $f(x) = x^4g(x)$. This ensures that $f(0)=0$ and $f(x)>0$ for $x\neq 0$. There are still local minima in every neighbourhood of $0$. To see this, compute the derivative
 \begin{equation}\label{eq:der}\tag{1}
  f'(x) = x^2(4x\cos(1/x)+\sin(1/x)+8x).
 \end{equation}
 Set $z_m=1/(\pi/2+m\pi)$ for $m>0$. Since $\sin(1/z_m)=\sin(\pi/2+m\pi)= 1$ for $m$ even and $-1$ for $m$ odd, and for $m$ sufficiently large the contribution of the other terms is negligible (as the $z_m$ become arbitrary small), the derivative~\eqref{eq:der} changes signs between successive $z_m$. Since $f'(x)$ is continuous, it has roots between any $z_m$ and $z_{m+1}$ for large enough $m$, and these correspond to maxima and minima of $f$.
\end{enumerate}
The function is in $C^2(\R)$. For $x\neq 0$ this is clear, and to verify this at $x=0$, one shows that the right and left limits as $x\to 0$ of $f'(x)$ and $f''(x)$ coincide (they are in fact $0$).

Note the subtle point that one minimizer $x^*$ can have local minimizers that are arbitrary close: while each open interval $I$ surrounding $x^*$ has another local minimizer $\tilde{x}$, every such $\tilde{x}$ has an interval $\tilde{I}$ surrounding it where this $\tilde{x}$ is the only minimizer!
\end{itemize}


\solution{pr:2} We want to show that the function
\begin{equation}\label{eq:quad}\tag{2}
f(\vct{x}) = \frac{1}{2}\vct{x}^{\trans}\mtx{A}\vct{x}+\vct{b}^{\trans}\vct{x}+c  
 \end{equation}
is convex if and only $\mtx{A}$ is positive semidefinite.
To see this, we compute the partial derivatives and the Hessian of $f$. The parts $\vct{b}^{\trans}\vct{x}$ and $c$ disappear when computing second derivatives. The function $\vct{x}^{\trans}\mtx{A}\vct{x}$ can be written as
\begin{equation*}
 \vct{x}^{\trans}\mtx{A}\vct{x} = \sum_{i=1}^m\sum_{j=1}^n a_{ij}x_ix_j,
\end{equation*}
so that the first derivative is
\begin{equation*}
 \frac{\partial f}{\partial x_i} = \frac{1}{2} \sum_{j\neq i} (a_{ij}+a_{ji}) x_j + a_{ii}x_i+b_i = \sum_{j=1}^n a_{ij}x_j+b_i,
\end{equation*}
where we used the symmetry of $\mtx{A}$ (i.e., $a_{ij}=a_{ji}$). The gradient and Hessian are therefore just given by
\begin{equation*}
 \nabla f(\vct{x}) = \mtx{A}\vct{x}+\vct{b}, \quad \nabla^2f(\vct{x}) = \mtx{A}.
\end{equation*}

An interesting special case is when the quadratic function~\eqref{eq:quad} arises in the form
\begin{equation}\label{eq:least}\tag{3}
 f(\vct{x}) = \frac{1}{2}\norm{\mtx{A}\vct{x}-\vct{b}}_2^2. 
\end{equation}
The quadratic system then has the form
\begin{equation}\label{eq:normquad}\tag{4}
 \norm{\mtx{A}\vct{x}-\vct{b}}_2^2 = (\mtx{A}\vct{x}-\vct{b})^{\trans}(\vct{A}\vct{x}-\vct{b}) = \vct{x}^{\trans}\mtx{A}^{\trans}\mtx{A}\vct{x}-2\vct{b}^{\trans}\mtx{A}\vct{x}+\norm{\vct{b}}_2^2.
\end{equation}
The matrix $\mtx{A}^{\trans}\mtx{A}$ is always symmetric and positive semidefinite:
\begin{equation*}
 (\mtx{A}^{\trans}\mtx{A})^{\trans} = \mtx{A}^{\trans}(\mtx{A}^{\trans})^{\trans} = \mtx{A}^{\trans}\mtx{A}, \quad \text{ and } \quad \vct{x}^{\trans}\mtx{A}^\trans\mtx{A}\vct{x} = \norm{\mtx{A}\vct{x}}_2^2>0 \text{ if and only if } \vct{x}\neq \zerovct,
\end{equation*}

so that the function~\eqref{eq:least} is convex. From~\eqref{eq:normquad} we also see that the derivative of~\eqref{eq:least} is
\begin{equation*}
 \mtx{A}^{\trans}(\mtx{A}\vct{x}-\vct{b}).
\end{equation*}
Note that the matrix $\mtx{A}$ in~\eqref{eq:quad} is not the same as the matrix $\mtx{A}$ in~\eqref{eq:least}!

\solution{pr:1} The general procedure is as follows: we first make an educated guess as to whether the function could be convex or not. If we think it is not convex, then it is enough to find a {\em counterexample}: find points in $S$ for which the line segment joining them is not completely contained in $S$. If we think it is convex, then we can show that for any two points the line segment joining them is in $S$.
\begin{itemize}
 \item[(a)] This set is not convex: take $\vct{x}=(1,0,0)^{\trans}$ and $\vct{y}=(-1,0,0)^{\trans}$, then $\frac{1}{2}\vct{x}+\frac{1}{2}\vct{y}=\zerovct \not\in S$.
 \item[(b)] This set is convex: if $\vct{x},\vct{y}\in S$, then $1\leq x_1-x_2<2$ and $1\leq y_1-y_2<2$, and
 \begin{equation*}
  \lambda x_1+(1-\lambda)y_1-\lambda x_2-(1-\lambda)y_2 = \lambda (x_1-x_2)+(1-\lambda)(y_1-y_2)<\lambda 2+(1-\lambda)2 = 2,
 \end{equation*}
 with the same argument giving the lower bound. 
 \item[(c)] This set is convex. In fact, $S$ is the unit ball of the $1$-norm
 \begin{equation*}
  \norm{\vct{x}}_1 = \sum_{i=1}^n |x_i|.
 \end{equation*}
 Given $\vct{x},\vct{y}\in S$,
 \begin{equation*}
  \norm{\lambda \vct{x}+(1-\lambda)\vct{y}}_1 \leq \lambda \norm{\vct{x}}_1+(1-\lambda)\norm{\vct{y}}_1 \leq \lambda \cdot 1+(1-\lambda)\cdot 1 = 1.
 \end{equation*}
 \item[(d)] This set is convex. Here, one needs to show that convex combinations preserve symmetry and positive definiteness of a matrix. The symmetry is clear. As for the positive definiteness, let $\vct{x}\neq \zerovct$ be given. Then
 \begin{equation*}
  \vct{x}^{\trans}(\lambda \mtx{A}+(1-\lambda)\mtx{B})\vct{x} = \lambda \vct{x}^{\trans}\mtx{A}\vct{x}+(1-\lambda)\vct{x}^{\trans}\mtx{B}\vct{x} \geq 0,
 \end{equation*}
which shows that positive definiteness is also preserved.
\end{itemize}


\solution{pr:2}
\begin{itemize}
 \item[(a)] This function is not convex. There are various ways of deriving this. For example, one can verify that the Hessian, or second derivative, is $-1/x^2$, which is not positive semidefinite. 
 
 Alternatively, one can also prove the statement using a pedestrian approach. We have to show that there are points $y\neq x$ and $\lambda \in [0,1]$ such that
 \begin{equation*}
  \log(\lambda x+(1-\lambda)y) > \lambda \log(x)+(1-\lambda)\log(y).
 \end{equation*}
 Let's choose $y=1$. Then what needs to be shown is that for the points $\vct{p}_1=(1,0)$ and $\vct{p}_2=(x,\log(x))$,
 the line joining $\vct{p}_1$ and $\vct{p}_2$ lies {\em below} the curve $(t,\log(t))$ between $1$ and $x$. The line is given by the equation
 \begin{equation*}
  \ell(t) = \frac{\log(x)}{x-1}(t-1).
 \end{equation*}
 Evaluating this, for example, at $x=2$ and $t=1.5$, one sees that $\ell(t)>\log(t)$, which is enough evidence that $\log(t)$ is not convex.
 With a little more effort one can deduce that the function is actually concave.
\item[(b)] The function $f(x)=x^4$ is convex, as we will verify using Theorem 2.10. First, note that the derivative $4x^3$ is an increasing function with $x$. 
Given two points $(x,x^4)$ and $(y,y^4)$ with $y>x$, the line connecting them has slope $(y^4-x^4)/(y-x)$. By the mean value theorem, there exists a $z\in (x,y)$ such that
\begin{equation*}
 \frac{y^4-x^4}{y-x} = f'(z) = 4z^3 \geq 4x^3.
\end{equation*}
Rearranging this inequality, we get
\begin{equation*}
 f(y)-f(x) = y^4-x^4 \geq 4x^3(y-x) = f'(x)(y-x),
\end{equation*}
which is precisely the criterium for convexity in Theorem 2.10(1).
\item[(c)] Using Theorem 2.10(2), we compute the Hessian as
\begin{equation*}
 \nabla^2f(\vct{x}) = \begin{pmatrix}
                       0 & 1\\
                       1 & 0
                      \end{pmatrix}.
\end{equation*}
This matrix is positive semidefinite on $\R^2_{++}$, since for all $\vct{x}\in \R^2_{++}$ we have
\begin{equation*}
 \vct{x}^{\trans}\nabla^2f(\vct{x})\vct{x} = 2x_1x_2>0.
\end{equation*}
It follows that the function $f(\vct{x})=x_1x_2$ is convex.
\item[(d)] The Hessian matrix of $f(\vct{x})=x_1/x_2$ is 
\begin{equation*}
 \nabla^2f(\vct{x}) = \begin{pmatrix}
                       0 & -\frac{1}{x_2^2}\\
                       -\frac{1}{x_2^2} & 2\frac{x_1}{x_2^3}
                      \end{pmatrix}.
\end{equation*}
This matrix is not positive semidefinite for all valid values of $\vct{x}$ (take for example $\vct{x}=(1,1)^{\trans}$, which leads to a negative eigenvalue).
\item[(e)] The function $e^x-1$ is convex, as is easily seen using Theorem 2.10(2) by computing the second derivative.
\item[(f)] The function $f(\vct{x})=\max_i x_i$ is convex. Here, we can't use the criteria from Theorem 2.10 since the function is not differentiable, so we have to verify convexity directly:
\begin{equation*}
 \max_i \lambda x_i+(1-\lambda)y_i \leq \lambda \max_i x_i+(1-\lambda) \max_i y_i.
\end{equation*}
 \end{itemize}

\end{document}