In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy import linalg

### Consider the quadratic
### $$ f(x_1,x_2) = (1+\lambda)x_1^2 + 2(1-\lambda)x_1x_2 + (1+\lambda) x_2^2
$$
### Note that this function can also be written as 
$$
f(x) = (Ax)\cdot  x \qquad \text{where} \qquad A = \begin{bmatrix}
1+\lambda & 1 - \lambda \\
1- \lambda & 1+ \lambda
\end{bmatrix}
$$

### Plot this function for $\lambda=1$, $\lambda = 10$ and $\lambda=100$ 

In [None]:
def f_for_plot(x1, x2, lbd=10):
    """Evaluate the quadratic f(x) = (A x) . x elementwise.

    f(x1, x2) = (1 + lbd) * x1**2 + 2 * (1 - lbd) * x1 * x2 + (1 + lbd) * x2**2

    Parameters
    ----------
    x1, x2 : scalar or numpy array
        Coordinates at which to evaluate f. Arrays (for instance the output
        of ``np.meshgrid``) are combined elementwise.
    lbd : float, optional
        The parameter lambda of the quadratic. Defaults to 10, the value that
        used to be hard-coded, so existing two-argument calls are unchanged.

    Returns
    -------
    scalar or numpy array
        The value of f, with the broadcast shape of ``x1`` and ``x2``.
    """
    diag = 1 + lbd       # coefficient of the squared terms
    off_diag = 1 - lbd   # half the coefficient of the cross term
    z = diag * x1**2 + 2 * off_diag * x1 * x2 + diag * x2**2
    return z

In [None]:
# Coarse 20 x 20 grid on the square [-1, 1] x [-1, 1]; both axes use the
# same sample points.
x1 = np.linspace(-1.0, 1.0, num=20)
x2 = x1.copy()
X1, X2 = np.meshgrid(x1, x2, indexing='xy')

In [None]:
# 3-D wireframe of f on the current (X1, X2) grid.
Z = f_for_plot(X1, X2)
ax = plt.axes(projection='3d')
ax.plot_wireframe(X1, X2, Z, color='black')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_zlabel('$f(x_1, x_2)$')
ax.set_title('Wireframe of the quadratic $f$')
plt.show()

In [None]:
# Finer 100 x 100 grid on the same square, used for the contour plots below.
x1 = np.linspace(-1.0, 1.0, num=100)
x2 = x1.copy()
X1, X2 = np.meshgrid(x1, x2, indexing='xy')

In [None]:
# Contour plot of f (50 levels) on the current (X1, X2) grid.
Z = f_for_plot(X1, X2)
contours = plt.contour(X1, X2, Z, 50)
# Hand the contour set to the colorbar explicitly instead of relying on
# pyplot's implicit "current image".
plt.colorbar(contours)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.title('Level sets of the quadratic $f$')
plt.show()

### The gradient of
### $$ f(x_1,x_2) = (1+\lambda)x_1^2 + 2(1-\lambda)x_1x_2 + (1+\lambda) x_2^2
$$
### is given by
### $$
\nabla f(x_1,x_2) =  2
\begin{bmatrix}
(1+\lambda)x_1 + (1 - \lambda)x_2 \\
(1- \lambda)x_1 + (1+ \lambda)x_2
\end{bmatrix}
$$
### Another way to compute the gradient is as follows: Since 
### $$f(x)=(Ax) \cdot x \qquad \text{with} \qquad \qquad A = \begin{bmatrix}
1+\lambda & 1 - \lambda \\
1- \lambda & 1+ \lambda
\end{bmatrix} $$
### we have seen in class that
### $$
\nabla f(\vec x) = 2 (A\vec x) 
$$


### Let's implement the gradient with this matrix formula. Let's work with $\lambda=10$ from now on.

In [None]:
# lambda used for the matrix A of the quadratic form f(x) = (A x) . x
lbd = 10.0
# Start from the off-diagonal value everywhere, then overwrite the diagonal.
A = np.full((2, 2), 1.0 - lbd)
np.fill_diagonal(A, 1.0 + lbd)
A

In [None]:
def gradf(x):
    """Return the gradient 2 * (A x) of the quadratic f(x) = (A x) . x.

    ``A`` is read from the notebook's global namespace at call time, so this
    function uses whichever matrix ``A`` is currently defined.
    """
    Ax = np.dot(A, x)
    return 2 * Ax

### Now let's write down the gradient descent algorithm:

In [None]:
def grad_desc(gradf, alpha, x0, num_iter):
    """Run fixed-step gradient descent and print every new iterate.

    Parameters
    ----------
    gradf : callable
        Function returning the gradient at a point.
    alpha : float
        Step size.
    x0 : scalar or array
        Starting point (not printed).
    num_iter : int
        Number of descent steps to take.

    Nothing is returned; the iterates are only printed.
    """
    point = x0
    for _ in range(num_iter):
        step = alpha * gradf(point)
        point = point - step
        print(point)

In [None]:
def grad_desc_store(gradf, alpha, x0, num_iter):
    """Run fixed-step gradient descent and return the visited points.

    Parameters
    ----------
    gradf : callable
        Function returning the gradient at a point.
    alpha : float
        Step size.
    x0 : array_like
        Starting point. Any dimension is accepted; the storage is sized from
        ``x0`` instead of being fixed at 2 columns.
    num_iter : int
        Number of iterates to record.

    Returns
    -------
    numpy.ndarray of shape (num_iter, n)
        Row ``i`` holds the point after ``i`` descent steps, so row 0 is
        ``x0`` itself and the last row is the point after ``num_iter - 1``
        steps.
    """
    x = np.asarray(x0, dtype=float)
    iterates = np.zeros((num_iter, x.size))

    for i in range(num_iter):
        # Record first, then step: the update computed in the final pass is
        # not stored.
        iterates[i, :] = np.ravel(x)
        x = x - alpha * gradf(x)

    return iterates

### Start with initial guess $x_1 = 0.0$ and $x_2 = 0.7$ and choose the step size to be $\alpha = 0.045$

In [None]:
# With lbd = 10 the Hessian 2A has eigenvalues 4 and 40, so a fixed step
# contracts only for alpha < 2/40 = 0.05; alpha = 0.045 is just below that.
x0 = np.array((0.0, 0.7))
alpha, num_iter = 0.045, 50
iterates = grad_desc_store(gradf, alpha=alpha, x0=x0, num_iter=num_iter)

### Plot the iterates together with the contours of the function

In [None]:
# Descent path (red) drawn over 100 level sets of f.
Z = f_for_plot(X1, X2)
contours = plt.contour(X1, X2, Z, 100)
plt.plot(iterates[:, 0], iterates[:, 1], 'r-o', markersize=5)
# Hand the contour set to the colorbar explicitly instead of relying on
# pyplot's implicit "current image".
plt.colorbar(contours)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
# Show the step size so the otherwise similar figures can be told apart.
plt.title(f'Gradient descent iterates, alpha = {alpha}')
plt.show()

### Play with different step sizes to see how they affect the behavior

In [None]:
# Same starting point with a much smaller step: alpha = 0.01 is well inside
# the stable range alpha < 0.05 (for lbd = 10), so the path is smooth but
# progress along the flat direction is slow.
x0 = np.array((0.0, 0.7))
alpha, num_iter = 0.01, 50
iterates = grad_desc_store(gradf, alpha=alpha, x0=x0, num_iter=num_iter)

In [None]:
# Descent path (red) drawn over 100 level sets of f.
Z = f_for_plot(X1, X2)
contours = plt.contour(X1, X2, Z, 100)
plt.plot(iterates[:, 0], iterates[:, 1], 'r-o', markersize=5)
# Hand the contour set to the colorbar explicitly instead of relying on
# pyplot's implicit "current image".
plt.colorbar(contours)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
# Show the step size so the otherwise similar figures can be told apart.
plt.title(f'Gradient descent iterates, alpha = {alpha}')
plt.show()

### Start with initial guess $x_1 = 0.6$ and $x_2 = 0.7$ and choose the step size to be $\alpha = 0.05$

In [None]:
# alpha = 0.05 is exactly the stability limit 2/40 for lbd = 10: the
# component of the iterate along (1, -1) changes sign every step without
# shrinking, while the component along (1, 1) still decays.
x0 = np.array((0.6, 0.7))
alpha, num_iter = 0.05, 50
iterates = grad_desc_store(gradf, alpha=alpha, x0=x0, num_iter=num_iter)

In [None]:
# Descent path (red) drawn over 100 level sets of f.
Z = f_for_plot(X1, X2)
contours = plt.contour(X1, X2, Z, 100)
plt.plot(iterates[:, 0], iterates[:, 1], 'r-o', markersize=5)
# Hand the contour set to the colorbar explicitly instead of relying on
# pyplot's implicit "current image".
plt.colorbar(contours)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
# Show the step size so the otherwise similar figures can be told apart.
plt.title(f'Gradient descent iterates, alpha = {alpha}')
plt.show()