In [None]:
# Setup for the implementation of Nesterov's accelerated gradient descent below.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (12.0, 9.0)  # default (width, height) in inches for every figure
import time

Implementation of Nesterov's gradient descent is provided below. We use the algorithm provided in the class notes, whereby:

$\alpha_0 = 0.5$, $\:$ $\alpha_{k+1}$ is the positive root of $\alpha^2_{k+1} + (\alpha_k^2 - 1/\kappa)\alpha_{k+1} - \alpha_k^2 = 0$, $\:$ $\beta_k = \frac{\alpha_k(1 - \alpha_k)}{\alpha^2_{k} + \alpha_{k+1}}$

where $\kappa = \frac{L}{\ell}$

In [None]:
def nesterov(gradf, L, l, x0, tol, maxit):
  """Minimize a function with Nesterov's accelerated gradient descent.

  Each iteration takes a gradient step of size 1/L from the extrapolated
  point y_k to obtain x_{k+1}, then extrapolates:

      x_{k+1} = y_k - gradf(y_k) / L
      y_{k+1} = x_{k+1} + beta_k * (x_{k+1} - x_k)

  with alpha_0 = 1/2, alpha_{k+1} the positive root of
  a^2 + (alpha_k^2 - l/L) * a - alpha_k^2 = 0, and
  beta_k = alpha_k * (1 - alpha_k) / (alpha_k^2 + alpha_{k+1}).

  Args:
    gradf: Callable returning the gradient at a point.
    L: Inverse step size; presumably the Lipschitz constant of the gradient.
    l: Numerator of the inverse condition number l/L; presumably the
      strong-convexity constant.
    x0: Starting point. It is never modified in place.
    tol: Iteration stops once the norm of gradf(y) is <= tol.
    maxit: Maximum number of iterations.

  Returns:
    A tuple (y, numit): the final extrapolated iterate and the number of
    iterations performed. If the gradient norm at y is still above tol, or
    is NaN because the iteration diverged, numit is reported as maxit + 1.
  """
  y = x0       # extrapolated point at which the gradient is evaluated
  x_prev = x0  # previous gradient-step iterate x_k
  alpha = 1/2  # alpha_0
  kappa_inv = l/L
  numit = 0

  # Evaluate the gradient once per iterate and reuse it for both the
  # stopping test and the step; gradf is typically the dominant cost.
  grad = gradf(y)
  grad_norm = np.linalg.norm(grad)
  while grad_norm > tol and numit < maxit:
    x = y - grad / L

    # alpha_{k+1}: positive root of a^2 + b*a - alpha_k^2 = 0
    b = alpha**2 - kappa_inv
    alpha_next = (-b + np.sqrt(b**2 + 4*alpha**2)) / 2.
    beta = alpha*(1 - alpha) / (alpha**2 + alpha_next)

    y = x + beta*(x - x_prev)
    x_prev = x
    alpha = alpha_next
    numit += 1

    grad = gradf(y)
    grad_norm = np.linalg.norm(grad)

  # `not (norm <= tol)` rather than `norm > tol`: a NaN norm compares False
  # both ways, and a diverged run must not be reported as converged.
  if not grad_norm <= tol:
    return y, maxit+1

  return y, numit