Import libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and 3.8 removed the old names, so use whichever whitegrid variant this
# installation provides and fall back to the default style if neither exists.
_whitegrid_styles = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
plt.style.use(next((s for s in _whitegrid_styles if s in plt.style.available), 'default'))
%matplotlib inline

Let's look at a function of one independent variable:
$$L = (w-10)^2 + 2w.$$

In [None]:
## Let's look at a function of one independent variable
def L(w):
    """Quadratic objective L(w) = (w - 10)**2 + 2*w; works on scalars and NumPy arrays."""
    return (w-10)**2 + 2*w
# Variant to experiment with: the cubic (w-10)**3 + 2*w.

fig, ax = plt.subplots(1, 1, figsize = (4, 4))
w = np.arange(-30, 30, 1e-03)  # evaluation grid on [-30, 30) with spacing 1e-3
ax.plot(w, L(w), 'k-')  # solid black curve
ax.set_xlabel('w')
ax.set_ylabel('L(w)')
# The trailing ';' keeps the Text(...) repr of the last call out of the cell output.
ax.set_title(r'$L(w) = (w-10)^2 + 2w$');

Gradient descent in 1D

In [None]:
## Gradient descent in 1D
def L(w):
    """Objective function L(w) = w**2 + 3."""
    return w**2 + 3


def gradL(w):
    """Derivative of L with respect to w: dL/dw = 2*w."""
    return 2*w


# With this gradient every update is w <- (1 - 2*alpha) * w, so try:
#   1e-05 : steps are tiny; the loop stops at maxiter long before reaching tol
#   1e-01 : steady convergence
#   0.95  : w flips sign on every step but still converges
#   1e0   : w flips between +1 and -1 forever and never converges
alpha = 1e-01  # learning rate (or) step size
tol = 1e-05  # stopping tolerance on |gradL(w)|
maxiter = 1000  # upper bound on the number of updates
n_iter = 0  # update counter; not called `iter` so the builtin stays usable in later cells

w = 1  # starting point

# Learning process: step against the gradient until it is (almost) zero
# or the iteration budget is exhausted.
while np.abs(gradL(w)) > tol and n_iter < maxiter:
    w = w - alpha * gradL(w)
    n_iter += 1
    print('Iteration = %d, w = %f, gradL(w) = %f'%(n_iter, w, gradL(w)))

Gradient descent in 2D for a function of two variables: $$L(w_1, w_2) = (w_1-2)^2+(w_2+3)^2.$$

In [None]:
## Gradient descent in 2D
def L(w):
    """Objective L(w) = (w[0] - 2)**2 + (w[1] + 3)**2 for a length-2 sequence w."""
    return (w[0]-2)**2 + (w[1]+3)**2


def gradL(w):
    """Gradient of L as a length-2 NumPy array: [2*(w[0] - 2), 2*(w[1] + 3)]."""
    return np.array([2*(w[0]-2), 2*(w[1]+3)])


# Each update scales the distance to the minimiser (2, -3) by (1 - 2*alpha),
# so the iteration only converges for 0 < alpha < 1. The previous value 1e1
# multiplied the error by -19 per step and overflowed to inf/nan.
alpha = 1e-1  # learning rate (or) step size
tol = 1e-05  # stopping tolerance on ||gradL(w)||
maxiter = 1000  # upper bound on the number of updates
n_iter = 0  # update counter; not called `iter` so the builtin stays usable in later cells

w = np.array([1.0, 1.0])  # initial guess (float so the dtype does not change after the first update)

# Step against the gradient until its norm drops below tol or the budget runs out.
while np.linalg.norm(gradL(w)) > tol and n_iter < maxiter:
    w = w - alpha * gradL(w)
    n_iter += 1
    print('Iteration = %d, w1 = %f, w2 = %f, ||gradL(w)|| = %f'%(n_iter, w[0], w[1], np.linalg.norm(gradL(w))))