In [1]:
import numpy as np
import matplotlib 
matplotlib.use('nbagg')
import matplotlib.pyplot as plt
import random

In [23]:
# Minimizing the error surface E(u,v) = (u*exp(v) - 2v*exp(-u))^2 using Gradient Descent
def error_func(u, v):
    """
    Evaluate the error surface E(u,v) = (u*exp(v) - 2*v*exp(-u))^2.

    Inputs
    u - current value of the weight along the U dimension (a scalar)
    v - current value of the weight along the V dimension (a scalar)

    Output
    Returns the value of E at the point (u, v) (a scalar)
    """
    # The surface is the square of a single residual term.
    residual = u*np.exp(v) - 2*v*np.exp(-u)
    return residual**2

def grad_u(u, v):
    """
    Partial derivative of E(u,v) = (u*exp(v) - 2*v*exp(-u))^2 with respect to u.

    Inputs
    u - current value of the weight along the U dimension (a scalar)
    v - current value of the weight along the V dimension (a scalar)

    Output
    Returns dE/du evaluated at (u, v) (a scalar)
    """
    exp_v = np.exp(v)
    exp_neg_u = np.exp(-u)
    # Chain rule: dE/du = 2 * residual * d(residual)/du
    residual = u*exp_v - 2*v*exp_neg_u
    d_residual_du = exp_v + 2*v*exp_neg_u
    return 2*residual*d_residual_du

def grad_v(u, v):
    """
    Partial derivative of E(u,v) = (u*exp(v) - 2*v*exp(-u))^2 with respect to v.

    Inputs
    u - current value of the weight along the U dimension (a scalar)
    v - current value of the weight along the V dimension (a scalar)

    Output
    Returns dE/dv evaluated at (u, v) (a scalar)
    """
    exp_v = np.exp(v)
    exp_neg_u = np.exp(-u)
    # Chain rule: dE/dv = 2 * residual * d(residual)/dv
    residual = u*exp_v - 2*v*exp_neg_u
    d_residual_dv = u*exp_v - 2*exp_neg_u
    return 2*residual*d_residual_dv

def grad_vec(u, v):
    """
    Assemble the full gradient of the error surface at (u, v).

    Inputs
    u - current value of the weight along the U dimension (a scalar)
    v - current value of the weight along the V dimension (a scalar)

    Output
    Returns the gradient [[dE/du], [dE/dv]] (a column vector)
    """
    partial_u = grad_u(u, v)
    partial_v = grad_v(u, v)
    # Stack the two partials, then add a trailing axis to get a column.
    return np.array([partial_u, partial_v])[:, np.newaxis]

def update_weights_grad_desc(weight_current, eta):
    """
    Take one gradient descent step from the current weights.

    Inputs
    weight_current - current set of weights [[u], [v]] (a column vector)
    eta - learning rate (a scalar)

    Output
    Returns weight_current - eta * gradient, with both components of the
    gradient evaluated at the current point (a column vector)
    """
    u, v = weight_current[0, 0], weight_current[1, 0]
    step = eta*grad_vec(u, v)
    return weight_current - step

def update_weights_coord_desc(weight_current, eta):
    """
    Take one coordinate descent sweep: step along U, then along V.

    Inputs
    weight_current - current set of weights [[u], [v]] (a column vector)
    eta - learning rate (a scalar)

    Output
    Returns the updated weights (a column vector). The V step is computed
    with the already-updated U value, not the U value passed in.
    """
    u_old = weight_current[0, 0]
    v_old = weight_current[1, 0]
    # First move along U only.
    u_new = u_old - eta*grad_u(u_old, v_old)
    # Then re-evaluate the gradient at the new U and move along V only.
    v_new = v_old - eta*grad_v(u_new, v_old)
    return np.array([[u_new], [v_new]])

In [24]:
# Running the Gradient Descent algorithm until the error drops below the threshold
threshold = 10**(-14)
eta = 0.1
# Safety cap: without it the loop never terminates if the run stalls above the threshold.
max_iterations = 10000
weight_init = np.array([[1],[1]])
# Start from the real error at the initial weights instead of a placeholder value.
error = error_func(weight_init[0,0],weight_init[1,0])
iterations = 0
while error>=threshold and iterations<max_iterations:
    weight_init = update_weights_grad_desc(weight_init, eta)
    error = error_func(weight_init[0,0],weight_init[1,0])
    iterations = iterations + 1
# `not error < threshold` is also True when error is NaN; a NaN error makes the
# loop condition False, so without this check a diverged run would be reported as converged.
if not error<threshold:
    print("Warning: gradient descent stopped without reaching the error threshold")
print("The number of iterations for reducing the error below the threshold is: ",iterations)
print("The error is: ",error)
print("The final weights are: ", weight_init)

The number of iterations for reducing the error below the threshold is:  10
The error is:  1.20868339442e-15
The final weights are:  [[ 0.04473629]
 [ 0.02395871]]


In [26]:
# Coordinate Descent run for comparison with gradient descent, using eta = 0.1
# and starting weights [[1],[1]], for a fixed number of sweeps
iter_threshold = 15
eta = 0.1
weight_init = np.array([[1],[1]])
iterations = 0
# The loop variable doubles as the sweep counter, ending at iter_threshold.
for iterations in range(1, iter_threshold + 1):
    weight_init = update_weights_coord_desc(weight_init, eta)
    error = error_func(weight_init[0,0],weight_init[1,0])
print("The number of iterations is: ",iterations)
print("The error after these many iterations is: ",error)
print("The final weights are: ", weight_init)

The number of iterations is:  15
The error after these many iterations is:  0.139813791996
The final weights are:  [[ 6.2970759 ]
 [-2.85230695]]
