# Gradient Descent

In [5]:
from __future__ import division
import math
import random

In [11]:
def step(v, direction, step_size):
    """Move ``v`` by ``step_size`` along ``direction``, one component at a time.

    Returns a new list; ``v`` itself is not modified. Components are paired
    with ``zip``, so the result is as long as the shorter of the two inputs.
    """
    moved = []
    for coordinate, slope in zip(v, direction):
        moved.append(coordinate + (slope * step_size))
    return moved

def sum_of_squares_gradient(v):
    """Return the gradient of the sum of squares at ``v``: twice each component."""
    doubled = []
    for component in v:
        doubled.append(2 * component)
    return doubled

def distance(v, w):
    """Return the Euclidean distance between the vectors ``v`` and ``w``.

    Components are paired with ``zip``, so any surplus components of the
    longer vector are ignored.
    """
    squared_gaps = ((v_i - w_i) ** 2 for v_i, w_i in zip(v, w))
    return math.sqrt(sum(squared_gaps))

In [37]:
# Plain gradient descent on the sum of squares: start from a random integer
# point and keep taking fixed-size steps against the gradient until a single
# step moves the point by less than `tolerance`.
v = [random.randint(-10, 10) for _ in range(3)]
tolerance = 1e-7
i = 0  # number of steps taken so far
while True:
    i += 1
    gradient = sum_of_squares_gradient(v)
    next_v = step(v, gradient, -0.01)
    if distance(v, next_v) < tolerance:
        break  # the last step barely moved the point, so stop here
    v = next_v
print("Optimised => ", v)

Optimised =>  [1.6392420853404806e-06, 3.688294692016085e-06, 2.868673649345841e-06]


## Sometimes we use a list of step sizes to do the work

In [42]:
def safe(f):
    """Return a version of ``f`` that yields infinity instead of raising.

    The wrapper forwards all positional and keyword arguments to ``f``. If
    ``f`` raises an ordinary exception, the wrapper returns ``float('inf')``
    so that a failing candidate is never picked when minimising.

    Only ``Exception`` subclasses are swallowed: ``KeyboardInterrupt`` and
    ``SystemExit`` still propagate, so a long-running optimisation can be
    interrupted.
    """
    # Local import keeps this block self-contained.
    from functools import wraps

    @wraps(f)
    def safe_f(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except Exception:
            # Deliberately broad: any failure of f counts as "infinitely bad".
            return float('inf')
    return safe_f

def minimize_batch(target_fn, gradient_fn, theta_0, tolerance=0.0000001):
    """Minimise ``target_fn`` by gradient descent, starting from ``theta_0``.

    On every iteration the gradient at the current ``theta`` is computed,
    one candidate is generated per step size (10**2 down to 10**-5), and the
    candidate with the smallest target value is taken as the next ``theta``.

    Both ``target_fn`` and ``gradient_fn`` are wrapped with ``safe`` before
    use.

    Returns the current ``theta`` (the one from before the final step) once
    the target value changes by less than ``tolerance`` between iterations.
    """
    step_sizes = [10 ** exponent for exponent in range(2, -6, -1)]
    guarded_target = safe(target_fn)
    guarded_gradient = safe(gradient_fn)

    theta = theta_0
    value = guarded_target(theta)

    while True:
        gradient = guarded_gradient(theta)
        candidates = [step(theta, gradient, -size) for size in step_sizes]
        best = min(candidates, key=guarded_target)
        best_value = guarded_target(best)

        # Stop as soon as the best candidate no longer improves noticeably.
        if abs(best_value - value) < tolerance:
            return theta
        theta, value = best, best_value

In [43]:
def target_function(v):
    """Return the sum of the squares of the components of ``v``."""
    squares = (component ** 2 for component in v)
    return sum(squares)
# Minimise target_function from a random integer starting point in 3-D.
# Left as a bare expression so the returned theta is the cell's value.
minimize_batch(target_function,sum_of_squares_gradient,[random.randint(-10,10) for _ in range(3)])

[-4.3556142965880136e-05, -0.00013066842889764046, 0.0004355614296588014]

## Sometimes we want to maximize a function (equivalent to minimizing its negative)

In [44]:
def negate(f):
    """Return a function that calls ``f`` with the same arguments and negates the result."""
    def negated(*args, **kwargs):
        return -f(*args, **kwargs)
    return negated
def negate_all(f):
    """Return a function that calls ``f`` and negates every element of its result.

    Needed when ``f`` returns a list (like a gradient in gradient descent).
    """
    def negated_all(*args, **kwargs):
        return [-element for element in f(*args, **kwargs)]
    return negated_all

def maximize_batch(target_fn, gradient_fn, theta_0, tolerance=0.0000001):
    """Maximise ``target_fn`` by minimising its negation.

    ``target_fn`` is wrapped with ``negate`` and ``gradient_fn`` with
    ``negate_all``; both are then passed to ``minimize_batch`` together with
    the starting point ``theta_0`` and ``tolerance``.

    Returns whatever ``minimize_batch`` returns for the negated problem.
    """
    # theta_0 must be forwarded explicitly; otherwise tolerance would be
    # taken as the starting point.
    return minimize_batch(negate(target_fn),
                          negate_all(gradient_fn),
                          theta_0,
                          tolerance)