# Gradients Descent - Part 2


In [1]:
% matplotlib inline
import random

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import datasets, linear_model
import math

#### Generate some data

Using function $y = 4 + 3 x_1 + 10 x_2$

In [2]:
real_thetas = [4, 3, 10] 
n_thetas = len(real_thetas)

In [3]:
def generate_data(real_thetas, size, vmin, vmax, noise_var):
    x0=np.ones(size)
    X = np.array([x0] + \
                 [np.random.uniform(vmin, vmax, size=size)
                  for _ in range(len(real_thetas)-1)])

    # add noise
    y = np.dot(real_thetas, X) + np.random.normal(0.0, noise_var, size=size) 
    return X, y

In [4]:
X, y = generate_data(real_thetas=real_thetas, size=1000, vmin=0, vmax=1, noise_var=1.0)

### Define the cost function

In [5]:
def cost_function(thetas, X, y):
    return np.mean(np.power(np.dot(X.T, thetas) - y, 2))/2.0

## Gradient Descent

In [6]:
def gradient_descent_step(thetas, alpha, X, y, momentum=1.0, momentum_factor=0.9):
    n = float(len(X))
    
    # Make sure X has theta[0]
    pred = np.dot(X.T, thetas) 
    loss = pred - y
    gradient =  np.dot(X, loss.T) / n 
    # gradient =  np.sum(X * loss) / n 
    delta = momentum_factor*momentum + alpha * gradient
    thetas = thetas - delta
    
    return thetas, delta

In [7]:
def print_thetas_cost(thetas, cost, delta_cost = None):
    print "C:{0:8.4f}\tXs:".format(cost),
    for t in thetas:
        print "{0:8.4f}".format(t),

    if delta_cost and delta_cost > 0:
        print "↓"
    else:
        print "↑"

In [8]:
def gradient_descent(X, y, alpha=0.0005, max_steps=1000, n_thetas=n_thetas, momentum=0.0, momentum_factor=0.9, verbose=1):

    thetas = np.ones(n_thetas)

    costs = [cost_function(thetas, X, y)]
    if verbose > 0:
        print_thetas_cost(thetas, costs[-1])
    
    for i in range(max_steps):
        thetas, momentum = gradient_descent_step(thetas, alpha, X, y, momentum)
        cost = cost_function(thetas, X, y)
        costs.append(cost)
        if len(costs) > 1:
            delta_cost = costs[-2] - costs[-1]

            if verbose > 0:
                print_thetas_cost(thetas, cost, delta_cost)
            
            if abs(delta_cost) < 0.00001:
                print 
                print "Converged @", i
                break
                
    return thetas, costs[-1]
        


In [9]:
thetas, cost = gradient_descent(X, y, verbose=0)
print "Found this:"
print_thetas_cost(thetas, cost)
print "Real is:"
print_thetas_cost(real_thetas, 0.0)


Converged @ 129
Found this:
C:  0.5132	Xs:   4.0017   3.0214  10.0217 ↑
Real is:
C:  0.0000	Xs:   4.0000   3.0000  10.0000 ↑


### Exercise: Test another data set generated from by: $y = 10 + 4x_1 + 9x_2 - 40x_3 + 23x_4$

In [10]:
# code here

