# Gradient Descent - Part 2


In [1]:
% matplotlib inline
import random

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import datasets, linear_model
import math

#### Generate some data

Using the function $y = 4 + 3 x_1 + 10 x_2$

In [2]:
# Ground-truth coefficients, intercept first: y = 4 + 3*x_1 + 10*x_2.
real_thetas = [4, 3, 10]
# Number of parameters to fit (intercept included).
n_thetas = len(real_thetas)

In [3]:
def generate_data(real_thetas, size, vmin, vmax, noise_var):
    """Draw a synthetic linear-regression data set.

    Parameters
    ----------
    real_thetas : sequence of float
        True coefficients, intercept first.
    size : int
        Number of samples to draw.
    vmin, vmax : float
        Bounds of the uniform distribution each non-constant feature is
        drawn from.
    noise_var : float
        Variance of the zero-mean Gaussian noise added to ``y``.

    Returns
    -------
    X : ndarray, shape (len(real_thetas), size)
        One row per coefficient; row 0 is all ones (the intercept term).
    y : ndarray, shape (size,)
        ``np.dot(real_thetas, X)`` plus the Gaussian noise.

    Raises
    ------
    ValueError
        If ``noise_var`` is negative.
    """
    if noise_var < 0:
        raise ValueError("noise_var must be non-negative")

    intercept_row = np.ones(size)
    feature_rows = [np.random.uniform(vmin, vmax, size=size)
                    for _ in range(len(real_thetas) - 1)]
    X = np.array([intercept_row] + feature_rows)

    # np.random.normal expects a standard deviation, so convert the variance.
    noise = np.random.normal(0.0, np.sqrt(noise_var), size=size)
    y = np.dot(real_thetas, X) + noise
    return X, y

In [4]:
# Seed NumPy's global RNG so that Restart & Run All regenerates the same data.
np.random.seed(42)
X, y = generate_data(real_thetas=real_thetas, size=1000, vmin=0, vmax=1, noise_var=1.0)

### Define the cost function

In [5]:
def cost_function(thetas, X, y):
    """Return half the mean squared error of the linear model.

    Predictions are ``np.dot(X.T, thetas)``, so ``X`` is expected with one
    row per coefficient.
    """
    residuals = np.dot(X.T, thetas) - y
    squared = np.power(residuals, 2)
    return np.mean(squared) / 2.0

## Gradient Descent

In [6]:
def gradient_descent_step(thetas, alpha, X, y, momentum=1.0, momentum_factor=0.9):
    """Apply one momentum-style update to ``thetas``.

    The update subtracted from ``thetas`` is
    ``momentum_factor * momentum + alpha * gradient``. Callers such as
    ``gradient_descent`` pass the update returned by the previous call back
    in as ``momentum``.

    Returns the pair ``(new_thetas, update)``.
    """
    # X must already include the constant row that pairs with thetas[0].
    residuals = np.dot(X.T, thetas) - y

    # NOTE(review): len(X) counts the rows of X (one per theta, given the
    # X.T . thetas product above), not the samples, so this is not the
    # per-sample mean that cost_function takes. Left as is because changing
    # the divisor rescales the effective learning rate -- confirm intent.
    row_count = float(len(X))
    grad = np.dot(X, residuals.T) / row_count

    step = momentum_factor * momentum + alpha * grad
    return thetas - step, step

In [7]:
def print_thetas_cost(thetas, cost, delta_cost=None):
    """Print the cost and the thetas on one line, with a trend arrow.

    Parameters
    ----------
    thetas : iterable of float
        Coefficients to display, each formatted as ``{:8.4f}``.
    cost : float
        Cost value to display after the ``C:`` prefix.
    delta_cost : float, optional
        Previous cost minus current cost. A positive value prints a down
        arrow (cost decreased); zero or a negative value prints an up arrow.
        When omitted no arrow is printed, because there is no trend to
        report.
    """
    fields = ["C:{0:8.4f}\tXs:".format(cost)]
    fields.extend("{0:8.4f}".format(t) for t in thetas)

    if delta_cost is not None:
        fields.append("↓" if delta_cost > 0 else "↑")

    # A single parenthesised argument prints the same on Python 2 and 3.
    print(" ".join(fields))

In [18]:
def gradient_descent(X, y, n_thetas=None, alpha=0.0005, max_steps=1000,
                     momentum=0.0, momentum_factor=0.9, verbose=1):
    """Fit thetas by repeated momentum gradient-descent steps.

    Parameters
    ----------
    X : array-like
        Features with one row per theta (it is used as ``X.T . thetas``).
    y : array-like
        Target values.
    n_thetas : int, optional
        Number of coefficients to fit. Defaults to ``len(X)``, the only
        length for which ``X.T . thetas`` is defined.
    alpha : float
        Learning rate.
    max_steps : int
        Upper bound on the number of update steps.
    momentum : float or array-like
        Initial value of the previous update fed into the first step.
    momentum_factor : float
        Weight of the previous update, forwarded to every step.
    verbose : int
        When greater than zero, print the cost and thetas after every step.

    Returns
    -------
    (thetas, cost)
        The final coefficients and the cost they achieve.

    The loop stops early, and prints the step index regardless of
    ``verbose``, once the cost changes by less than 1e-7 between steps.
    """
    if n_thetas is None:
        n_thetas = len(X)

    # Every coefficient starts at 1.
    thetas = np.ones(n_thetas)

    costs = [cost_function(thetas, X, y)]
    if verbose > 0:
        print_thetas_cost(thetas, costs[-1])

    for i in range(max_steps):
        # The returned update becomes the momentum term of the next step.
        thetas, momentum = gradient_descent_step(
            thetas, alpha, X, y, momentum, momentum_factor)
        cost = cost_function(thetas, X, y)
        delta_cost = costs[-1] - cost
        costs.append(cost)

        if verbose > 0:
            print_thetas_cost(thetas, cost, delta_cost)

        if abs(delta_cost) < 0.0000001:
            print("")
            print("Converged @ {0}".format(i))
            break

    return thetas, costs[-1]
        


In [19]:
# gradient_descent has to be told how many thetas to fit.
thetas, cost = gradient_descent(X, y, n_thetas=n_thetas, verbose=0)
print("Found this:")
print_thetas_cost(thetas, cost)
print("Real is:")
print_thetas_cost(real_thetas, 0.0)


Converged @ 144
Found this:
C:  0.4755	Xs:   9.9713   4.1502   8.9524 -40.0674  23.0712 ↑
Real is:
C:  0.0000	Xs:  10.0000   4.0000   9.0000 -40.0000  23.0000 ↑


### Exercise: Test another data set, generated by: $y = 10 + 4x_1 + 9x_2 - 40x_3 + 23x_4$

In [24]:
# code here

# True coefficients of y = 10 + 4*x_1 + 9*x_2 - 40*x_3 + 23*x_4, intercept first.
real_thetas = [10, 4, 9, -40, 23]
n_thetas = len(real_thetas)

# Draw a fresh sample for the new model; this replaces the earlier X and y.
X, y = generate_data(
    real_thetas=real_thetas,
    size=1000,
    vmin=0,
    vmax=1,
    noise_var=1.0,
)

In [25]:
# Fit the five-parameter model, printing every step, then compare the
# result with the coefficients the data was generated from.
thetas, cost = gradient_descent(X, y, n_thetas=n_thetas, verbose=1)

print("Found this:")
print_thetas_cost(thetas, cost)

print("Real is:")
print_thetas_cost(real_thetas, 0.0)

C:116.3142	Xs:   1.0000   1.0000   1.0000   1.0000   1.0000 ↑
C:108.7416	Xs:   1.5839   1.3248   1.3692   0.9202   1.4848 ↓
C: 98.6267	Xs:   2.5793   1.8806   2.0119   0.7144   2.3441 ↓
C: 90.9277	Xs:   3.7517   2.5404   2.8007   0.3042   3.4351 ↓
C: 87.0813	Xs:   4.8578   3.1724   3.6038  -0.3951   4.6110 ↓
C: 84.7950	Xs:   5.6958   3.6671   4.3109  -1.4484   5.7477 ↓
C: 80.6083	Xs:   6.1415   3.9568   4.8515  -2.8837   6.7630 ↓
C: 72.8150	Xs:   6.1661   4.0243   5.2039  -4.6828   7.6258 ↓
C: 62.5317	Xs:   5.8314   3.9018   5.3929  -6.7841   8.3539 ↓
C: 52.4785	Xs:   5.2684   3.6580   5.4790  -9.0930   9.0029 ↓
C: 44.7985	Xs:   4.6431   3.3808   5.5410 -11.4996   9.6477 ↓
C: 39.6939	Xs:   4.1189   3.1568   5.6569 -13.8972  10.3628 ↓
C: 35.7239	Xs:   3.8236   3.0532   5.8867 -16.1993  11.2049 ↓
C: 31.2301	Xs:   3.8262   3.1063   6.2613 -18.3509  12.2008 ↓
C: 25.6329	Xs:   4.1289   3.3164   6.7780 -20.3328  13.3427 ↓
C: 19.6747	Xs:   4.6730   3.6515   7.4035 -22.1582  14.5917 ↓
C: 14.63