# Linear regression from scratch

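This notebook shows the math behind simple linear regression: it fits a line $y = mx + b$ to the data by running gradient descent on the mean squared error.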

In [1]:
import numpy as np

In [2]:
# load the data set from a comma-separated file; later cells read column 0
# as x and column 1 as y
points = np.genfromtxt(fname='data.csv', delimiter=',')

In [3]:
# define our hyperparameters
learning_rate = 1e-4       # multiplier applied to the gradient on every update
initial_b = initial_m = 0  # starting intercept (b) and slope (m) of the line
num_iterations = 1000      # number of gradient descent steps to run

In [4]:
# compute error for line
# we want to minimize this error
def compute_error( b, m, points ):
    """Return the mean squared error of the line ``y = m*x + b`` over ``points``.

    Parameters
    ----------
    b : number
        Intercept of the candidate line.
    m : number
        Slope of the candidate line.
    points : 2-D array
        Indexed as ``points[i, 0]`` for x and ``points[i, 1]`` for y.

    Returns
    -------
    The sum of squared residuals divided by the number of points.

    Raises
    ------
    ZeroDivisionError
        If ``points`` is empty (the sum is divided by ``float(0)``).
    """
    n_points = len(points)
    squared_residuals = 0
    for row in range(n_points):
        observed = points[row, 1]
        predicted = m * points[row, 0] + b
        # accumulate one point at a time so the summation order stays fixed
        squared_residuals += (observed - predicted) ** 2
    return squared_residuals / float(n_points)

In [5]:
# update b and m using gradient
def step_gradient( b_current, m_current, points, learning_rate ):
    """Perform one gradient descent update of the line ``y = m*x + b``.

    The gradient of the mean squared error with respect to ``b`` and ``m`` is
    accumulated over every point, then each parameter is moved against its
    gradient, scaled by ``learning_rate``.

    Parameters
    ----------
    b_current, m_current : number
        Intercept and slope before the update.
    points : 2-D array
        Indexed as ``points[i, 0]`` for x and ``points[i, 1]`` for y.
    learning_rate : number
        Multiplier applied to each gradient before it is subtracted.

    Returns
    -------
    tuple
        ``(new_b, new_m)``. With no points both gradients stay 0, so the
        current values come back unchanged.
    """
    n = len(points)
    grad_b = 0
    grad_m = 0
    for idx in range(n):
        x, y = points[idx, 0], points[idx, 1]
        # residual of the current line at this point
        residual = y - (m_current * x + b_current)
        # factor from differentiating the mean of squared residuals; computed
        # inside the loop so an empty data set never divides by zero
        weight = -(2 / n)
        grad_b += weight * residual
        grad_m += weight * x * residual

    # step against the gradient
    return (
        b_current - (learning_rate * grad_b),
        m_current - (learning_rate * grad_m),
    )

In [6]:
# run gradient descent
def gradient_descent_runner( points, initial_b, inital_m, learning_rate, num_iterations ):
    """Run gradient descent for ``num_iterations`` steps and return the result.

    Parameters
    ----------
    points
        Data set, passed unchanged to ``step_gradient`` on every iteration.
    initial_b : number
        Starting intercept.
    inital_m : number
        Starting slope. The misspelled name is kept so that existing keyword
        callers keep working.
    learning_rate : number
        Passed unchanged to ``step_gradient``.
    num_iterations : int
        Number of update steps; with 0 the starting values are returned.

    Returns
    -------
    list
        ``[b, m]`` after the final step.
    """
    b = initial_b
    # Read the parameter itself. The body used to read ``initial_m``, which is
    # not a parameter of this function: the argument was ignored and the
    # notebook-level global of that name was used instead (or NameError was
    # raised when no such global existed).
    m = inital_m
    for _ in range(num_iterations):
        # replace b and m with the values produced by one gradient step
        b, m = step_gradient(b, m, points, learning_rate)
    return [b, m]

In [7]:
# report the starting parameters and the error they give before any training
print(
    'starting gradient descent at b = {0}, m = {1}, error = {2}'.format(
        initial_b,
        initial_m,
        compute_error(initial_b, initial_m, points),
    )
)

starting gradient descent at b = 0, m = 0, error = 5565.107834483211


In [8]:
# fit the line: run gradient descent starting from the initial parameters
b, m = gradient_descent_runner(
    points, initial_b, initial_m, learning_rate, num_iterations
)

In [9]:
# report the fitted parameters and their error; the template starts at index 1,
# so num_iterations (index 0) is passed but never printed
print(
    'ending point at b = {1}, m = {2}, error = {3}'.format(
        num_iterations,
        b,
        m,
        compute_error(b, m, points),
    )
)

ending point at b = 0.08893651993741346, m = 1.4777440851894448, error = 112.61481011613473
