In [1]:
# -------------------------------------------------------------------
# Linear regression using gradient descent

# Equation of straight line: y = mx + b

# m is slope
# b is y-intercept
# -------------------------------------------------------------------

import numpy as np

In [2]:
# Calculating the y value from the current (b, m) and subtracting it from the actual y value gives the error for a point
# Squaring makes every error positive and gives a parabolic function with a single minimum
# Finally, dividing by the number of points gives the average (mean squared error)
def compute_error(b, m, points):
    """Return the mean squared error of the line y = m*x + b over `points`.

    `points` is indexed as points[i, 0] for x and points[i, 1] for y, so it
    must support two-dimensional indexing (for example a 2-D NumPy array).
    The squared residuals are summed row by row and divided by the number
    of rows.
    """
    n_points = len(points)
    squared_error_sum = 0
    for row in range(n_points):
        x_val, y_val = points[row, 0], points[row, 1]
        # residual between the observed y and the line's value at x
        residual = y_val - (m * x_val + b)
        squared_error_sum += residual ** 2

    return squared_error_sum / float(n_points)

<img src="error_function.png">

In [3]:
# Meat of the program: optimizing by calculating the partial derivatives
def step_gradient(b_current, m_current, points, learningRate):
    """Perform one gradient-descent update of (b, m) for the line y = m*x + b.

    `points` is indexed as points[i, 0] for x and points[i, 1] for y.
    For each row the residual y - (m_current*x + b_current) is scaled by 2/N
    and accumulated: once as-is for b, once multiplied by x for m.

    Returns the updated parameters as the list [new_b, new_m].
    """
    count = float(len(points))
    step_b = 0
    step_m = 0
    for row in range(len(points)):
        x_val, y_val = points[row, 0], points[row, 1]
        residual = y_val - ((m_current * x_val) + b_current)
        # The 2 comes from differentiating the square; dividing by N averages over the points
        step_b += (2 / count) * residual          # contribution with respect to b
        step_m += (2 / count) * x_val * residual  # contribution with respect to m

    # The accumulated sums are the negative of the mean-squared-error gradient,
    # so adding them (scaled by the learning rate) moves toward lower error.
    updated_b = b_current + (learningRate * step_b)
    updated_m = m_current + (learningRate * step_m)

    return [updated_b, updated_m]

In [4]:
# Performing step_gradient `iterations` times and updating (b, m) after each step
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, iterations):
    """Apply `step_gradient` repeatedly and return the resulting [b, m].

    Parameters
    ----------
    points : array-like
        Data passed to `step_gradient`; converted to a NumPy array once,
        before the loop starts.
    starting_b, starting_m
        Initial intercept and slope.
    learning_rate
        Forwarded unchanged to `step_gradient` on every iteration.
    iterations : int
        Number of update steps. `range(iterations)` is empty for zero or
        negative values, in which case the starting values are returned.

    Returns
    -------
    list
        [b, m] after the last step.
    """
    # Convert once up front rather than on every pass through the loop:
    # the conversion is loop-invariant, and np.asarray does not copy when
    # `points` is already an ndarray.
    data = np.asarray(points)
    b = starting_b
    m = starting_m
    for _ in range(iterations):
        b, m = step_gradient(b, m, data, learning_rate)

    return [b, m]

In [5]:
# Predict new point (y) given (x, m and b)  
def predict(x, m, b):
    """Return the value of the line with slope `m` and intercept `b` at `x`."""
    slope_term = m * x
    return slope_term + b

In [11]:
# Initializing
points = np.genfromtxt('gradient_descent.csv', delimiter=',')

# Hyperparameters and the starting guess for the line
learning_rate = 0.0001
initial_b = 0
initial_m = 0
iterations = 5000

# Report the untrained state; the same x value is predicted again after training
print('Initial_b: ', initial_b, sep='')
print('Initial_m: ', initial_m, sep='')
print('Error: ', compute_error(initial_b, initial_m, points), sep='')
print('Prediction: ', predict(32.16847071685779, initial_m, initial_b), sep='')

print("\nAfter Training\n")
b, m = gradient_descent_runner(points, initial_b, initial_m, learning_rate, iterations)

# Report the fitted parameters, their error, and the prediction at the same x
print('Updated_b: ', b, sep='')
print('Updated_m: ', m, sep='')
print('Error: ', compute_error(b, m, points), sep='')
print('Prediction: ', predict(32.16847071685779, m, b), sep='')

Initial_b: 0
Initial_m: 0
Error: 5562.977159312569
Prediction: 0.0

After Training

Updated_b: 0.32192936901782715
Updated_m: 1.4727969824643692
Error: 115.36161807826575
Prediction: 47.699555971299404
