# Linear Regression from scratch
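This notebook codes a simple Linear Regression model ($y = mx + b$) from scratch, fitting the slope $m$ and intercept $b$ with gradient descent on the mean squared error.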

In [16]:
import numpy as np

## Define the function for error calculation between the points and the current line

In [17]:
def compute_error_for_line_given_points(b, m, points):
    """Return the mean squared error of the line ``y = m*x + b`` over ``points``.

    Parameters
    ----------
    b : float
        Intercept of the candidate line.
    m : float
        Slope of the candidate line.
    points : array-like
        Two-dimensional data with one observation per row; column 0 is read
        as ``x`` and column 1 as ``y``. Any array-like (e.g. a list of
        ``[x, y]`` pairs) is accepted, not only ``numpy.ndarray``.

    Returns
    -------
    numpy.float64
        The average of ``(y - (m*x + b))**2`` over all rows.

    Raises
    ------
    ValueError
        If ``points`` contains no rows, because the mean is undefined.
    """
    data = np.asarray(points, dtype=float)
    if len(data) == 0:
        raise ValueError("points must contain at least one (x, y) row")

    x = data[:, 0]
    y = data[:, 1]
    # Vertical distance between each observed y and the line's prediction.
    residuals = y - (m * x + b)
    return np.mean(residuals ** 2)

## Define the functions to calculate the gradient descent

In [18]:
def gradient_descent_runner(points, starting_m, starting_b, learning_rate, num_iterations):
    """Fit ``y = m*x + b`` by repeatedly applying ``step_gradient``.

    Parameters
    ----------
    points : array-like
        Data with one observation per row (column 0 is ``x``, column 1 is ``y``).
    starting_m : float
        Initial slope.
    starting_b : float
        Initial intercept.
    learning_rate : float
        Step size passed to ``step_gradient`` on every iteration.
    num_iterations : int
        Number of update steps to perform; zero or a negative value performs
        no update.

    Returns
    -------
    list
        ``[b, m]`` -- note the order is intercept first, slope second, which
        is the reverse of the ``starting_m, starting_b`` argument order.
    """
    # Convert once up front: the data never changes between iterations, so
    # rebuilding the array inside the loop only repeats the same copy.
    data = np.array(points)

    b = starting_b
    m = starting_m
    for _ in range(num_iterations):
        # Each step returns the updated intercept and slope.
        b, m = step_gradient(data, b, m, learning_rate)
    return [b, m]

In [25]:
def step_gradient(points, b_current, m_current, learning_rate):
    """Perform one gradient-descent update of the line ``y = m*x + b``.

    The gradients are the partial derivatives of the mean squared error
    ``(1/N) * sum((y - (m*x + b))**2)`` with respect to ``b`` and ``m``.

    Parameters
    ----------
    points : array-like
        Data with one observation per row; column 0 is read as ``x`` and
        column 1 as ``y``. Any array-like is accepted.
    b_current : float
        Current intercept.
    m_current : float
        Current slope.
    learning_rate : float
        Multiplier applied to each gradient before it is subtracted.

    Returns
    -------
    list
        ``[new_b, new_m]``, the updated intercept and slope. When ``points``
        has no rows both gradients are zero, so the values are unchanged.
    """
    data = np.asarray(points, dtype=float)
    n = len(data)

    if n == 0:
        # No observations contribute, so there is no direction to move in.
        b_gradient = 0
        m_gradient = 0
    else:
        x = data[:, 0]
        y = data[:, 1]
        residuals = y - (m_current * x + b_current)
        scale = -(2.0 / n)
        # d(MSE)/db = -(2/N) * sum(residual)
        b_gradient = scale * residuals.sum()
        # d(MSE)/dm = -(2/N) * sum(x * residual)
        m_gradient = scale * (x * residuals).sum()

    # Move against the gradient to reduce the error.
    new_b = b_current - (learning_rate * b_gradient)
    new_m = m_current - (learning_rate * m_gradient)

    return [new_b, new_m]

## Running the algorithm

In [27]:
def run(path_file="./linear_regression_live/data.csv", learning_rate=0.0001, num_iterations=10000):
    """Load the data set, fit a line with gradient descent, and print a summary.

    Parameters
    ----------
    path_file : str, optional
        Path to a comma-delimited file that is read with ``np.genfromtxt``;
        the first two columns are used as ``x`` and ``y``.
    learning_rate : float, optional
        Step size used for every gradient-descent update.
    num_iterations : int, optional
        Number of gradient-descent updates to perform.

    Calling ``run()`` with no arguments uses the original hard-coded settings.
    Nothing is returned; the starting and final ``b``, ``m`` and error are
    printed.
    """
    # Step 1 - Collect data points
    points = np.genfromtxt(path_file, delimiter=",")

    # Step 2 - Initial parameters of the line y = mx + b
    initial_b = 0
    initial_m = 0

    # Step 3 - Train the model
    initial_error = compute_error_for_line_given_points(initial_b, initial_m, points)
    print('Starting gradient descent at b = {0}, m = {1}, error = {2}'.format(initial_b, initial_m, initial_error))

    # The runner takes the slope before the intercept but returns [b, m].
    [b, m] = gradient_descent_runner(points, initial_m, initial_b, learning_rate, num_iterations)

    final_error = compute_error_for_line_given_points(b, m, points)
    print('Ending gradient descent at b = {0}, m = {1}, error = {2}'.format(b, m, final_error))
    
# Run the experiment: load the data, fit the line, and print the before/after error.
run()

Starting gradient descent at b = 0, m = 0, error = 5565.107834483211
Ending gradient descent at b = 0.6078985997054931, m = 1.4675440436333027, error = 112.31533427075733
