# Linear Regression
The aim of linear regression is to draw a line of best fit. Typically gradient descent is used to find this line of best fit.

In [1]:
from numpy import *

#Error for the line is defined as 1/(len(points))sigma(y(i)-(mx(i)+b))^2
def calc_error_from_points(c, m, points):
    totalError = 0
    #loop over all the points to get the error
    for i in range(0,len(points)):
        y = points[i,1]
        x = points[i,0]
        totalError += (y - (m*x + c))**2
        
    return totalError/ float(len(points))

#This function calculates the partial derivative of the error function and uses gradient descent
# gradient descent runs over a bunch of iterations to slowly move towards a local minima by taking one 
#step per iteration in the direction where the error reduces.
def step_gradient_descent(c, m, points, learning_rate):
    N = float(len(points))
    m_gradient = 0
    c_gradient = 0
    #loop over the points to calculate the c and m gradient values
    for i in range(0,len(points)):
        y = points[i,1]
        x = points[i,0]
        
        #calculate the partial derivative for the error func for the parameters
        m_gradient += (-2/N)*x*(y - (m*x + c))
        c_gradient += (-2/N)*(y - (m*x + c))
        
    new_m = m - learning_rate * m_gradient
    new_c = c - learning_rate * c_gradient
    
    return [new_m, new_c]

# run the gradient descent algorithm for 1000 iterations to get the final value of c and m
def gradient_descend_runner(learning_rate, num_iterations, initial_m, initial_c, points):
    c = initial_c
    m = initial_m
    for i in range(num_iterations):
        [m,c] = step_gradient_descent(c,m,array(points),learning_rate)
    return [c,m]

def run():
    # get the points for the numver of hours studied vs test scores
    points = genfromtxt('data.csv', delimiter = ',')
    
    #Hyperparameters:
    #A machine learning model is the definition of a mathematical formula with a number of parameters 
    #that need to be learned from the data. That is the crux of machine learning: fitting a model to the data. 
    #This is done through a process known as model training. In other words, by training a model with existing data,
    #we are able to fit the model parameters.However, there is another kind of parameters that cannot be
    #directly learned from the regular training process. These parameters express “higher-level” properties 
    #of the model such as its complexity or how fast it should learn. They are called hyperparameters. 
    #Hyperparameters are usually fixed before the actual training process begins
    learning_rate = 0.0001
    
    # define the parameters which the model is based upon
    #y = mx + c. Here we need to fit the data with a linear line and need to learn the slope m and the y-intercept c
    initial_m = 0
    initial_c = 0
    
    #number of iterations for training the model. Since we have very few data points we will need only 1000 iterations
    num_iterations = 1000
    
    #Train the model to learn the parameters
    [c,m] = gradient_descend_runner(learning_rate, num_iterations, initial_m, initial_c, points)
    original_error = calc_error_from_points(initial_c, initial_m, points)
    print ("The original error is {}".format(original_error))
    error = calc_error_from_points(c,m,points)
    print("After running {} iterations we get m = {} , c = {} and the error = {} ".format(num_iterations,c,m,error))
    
if __name__ == '__main__':
    run()
    

The original error is 5565.107834483211
After running 2000 iterations we get m = 0.1483551275485007 , c = 1.4765762305610064 and the error = 112.57949073360243 
