<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Manually-perform-linear-regression" data-toc-modified-id="Manually-perform-linear-regression-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Manually perform linear regression</a></span></li><li><span><a href="#Use-sklearn-to-perform-linear-regression" data-toc-modified-id="Use-sklearn-to-perform-linear-regression-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Use sklearn to perform linear regression</a></span></li></ul></div>

## Manually perform linear regression

In [63]:
# Load numpy
from numpy import *

def compute_error_for_line_given_points(b, m, points):
    """Return the mean squared error of the line ``y = m * x + b`` over ``points``.

    Parameters
    ----------
    b : float
        y-intercept of the candidate line.
    m : float
        Slope of the candidate line.
    points : array-like
        Two-dimensional data; column 0 holds the x values and column 1 the
        y values (any further columns are ignored). Nested lists are accepted
        as well as numpy arrays.

    Returns
    -------
    float
        The mean of ``(y - (m * x + b)) ** 2`` over all rows.

    Raises
    ------
    ZeroDivisionError
        If ``points`` has no rows, because the mean over zero points is
        undefined. The exception type matches what the loop-based version
        raised for empty input.
    """
    # `asarray` is provided by the notebook's `from numpy import *`; it leaves
    # an existing float array untouched and converts lists of pairs.
    data = asarray(points, dtype=float)
    if len(data) == 0:
        raise ZeroDivisionError("cannot compute the mean squared error of zero points")

    x = data[:, 0]
    y = data[:, 1]

    # Vertical distance between every observed y and the line's prediction
    residuals = y - (m * x + b)

    # Mean squared error, computed on the whole column at once
    return (residuals ** 2).mean()

In [44]:
def step_gradient(b_current, m_current, points, learningRate):
    """Perform one gradient-descent update of the line ``y = m * x + b``.

    The cost being minimised is the mean squared error over ``points``; its
    partial derivatives are

        d/db = -(2/N) * sum(y - (m * x + b))
        d/dm = -(2/N) * sum(x * (y - (m * x + b)))

    Parameters
    ----------
    b_current : float
        Current y-intercept.
    m_current : float
        Current slope.
    points : array-like
        Two-dimensional data; column 0 holds x, column 1 holds y. Nested
        lists are accepted as well as numpy arrays.
    learningRate : float
        Step size applied to both gradients.

    Returns
    -------
    list
        ``[new_b, new_m]`` after moving against the gradient. When ``points``
        has no rows both gradients are zero, so the parameters come back
        unchanged in value.
    """
    # `asarray` is provided by the notebook's `from numpy import *`; it does
    # not copy when `points` is already a float array.
    data = asarray(points, dtype=float)

    if len(data) == 0:
        # Nothing to learn from: zero gradient, as with an empty loop
        b_gradient = 0.0
        m_gradient = 0.0
    else:
        x = data[:, 0]
        y = data[:, 1]
        residuals = y - (m_current * x + b_current)
        # mean() supplies the 1/N factor of both partial derivatives
        b_gradient = -2.0 * residuals.mean()
        m_gradient = -2.0 * (x * residuals).mean()

    # Update b and m using the partial derivatives and the learning rate
    new_b = b_current - (learningRate * b_gradient)
    new_m = m_current - (learningRate * m_gradient)
    return [new_b, new_m]

In [45]:
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    """Run ``num_iterations`` gradient-descent steps and return the fitted line.

    Parameters
    ----------
    points : array-like
        Two-dimensional data; column 0 holds x, column 1 holds y.
    starting_b : float
        Initial y-intercept.
    starting_m : float
        Initial slope.
    learning_rate : float
        Step size forwarded to ``step_gradient``.
    num_iterations : int
        Number of update steps. Zero or a negative value performs no step.

    Returns
    -------
    list
        ``[b, m]`` after the final step (the starting values when no step ran).
    """
    # Convert once, outside the loop: the data does not change between steps,
    # so re-creating the array on every iteration was wasted work.
    data = array(points)

    b = starting_b
    m = starting_m

    # Gradient descent: each step refines b and m from the previous values
    for _ in range(num_iterations):
        b, m = step_gradient(b, m, data, learning_rate)
    return [b, m]

In [62]:
def run(data_path="data.csv", learning_rate=0.0001, initial_b=7, initial_m=100,
        num_iterations=1000):
    """Fit ``y = m * x + b`` to a CSV file with gradient descent and print the result.

    Every argument is optional; the defaults reproduce the values that used to
    be hard-coded, so a bare ``run()`` behaves as before.

    Parameters
    ----------
    data_path : str
        Comma-delimited file read with ``genfromtxt``; column 0 is used as x
        and column 1 as y.
    learning_rate : float
        Hyperparameter controlling the size of each gradient-descent step.
    initial_b : float
        Initial y-intercept guess.
    initial_m : float
        Initial slope guess.
    num_iterations : int
        Number of gradient-descent steps to run.

    Returns
    -------
    None
        The starting and final parameters and errors are printed.
    """
    # Load data
    points = genfromtxt(data_path, delimiter=",")

    # Report the error of the initial guess before training
    print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(
        initial_b, initial_m,
        compute_error_for_line_given_points(initial_b, initial_m, points)))

    # Run gradient descent
    print("Running...")
    [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)

    # Final b, m and error
    print("After {0} iterations b = {1}, m = {2}, error = {3}".format(
        num_iterations, b, m,
        compute_error_for_line_given_points(b, m, points)))


if __name__ == '__main__':
    run()

Starting gradient descent at b = 7, m = 100, error = 24245679.088709317
Running...
After 1000 iterations b = 5.083681043274138, m = 1.3795739034241414, error = 110.57649790915275


## Use sklearn to perform linear regression

In [53]:
# Load library
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

In [54]:
# Load the same 'data.csv' file used in the manual gradient-descent section.
# header=None tells pandas the file has no header row, so the two columns
# are given the names 'x' and 'y' explicitly.
df = pd.read_csv('data.csv',header=None,names =['x','y'])

In [16]:
# Create the linear regression estimator with its default settings
regr = linear_model.LinearRegression()

In [57]:
# Train the model: 'x' is the single feature and 'y' the target.
# Double brackets select one-column DataFrames, so both arguments are 2-D.
regr.fit(df[['x']], df[['y']])

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [58]:
# Make predictions for the same x values the model was trained on
# (in-sample predictions, there is no separate test split here)
y_pred = regr.predict(df[['x']])

In [59]:
# The fitted coefficient (slope of the regression line)
print('Coefficients: \n', regr.coef_)

# The fitted intercept
print('Intercept: \n', regr.intercept_)

# The mean squared error of the in-sample predictions
print("Mean squared error: %.2f"      % mean_squared_error(df[['y']], y_pred))

# r2_score is the coefficient of determination (R^2), printed under the label
# 'Variance score': 1 is perfect prediction
print('Variance score: %.2f' % r2_score(df[['y']], y_pred))

Coefficients: 
 [[1.32243102]]
Intercept: 
 [7.99102099]
Mean squared error: 110.26
Variance score: 0.60


References:

1. https://github.com/llSourcell/linear_regression_live
2. https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html
3. https://mathinsight.org/partial_derivative_introduction