Simple Linear Regression

**Implementing Linear Regression with the Normal Equation and computing the Sum of Squared Errors (SSE)**

In [62]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [63]:
# Toy dataset: ten observations of one feature and one target.
x_values = list(range(10))
Y = [1, 3, 2, 5, 7, 8, 8, 9, 10, 12]

# Design matrix: a leading column of ones (intercept term) followed by the feature column.
intercept_column = np.ones((len(x_values), 1))
X = np.c_[intercept_column, x_values]

In [64]:
# Normal equation: find theta such that (X^T X) theta = X^T Y.
# Solving the linear system directly avoids forming the explicit inverse,
# which costs more and is numerically less stable when X^T X is ill-conditioned.
theta_normal = np.linalg.solve(X.T @ X, X.T @ Y)

In [65]:
# theta_normal[0] pairs with the ones column of X (intercept); theta_normal[1] pairs with the feature column (slope)
print(theta_normal)

[1.23636364 1.16969697]


In [66]:
# Fitted values: design matrix times the estimated parameter vector
Y_pred = X @ theta_normal

In [67]:
# Fitted value of the normal-equation model for each row of X
print(Y_pred)

[ 1.23636364  2.40606061  3.57575758  4.74545455  5.91515152  7.08484848
  8.25454545  9.42424242 10.59393939 11.76363636]


**Calculating the Residual Sum of Squares (SSE)**

In [68]:
# Sum of squared errors (SSE) of the normal-equation fit.
# Computed in one vectorised expression and stored under its own name so the
# built-in `sum` is not rebound for the rest of the kernel session.
sse_normal = np.sum((Y - Y_pred) ** 2)
print(sse_normal)

5.6242424242424205


In [69]:
# Coefficient of determination for the normal-equation fit:
# R^2 = 1 - SS_residual / SS_total
deviations_from_mean = Y - np.mean(Y)
prediction_errors = Y - Y_pred
ss_residual = np.sum(prediction_errors ** 2)
ss_total = np.sum(deviations_from_mean ** 2)
r2 = 1 - ss_residual / ss_total
print(r2)

0.952538038613988


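**Implementing Linear Regression with scikit-learn's `LinearRegression` (ordinary least squares, solved directly rather than by full-batch gradient descent)**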

In [70]:
from sklearn.linear_model import LinearRegression

# Same ten observations as the normal-equation section; reshape(-1, 1) turns
# the feature vector into a single-column 2-D matrix.
x = np.array([0,1,2,3,4,5,6,7,8,9]).reshape(-1,1)
y = [1,3,2,5,7,8,8,9,10,12]

# LinearRegression fits ordinary least squares. The model is fitted on the
# full dataset, so predictions made from `x` afterwards are in-sample
# (there is no train/test split in this notebook).
# fit() returns the estimator itself, so the fitted model is bound directly.
model = LinearRegression().fit(x, y)

In [71]:
# In-sample predictions from the fitted LinearRegression model
y_pred = model.predict(x)

In [72]:
# Learned parameters: the intercept first, then the single slope coefficient
intercept, slope = model.intercept_, model.coef_[0]
print(intercept, slope)

1.2363636363636363 1.1696969696969697


In [73]:
# SSE from scratch for the scikit-learn LinearRegression (ordinary least squares) fit.
# Vectorised and stored under its own name so the built-in `sum` is not rebound.
sse_ols = np.sum((y - y_pred) ** 2)
print(sse_ols)

5.624242424242421


In [74]:
# Coefficient of determination for the LinearRegression fit:
# R^2 = 1 - SS_residual / SS_total
deviations_from_mean = y - np.mean(y)
prediction_errors = y - y_pred
ss_residual = np.sum(prediction_errors ** 2)
ss_total = np.sum(deviations_from_mean ** 2)
r2 = 1 - ss_residual / ss_total
print(r2)

0.952538038613988


**Implementing Linear Regression using Stochastic Gradient Descent**

In [75]:
from sklearn.linear_model import SGDRegressor

# SGDRegressor shuffles the training data between epochs by default, so an
# unseeded fit yields different coefficients (and different SSE / R^2 below)
# on every run. Fixing random_state makes Restart & Run All reproducible.
SGD_SEED = 42
model = SGDRegressor(random_state=SGD_SEED)
model.fit(x,y)

In [76]:
# In-sample predictions from the SGD model; this rebinds y_pred, replacing the LinearRegression predictions
y_pred = model.predict(x)

In [77]:
# SSE from scratch for the stochastic gradient descent (SGDRegressor) fit.
# Vectorised and stored under its own name so the built-in `sum` is not rebound.
sse_sgd = np.sum((y - y_pred) ** 2)
print(sse_sgd)

6.789082381936932


In [78]:
# Coefficient of determination for the SGDRegressor fit:
# R^2 = 1 - SS_residual / SS_total
deviations_from_mean = y - np.mean(y)
prediction_errors = y - y_pred
ss_residual = np.sum(prediction_errors ** 2)
ss_total = np.sum(deviations_from_mean ** 2)
r2 = 1 - ss_residual / ss_total
print(r2)

0.9427081655532749
