## Load Dataset

In [1]:
import pandas as pd

# Read the startups CSV (path is relative to the notebook's working directory)
# and preview its first two rows.
csv_path = 'Datasets/50_Startups.csv'
dataset = pd.read_csv(csv_path)
dataset.head(2)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06


In [2]:
# Print the column names, non-null counts and dtypes of the loaded frame.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   R&D Spend        50 non-null     float64
 1   Administration   50 non-null     float64
 2   Marketing Spend  50 non-null     float64
 3   State            50 non-null     object 
 4   Profit           50 non-null     float64
dtypes: float64(4), object(1)
memory usage: 2.1+ KB


In [3]:
# Pairwise correlation between the numeric columns.
# 'State' is a string column: recent pandas versions raise on non-numeric
# columns in corr() instead of silently dropping them, so select the numeric
# dtypes explicitly to keep this cell runnable on a fresh kernel.
dataset.select_dtypes(include='number').corr()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,Profit
R&D Spend,1.0,0.241955,0.724248,0.9729
Administration,0.241955,1.0,-0.032154,0.200717
Marketing Spend,0.724248,-0.032154,1.0,0.747766
Profit,0.9729,0.200717,0.747766,1.0


In [4]:
# Features: columns 1 and 2 (Administration, Marketing Spend).
# Target: the last column (Profit).
# NOTE(review): column 0 (R&D Spend) is not used as a feature - confirm this is intentional.
feature_columns = [1, 2]
X = dataset.iloc[:, feature_columns].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## Feature Scaling

In [5]:
from sklearn.preprocessing import StandardScaler

# One scaler per array so features and target are standardised independently.
sc_X, sc_y = StandardScaler(), StandardScaler()

# Note: X and y are overwritten with their scaled versions; re-running this
# cell re-fits both scalers on the already-scaled arrays.
X = sc_X.fit_transform(X)

# The scaler expects a 2-D input, so y goes in as a column and comes back 1-D.
y = sc_y.fit_transform(y.reshape(-1, 1)).ravel()

## Linear Regression

In [9]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the scaled features and target.
linreg_model_1 = LinearRegression().fit(X, y)

# Show the fitted weights and intercept as the cell output.
linreg_model_1.coef_, linreg_model_1.intercept_

(array([0.22499275, 0.75500011]), -3.4009091082788556e-16)

## Statsmodels.api

In [21]:
import statsmodels.api as sm

# Prepend a constant column so the OLS fit includes an intercept term.
X_new = sm.add_constant(X)

# Build the OLS specification, then fit it.
ols_spec = sm.OLS(endog=y, exog=X_new)
linreg_model_2 = ols_spec.fit()

# Render the regression summary as the cell output.
linreg_model_2.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.593
Dependent Variable:,y,AIC:,100.8489
Date:,2020-07-14 23:10,BIC:,106.585
No. Observations:,50,Log-Likelihood:,-47.424
Df Model:,2,F-statistic:,36.71
Df Residuals:,47,Prob (F-statistic):,2.5e-10
R-squared:,0.610,Scale:,0.41519

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
const,-0.0000,0.0911,-0.0000,1.0000,-0.1833,0.1833
x1,0.2250,0.0912,2.4678,0.0173,0.0416,0.4084
x2,0.7550,0.0912,8.2810,0.0000,0.5716,0.9384

0,1,2,3
Omnibus:,6.584,Durbin-Watson:,1.279
Prob(Omnibus):,0.037,Jarque-Bera (JB):,6.524
Skew:,-0.512,Prob(JB):,0.038
Kurtosis:,4.443,Condition No.:,1.0


## Gradient Descent

In [39]:
# Batch gradient descent for the model y = w1*x1 + w2*x2 + b under a
# least-squares cost, starting from all-zero parameters.
learning_rate = .001
num_of_iterations = 10000
m = len(y)
w1, w2, b = 0, 0, 0

# Loop invariants, hoisted out of the update loop.
# y is 1-D after the scaling cell but an (m, 1) column after the normal-equation
# cell, so flatten it once here to work in either execution order.
y_flat = y.ravel()
x1, x2 = X[:, 0], X[:, 1]

for _ in range(num_of_iterations):
    # The residual is shared by all three partial derivatives: compute it once.
    residual = y_flat - w1 * x1 - w2 * x2 - b

    # Partial derivatives of the cost with respect to w1, w2 and b.
    ddw1 = -(residual @ x1) / m
    ddw2 = -(residual @ x2) / m
    ddb = -residual.sum() / m

    # Simultaneous update: every gradient above used the old parameter values.
    w1 -= learning_rate * ddw1
    w2 -= learning_rate * ddw2
    b -= learning_rate * ddb

print(f'w1 = {w1}, w2 = {w2} and b = {b}')

w1 = 0.2249708851126484, w2 = 0.7549608948207247 and b = -3.333999742949346e-16


## Normal Equation

In [27]:
import numpy as np

# Closed-form least squares: solve (x^T x) theta = x^T y for theta.
# X_new has the constant column first, so theta = [b, w1, w2].
# Plain ndarrays with `@` are used instead of the discouraged np.matrix class,
# and np.linalg.solve is used instead of forming the explicit inverse.
x = np.asarray(X_new)
y = y.reshape(len(y), 1)  # column vector, so the solution has shape (3, 1)

x_transpose_x = x.T @ x
x_transpose_y = x.T @ y

intercept_and_coefficient = np.linalg.solve(x_transpose_x, x_transpose_y)

b = intercept_and_coefficient[0, 0]
w1 = intercept_and_coefficient[1, 0]
w2 = intercept_and_coefficient[2, 0]

print(f'w1 = {w1}, w2 = {w2} and b = {b}')

w1 = 0.22499274753978332, w2 = 0.7550001104499884 and b = -3.2974388695454647e-16
