In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Fetch the Advertising dataset and discard the 'Unnamed: 0' column
# (the name pandas assigns to a header-less first column in the CSV).
df_AdvData = pd.read_csv(
    "https://raw.githubusercontent.com/justmarkham/scikit-learn-videos/master/data/Advertising.csv"
).drop(columns="Unnamed: 0")
df_AdvData.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
# Predictors: spend per advertising channel. Target: the Sales column.
X = df_AdvData.loc[:, ['TV', 'Radio', 'Newspaper']]
y = df_AdvData.loc[:, 'Sales']

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Convert the train/test splits to float32 NumPy arrays.
# np.asarray accepts pandas objects and ndarrays alike, so re-running this cell
# after the names have been rebound to arrays does not fail (ndarrays have no
# `.values` attribute).
X_train = np.asarray(X_train, dtype=np.float32)
y_train = np.asarray(y_train, dtype=np.float32)
X_test = np.asarray(X_test, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.float32)

#### Standard Scaling

In [7]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply those same fitted
# statistics to both splits.
sc = StandardScaler().fit(X_train)

X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

In [8]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the standardized training features.
model = LinearRegression().fit(X_train_std, y_train)

# Report, one value per line: the score on the held-out split, then the
# fitted intercept and coefficients.
print(model.score(X_test_std, y_test), model.intercept_, model.coef_, sep="\n")

0.899437992963907
14.1
[3.764196   2.7923071  0.05597566]


#### Normal Equation

In [9]:
# Prepend a column of ones so the intercept is learned as the first parameter.
X_train_std_bias = np.hstack([np.ones((X_train_std.shape[0], 1)), X_train_std])

In [10]:
# Sanity check: the first three rows should start with the constant 1.
print(X_train_std_bias[:3])

[[ 1.         -0.40424839 -1.0282371  -0.33767536]
 [ 1.          0.32060772 -0.9198277  -1.1614393 ]
 [ 1.         -1.27051091  0.25912377  0.25425074]]


In [11]:
# Closed-form least squares (normal equation): theta = (X^T X)^-1 X^T y
inv_term = np.linalg.inv(X_train_std_bias.T.dot(X_train_std_bias))
ThetaEq = np.dot(np.dot(inv_term, X_train_std_bias.T), y_train)

print(ThetaEq)

[14.10000006  3.76419617  2.7923068   0.05597551]


#### Gradient Descent Implementation - From Scratch 

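- NOTE: Feature standardization is strongly recommended for gradient descent: with all features on a common scale, a single learning rate converges quickly. It is not strictly required for correctness.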

In [20]:
# Hyper-parameters for full-batch gradient descent.
m = X_train_std.shape[0]   # number of training samples
eta = 0.1                  # learning rate
epochs = 100
n_iterations = epochs      # one parameter update per epoch

# One parameter per column of the bias-augmented design matrix (instead of a
# hard-coded 4), so the cell keeps working if the feature set changes.
# A locally seeded generator makes the starting point identical on every run
# without touching NumPy's global random state.
theta_FullBatchGD = np.random.default_rng(42).standard_normal(
    (X_train_std_bias.shape[1], 1)
)

theta_FullBatchGD

array([[-0.68042969],
       [ 1.34856498],
       [-0.4660893 ],
       [-2.48292726]])

In [21]:
# Shapes of the design matrix, its transpose and the parameter vector.
print(*(arr.shape for arr in (X_train_std_bias, X_train_std_bias.T, theta_FullBatchGD)))

(160, 4) (4, 160) (4, 1)


In [27]:
# y_train is 1-D; adding a trailing axis turns it into a column vector, the
# shape the gradient computation subtracts from X.dot(theta).
d = y_train
print(d.shape)
print(d[:, np.newaxis].shape)

(160,)


(160, 1)

In [23]:
# Full-batch gradient descent: every update uses the whole training set.
for iteration in range(n_iterations):
    # Gradient of the MSE cost w.r.t. theta: (2/m) * X^T (X.theta - y)
    nable_MSE = (2/m) * np.dot(
        X_train_std_bias.T,
        np.dot(X_train_std_bias, theta_FullBatchGD) - y_train.reshape(-1, 1),
    )
    # Step against the gradient, scaled by the learning rate.
    theta_FullBatchGD = theta_FullBatchGD - eta * nable_MSE

In [24]:
# Display the parameters learned by gradient descent (bias term first, then
# one weight per feature column of the design matrix).
theta_FullBatchGD

array([[14.10000006],
       [ 3.76419623],
       [ 2.79230621],
       [ 0.05597609]])