In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df_AdvData = pd.read_csv("https://raw.githubusercontent.com/justmarkham/scikit-learn-videos/master/data/Advertising.csv")
del df_AdvData['Unnamed: 0']
df_AdvData.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [3]:
X = df_AdvData[['TV', 'Radio', 'Newspaper']]
y = df_AdvData['Sales']

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
X_train = (X_train.values).astype(np.float32)
y_train = (y_train.values).astype(np.float32)
X_test = (X_test.values).astype(np.float32)
y_test = (y_test.values).astype(np.float32)

#### Standard Scaling

In [6]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

sc.fit(X_train)         # Computes mean and dev for X_train to scale whole of X

X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

In [7]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train_std, y_train)

print(model.score(X_test_std, y_test))

# Print parameters
print(model.intercept_)
print(model.coef_)

0.899437992963907
14.1
[3.764196   2.7923071  0.05597566]


#### Normal Equation

In [8]:
X_train_std_bias = np.c_[np.ones((X_train_std.shape[0], 1)), X_train_std]

In [9]:
print(X_train_std_bias[:3, :])

[[ 1.         -0.40424839 -1.0282371  -0.33767536]
 [ 1.          0.32060772 -0.9198277  -1.1614393 ]
 [ 1.         -1.27051091  0.25912377  0.25425074]]


In [10]:
inv_term = np.linalg.inv(np.dot(X_train_std_bias.T, X_train_std_bias))
ThetaEq = inv_term.dot(X_train_std_bias.T).dot(y_train)

print(ThetaEq)

[14.10000006  3.76419617  2.7923068   0.05597551]


#### Gradient Descent Implementation - From Scratch 

- NOTE: Standardization is required for gradient descent

In [11]:
m = X_train_std.shape[0]
eta = 0.1
epochs = 100
n_iterations = epochs
theta_FullBatchGD = np.random.randn(4, 1)

theta_FullBatchGD

array([[ 0.36684787],
       [ 0.38050137],
       [-0.1889217 ],
       [-0.63331843]])

In [12]:
print(X_train_std_bias.shape, X_train_std_bias.T.shape, theta_FullBatchGD.shape)

(160, 4) (4, 160) (4, 1)


In [13]:
d = y_train
print(d.shape)
print(d.reshape(-1, 1).shape)

(160,)
(160, 1)


In [14]:
for iteration in range(n_iterations):

    nable_MSE = (2/m) * X_train_std_bias.T.dot(X_train_std_bias.dot(theta_FullBatchGD) - y_train.reshape(-1, 1))
    theta_FullBatchGD = theta_FullBatchGD - eta * nable_MSE

In [15]:
theta_FullBatchGD

array([[14.10000006],
       [ 3.76419637],
       [ 2.79230458],
       [ 0.05597771]])

In [16]:
n_iterations = 50
t0 = 7
t1 = 50
t = 0

np.random.seed(42)

theta_StochaisticGD = np.random.rand(4, 1)

def lr_sim_annealing(t):

    return (t0)/(t + t1)

In [17]:
for iteration in range(n_iterations):

    for i in range(m):
        t = t + 1
        
        random_sample = np.random.randint(m)
        X_train_std_bias_sample = X_train_std_bias[random_sample:random_sample+1]
        y_train_sample = y_train[random_sample:random_sample+1]

        nable_MSE = 2 * X_train_std_bias_sample.T.dot(X_train_std_bias_sample.dot(theta_StochaisticGD) - y_train_sample)
        
        theta_StochaisticGD = theta_StochaisticGD - eta * nable_MSE

        eta = lr_sim_annealing(eta)

In [18]:
theta_StochaisticGD

array([[14.66912076],
       [ 3.14696776],
       [ 2.65614395],
       [-0.2760452 ]])

In [19]:
n_iterations = 100
t0 = 7
t1 = 50
t = 0
batch_size = 16
theta_MiniBatchGD = np.random.randn(4, 1)

for iteration in range(n_iterations):

    random_indices = np.random.permutation(m)

    X_train_std_bias_shuffled = X_train_std_bias[random_indices]
    y_train_shuffled = y_train[random_indices]

    for i in range(0, m, batch_size):

        t = t + 1

        X_i = X_train_std_bias_shuffled[i : i + batch_size]
        y_i = y_train_shuffled[i : i + batch_size]

        nabla_MSE = 2/batch_size * X_i.T.dot(X_i.dot(theta_MiniBatchGD) - y_i.reshape(-1, 1))

        theta_MiniBatchGD = theta_MiniBatchGD - eta * nabla_MSE

        eta = lr_sim_annealing(t)

In [20]:
theta_MiniBatchGD

array([[14.09808192],
       [ 3.76583336],
       [ 2.79537353],
       [ 0.05779406]])

In [36]:
np.random.permutation(12)
time.time()

1683436500.8491154

#### Polynomial Regression

In [52]:
from sklearn.preprocessing import PolynomialFeatures

poly_features = PolynomialFeatures(degree = 3, include_bias = False)

X_poly = poly_features.fit_transform(X_train)

poly_reg = LinearRegression()

poly_reg.fit(X_poly, y_train)

In [53]:
X_new_poly = poly_features.transform(X_test)
y_pred_poly= poly_reg.predict(X_new_poly)

In [54]:
poly_df_pred = pd.DataFrame([y_test, y_pred_poly])
poly_df_pred.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,16.9,22.4,21.4,7.3,24.700001,12.6,22.299999,8.4,11.5,14.9,...,10.6,11.3,23.700001,8.7,16.1,20.700001,11.6,20.799999,11.9,6.9
1,17.121777,22.231934,21.456097,7.278682,24.254593,13.130607,22.543484,8.601265,11.62699,15.536464,...,11.227007,10.716135,22.725266,8.449516,16.172367,20.841984,10.91869,20.718019,12.141545,6.423879


In [55]:
poly_reg.score(X_new_poly, y_pred_poly)

1.0