In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ISLP import load_data
from ISLP.models import (ModelSpec as MS, summarize, poly)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

In [2]:
# Data preparation: load the advertising data with pandas
df = pd.read_csv('../data/Advertising.csv')

# Discard the first column, selected by position rather than by name
# (presumably a row index saved into the CSV -- confirm against the file).
df = df.drop(columns=df.columns[0])

# Predictors are every remaining column except 'Sales'; 'Sales' is the response.
X = df.drop(columns='Sales')
y = df['Sales']

# Expand the predictors into polynomial and interaction terms up to degree 3,
# without the constant (bias) column.
model_poly = PolynomialFeatures(degree=3, include_bias=False)
X_poly = model_poly.fit_transform(X)

# Hold out 33% of the rows for testing; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_poly, y, test_size=0.33, random_state=42
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape


((134, 19), (66, 19), (134,), (66,))

In [3]:
from sklearn.preprocessing import StandardScaler

# Standardize the features: remove the sample mean and divide by the sample
# standard deviation. The scaler is fitted on the training split only and the
# same fitted scaler is then applied to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Sanity check: overall mean / std of each scaled matrix (computed over all entries).
print(f'Scaled x_train mean: {X_train_scaled.mean()}, std: {X_train_scaled.std()}')
print(f'Scaled x_test mean: {X_test_scaled.mean()}, std: {X_test_scaled.std()}')

Scaled x_train mean: -3.34898382919136e-17, std: 1.0
Scaled x_test mean: -0.11982457640326809, std: 1.1245966534380971


In [4]:
# Apply ridge regression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error

def ridge_regression(X, penalty=0, X_fit=None, y_fit=None):
    """Fit a ridge model and return its predictions for ``X``.

    Parameters
    ----------
    X : array-like
        Feature matrix to predict on; passed unchanged to ``Ridge.predict``.
    penalty : float, default 0
        Passed to ``Ridge`` as ``alpha``. With 0 the objective reduces to
        ordinary least squares.
        NOTE(review): the scikit-learn docs advise using ``LinearRegression``
        rather than ``Ridge(alpha=0)`` for numerical reasons -- confirm whether
        the default of 0 should stay.
    X_fit, y_fit : array-like, optional
        Training features and targets. When both are omitted the notebook-level
        ``X_train_scaled`` and ``y_train`` are used. They must be given
        together.

    Returns
    -------
    The result of ``Ridge.predict(X)`` for the freshly fitted model.

    Raises
    ------
    ValueError
        If only one of ``X_fit`` / ``y_fit`` is supplied.
    """
    if (X_fit is None) != (y_fit is None):
        raise ValueError("X_fit and y_fit must be provided together")
    if X_fit is None:
        # Fall back to the notebook's scaled training split.
        X_fit, y_fit = X_train_scaled, y_train

    # A new model is fitted on every call, so each call is independent.
    model_ridge = Ridge(alpha=penalty)
    model_ridge.fit(X_fit, y_fit)
    return model_ridge.predict(X)

# Score ridge regression (penalty 0.2) on the held-out test split.
y_pred = ridge_regression(X_test_scaled, penalty=0.2)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # back on the scale of the response

print(f'MSE: {mse}, MAE: {mae}, RMSE: {rmse}')



MSE: 0.37323673323812373, MAE: 0.4845959994544078, RMSE: 0.6109310380379472


In [5]:
# Check with plain linear regression on the same scaled features,
# to compare its test errors with the ridge result.
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression().fit(X_train_scaled, y_train)
y_pred = lin_reg.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Displayed as (MSE, MAE, RMSE)
mse, mae, np.sqrt(mse)

(0.2650465950553624, 0.37485164412178346, 0.5148267621786599)

In [6]:
# Lasso regression on the standardized polynomial features
from sklearn.linear_model import Lasso

# alpha is the strength of the L1 penalty
model_lasso = Lasso(alpha=0.1)
model_lasso.fit(X_train_scaled, y_train)
y_pred = model_lasso.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Only the last expression of a cell is echoed, so the metrics are printed
# explicitly to appear alongside the coefficients.
print(f'MSE: {mse}, MAE: {mae}, RMSE: {rmse}')

# Fitted coefficients; the lasso penalty can set some of them exactly to zero.
print(model_lasso.coef_)

[ 1.89480144  0.42062367  0.         -0.          3.55216501  0.
  0.          0.01110965  0.         -0.42677394 -0.         -0.
  0.          0.         -0.          0.          0.06706906  0.
  0.        ]


In [7]:
# k-fold cross-validation to find the best alpha
# The next cell uses RidgeCV, which evaluates the model for each candidate alpha with
# k-fold cross-validation and keeps the best one (cross_val_score is the manual alternative)


In [8]:
# Ridge regression with the penalty chosen by cross-validation
from sklearn.linear_model import RidgeCV

# Candidate penalties are compared with 5-fold cross-validation on the training split.
model_ridge_cv = RidgeCV(alphas=[0.1, 1.0, 10.0], cv=5)
model_ridge_cv.fit(X_train_scaled, y_train)

# Only the last expression of a cell is echoed, so the selected penalty is
# printed explicitly.
print(f'Selected alpha: {model_ridge_cv.alpha_}')

# Test-set errors of the model fitted with the selected alpha.
y_pred = model_ridge_cv.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
mse, mae, rmse

(0.3176335944839659, 0.4343075766386554, 0.5635899169466801)

In [9]:
# Fitted coefficients of the cross-validated ridge model (one per column of X_train_scaled)
model_ridge_cv.coef_

array([ 5.84681185,  0.52142086,  0.71689997, -6.17948738,  3.75034058,
       -1.36283352, -0.08571128,  0.08322815, -0.34893776,  2.16952446,
       -0.47840838,  0.68527348,  0.63080799, -0.5950065 ,  0.61661989,
       -0.31335495,  0.36499629,  0.03328145, -0.13652471])