In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [13]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [24]:
from sklearn.linear_model import Ridge

In [25]:
from sklearn.linear_model import RidgeCV

In [4]:
df = pd.read_csv("ML course notebooks/08-Linear-Regression-Models/Advertising.csv")
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [5]:
# Our model's X-value (the features we will give to the model)
X = df[['TV', 'radio', 'newspaper']]
X

Unnamed: 0,TV,radio,newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4
...,...,...,...
195,38.2,3.7,13.8
196,94.2,4.9,8.1
197,177.0,9.3,6.4
198,283.6,42.0,66.2


In [6]:
# Our models y-value (the feature(s) we want the model to predict) - y depends on X:
y = df['sales']
y

0      22.1
1      10.4
2       9.3
3      18.5
4      12.9
       ... 
195     7.6
196     9.7
197    12.8
198    25.5
199    13.4
Name: sales, Length: 200, dtype: float64

In [7]:
polynomial_converter = PolynomialFeatures(degree=3, include_bias=False)

In [8]:
poly_features = polynomial_converter.fit_transform(X)
poly_features

array([[2.30100000e+02, 3.78000000e+01, 6.92000000e+01, ...,
        9.88757280e+04, 1.81010592e+05, 3.31373888e+05],
       [4.45000000e+01, 3.93000000e+01, 4.51000000e+01, ...,
        6.96564990e+04, 7.99365930e+04, 9.17338510e+04],
       [1.72000000e+01, 4.59000000e+01, 6.93000000e+01, ...,
        1.46001933e+05, 2.20434291e+05, 3.32812557e+05],
       ...,
       [1.77000000e+02, 9.30000000e+00, 6.40000000e+00, ...,
        5.53536000e+02, 3.80928000e+02, 2.62144000e+02],
       [2.83600000e+02, 4.20000000e+01, 6.62000000e+01, ...,
        1.16776800e+05, 1.84062480e+05, 2.90117528e+05],
       [2.32100000e+02, 8.60000000e+00, 8.70000000e+00, ...,
        6.43452000e+02, 6.50934000e+02, 6.58503000e+02]])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.3, random_state=101)

In [11]:
# Scaling data:

scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler()

In [12]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
ridge_model = Ridge(alpha=10)

In [16]:
ridge_model.fit(X_train, y_train)

Ridge(alpha=10)

In [18]:
test_predictions = ridge_model.predict(X_test)
print(test_predictions)

[13.7469803  19.17764    11.8623393  16.25574551  8.92891155  8.04595366
 20.13320851 16.94847676 10.29219174 19.78365691 10.31758361 13.44077317
 12.24228062 23.23864236 19.12656362  9.748809   12.84361521  9.14248129
  8.82504966 21.46989971  8.25222201 18.8110776  28.39227333 25.06911066
  9.11912241 11.8071711  20.85199462  9.31962495 12.25122303  9.07811198
  9.25275556 19.87154568 10.55955497  7.86818084 17.67220132  9.31442423
 10.1539168   9.47802944  8.63337302 10.76209179 11.63848891 10.38883411
 10.90270005  8.40019284 11.35336224 10.3514086   8.00082147 15.27139854
 12.94761469 22.49762978 10.80165407 13.01337743 14.82204895 11.37354043
 11.69628953  7.20576885 25.53512232 10.41926545 17.38798022 15.10158819]


In [21]:
print(ridge_model.coef_)

[ 2.11063462  0.62207087  0.06822351  0.20543482  1.83561135  0.33399306
  0.02544162  0.15472622 -0.02220834 -0.62586462  0.41216201 -0.49628796
  1.11873234  0.24222164 -0.00821273 -0.20160416 -0.14825878 -0.06688285
 -0.06004278]


In [22]:
MAE = mean_absolute_error(y_test, test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test, test_predictions))
print(f"{MAE=} & {RMSE=}")

MAE=0.5774404204714171 & RMSE=0.8946386461319675


In [34]:
# How did the model perform on the training set?
train_predictions = ridge_model.predict(X_train)
MAE = mean_absolute_error(y_train, train_predictions)
MAE

0.5288348183025331

In [35]:
from sklearn.metrics import SCORERS
SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_wei

In [26]:
# Choosing a scoring: https://scikit-learn.org/stable/modules/model_evaluation.html
# Negative RMSE so all metrics follow convention "Higher is better"

# See all options: sklearn.metrics.SCORERS.keys()
ridge_cv_model = RidgeCV(alphas=(0.1, 1.0, 10.0), scoring='neg_mean_absolute_error')

In [27]:
# The more alpha options you pass, the longer this will take.
# Fortunately our data set is still pretty small
ridge_cv_model.fit(X_train, y_train)

RidgeCV(alphas=array([ 0.1,  1. , 10. ]), scoring='neg_mean_absolute_error')

In [28]:
ridge_cv_model.alpha_

0.1

In [30]:
test_predictions = ridge_cv_model.predict(X_test)
print(test_predictions)

[14.25006161 19.10205204 12.44776087 17.07029103  7.93609517  7.10314552
 19.94990664 17.24524365 10.33548736 19.85951926  9.37933062 13.9569529
 11.85445773 23.29303451 19.30247423  9.42431717 12.42484839  9.21704461
  8.70229329 21.42382243  7.14698141 19.35684761 27.33199316 24.44887898
  9.23720121 11.46268721 20.47427126  9.4597969  12.55070677  8.39250566
  8.7928193  19.92435249 10.8069225   7.10760344 18.16473242  9.30970124
 10.54163577  9.71377387  7.31109512 10.76826686 12.32939318 10.34155545
 11.39134453  7.74592784 11.73219215 10.60945023  5.74446924 15.33163484
 13.01377062 22.65350817 10.59700761 12.94758932 14.71822795 12.01718014
 11.35600126  5.47860188 24.96469385  9.42600079 17.27904849 15.59419989]


In [31]:
MAE = mean_absolute_error(y_test, test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test, test_predictions))
print(f"{MAE=} & {RMSE=}")

MAE=0.42737748843652973 & RMSE=0.6180719926952091


In [32]:
# Training Set Performance
# Training Set Performance
train_predictions = ridge_cv_model.predict(X_train)
MAE = mean_absolute_error(y_train, train_predictions)
MAE

0.3094132105672553

In [33]:
ridge_cv_model.coef_

array([ 5.40769392,  0.5885865 ,  0.40390395, -6.18263924,  4.59607939,
       -1.18789654, -1.15200458,  0.57837796, -0.1261586 ,  2.5569777 ,
       -1.38900471,  0.86059434,  0.72219553, -0.26129256,  0.17870787,
        0.44353612, -0.21362436, -0.04622473, -0.06441449])