In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error

In [2]:
# Synthetic regression dataset: 100 samples, 5 features, noise level 15 on the
# target; the fixed random_state makes the generated data reproducible.
X, y = make_regression(
    n_samples=100,
    n_features=5,
    noise=15,
    random_state=42,
)

In [3]:
# Hold out 20% of the samples as a test set; the seed keeps the split stable
# across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Baseline: ordinary least squares with no regularization, fit on the training split.
model = LinearRegression()
# Kept as the cell's last expression so the fitted estimator is still displayed.
model.fit(X=X_train, y=y_train)

In [6]:
# Predictions of the baseline linear model on the held-out test features.
y_pred = model.predict(X=X_test)
y_pred

array([ 175.9405271 , -291.08644546,  108.15717979,  180.01141955,
        401.19265326,   68.06189893,  -23.4778347 ,  -96.02980922,
          1.49408833,  -52.47774574,   42.00119542, -127.17797626,
        -84.29145622,   40.08625823,  -89.55313272, -113.04181591,
        -71.42464226,   51.09849336,  -70.62741086,   52.02893852])

In [7]:
# L2-regularized linear model with a hand-picked penalty strength (alpha=10),
# fit on the same training split as the baseline.
ridge = Ridge(alpha=10)
# Kept as the cell's last expression so the fitted estimator is still displayed.
ridge.fit(X=X_train, y=y_train)

In [8]:
# Predictions of the ridge model on the held-out test features.
y_pred_ridge = ridge.predict(X=X_test)
y_pred_ridge

array([ 154.73210204, -256.39992222,   88.31852338,  158.97081596,
        352.48873153,   62.51298316,  -18.69903597,  -86.82497784,
         -3.21383876,  -43.49159145,   29.86851497, -116.18801336,
        -75.69093574,   33.63920218,  -81.7409942 ,  -92.32234322,
        -65.42508911,   44.11675234,  -66.3450711 ,   45.85323383])

In [9]:
# Test-set mean squared error of the baseline LinearRegression predictions.
mean_squared_error(y_true=y_test, y_pred=y_pred)

255.2580071563667

In [11]:
# Test-set mean squared error of the Ridge(alpha=10) predictions, for
# comparison against the baseline linear model's error.
mean_squared_error(y_true=y_test, y_pred=y_pred_ridge)

615.4922766188745

In [12]:
# Coefficients learned by the unregularized LinearRegression model
# (one value per input feature).
model.coef_

array([61.99439271, 98.38304268, 59.35368698, 54.77333584, 36.1563324 ])

In [13]:
# Coefficients learned by the Ridge(alpha=10) model (one value per input
# feature). In the recorded output each is smaller in magnitude than the
# corresponding LinearRegression coefficient.
ridge.coef_

array([55.53377764, 85.23501913, 49.0235835 , 48.54364095, 33.45110485])

In [14]:
from sklearn.linear_model import RidgeCV

In [16]:
# Choose the ridge penalty by 5-fold cross-validation over a log-spaced grid of
# candidate alphas, scored by negative mean squared error.
ridge_cv = RidgeCV(
    alphas=[0.01, 0.1, 1.0, 10.0, 100.0],
    scoring='neg_mean_squared_error',
    cv=5,
)
# Kept as the cell's last expression so the fitted estimator is still displayed.
ridge_cv.fit(X=X_train, y=y_train)

In [17]:
# Penalty strength that RidgeCV selected from the candidate alphas.
ridge_cv.alpha_

np.float64(0.1)