### Regularization Exercises


In [2]:
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots
import statsmodels.api as sm
import sklearn.model_selection as skm
import sklearn.linear_model as skl
from sklearn.preprocessing import StandardScaler
from ISLP import load_data
from ISLP.models import (ModelSpec as MS, summarize , poly)
from functools import partial
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import PLSRegression
from ISLP.models import (Stepwise, sklearn_selected, sklearn_selection_path)
from l0bnb import fit_path
import matplotlib.pyplot as plt

$C_p = \frac{RSS + 2P\sigma^2}{n}$

##### Functions to be used 
- `sm.OLS()` : For StatsModel API and fitting
- `Stepwise.first_step`(design, forward / backward, max_terms)` : For Strategy Defining
- `sklearn_selected`(model, strategy, scorer) : Wrapper for ISLP and skLearn

In [31]:
# Simulating Data
np.random.seed(1)
n = 100
x = np.random.normal(0,1,n)
epi = np.random.normal(0,1,n)
beta_0 = 1
beta_1 = 2
beta_2 = 3 
beta_3 = 4

response = beta_0 + beta_1*x + beta_2*x**2 + beta_3*x**3 + epi

# DataFrame
power_range = np.arange(0,11)
data = np.power.outer(x,power_range)
data = pd.DataFrame(data, columns = ['x^{}'.format(i) for i in power_range])
design = MS(data).fit(data)
x_data = design.transform(data)

# Scorer
def C_p(sigma,estimator, x, y) : 
    n, p = x.shape
    y_hat = estimator.predict(x)
    RSS = np.sum((y  - y_hat)**2)
    return -(RSS + 2 * p * sigma) / n

sigma = sm.OLS(response, x_data).fit().scale
scorer = partial(C_p, sigma)

# Defining Strategy
forward = Stepwise.first_peak(design, direction="forward", max_terms=len(design.terms))
backwards = Stepwise.first_peak(design, direction="backwards", max_terms=len(design.terms))

# Fitting Models using strategy
model_forward = sklearn_selected(sm.OLS, forward, scoring=scorer)
model_backwards = sklearn_selected(sm.OLS, backwards, scoring=scorer)
model_forward.fit(x_data, response)
model_backwards.fit(x_data, response)

print(f"Forward Selection : {model_forward.selected_state_}")
print(f"Backward Selection : {model_backwards.selected_state_}")

Forward Selection : ('x^1', 'x^2', 'x^3', 'x^6')
Backward Selection : ('x^1', 'x^2', 'x^3', 'x^6')


Forward and Backward selection yield the same result. 

In [55]:
# Fitting a Lasso Model : Need to do this again.
scaler = StandardScaler(with_mean=True, with_std=True)
lambdas = 10 ** np.linspace(10, -2, 100)
cv_design = skm.KFold(n_splits=10, shuffle=True, random_state=1)
lasso = skl.LassoCV(alphas=lambdas, cv=cv_design)
pipeline = Pipeline([("scaler", scaler), ("lasso", lasso)])
pipeline.fit(x_data, response)

# Extracting Lasso coefficients
lasso_model = pipeline.named_steps['lasso']
lasso_coefficients = lasso_model.coef_
best_lambda = lasso_model.alpha_
print(f"Best Lambda : {best_lambda}")
print(f"Lasso Coefficients : {lasso_coefficients}")


Best Lambda : 0.040370172585965494
Lasso Coefficients : [0.00000000e+00 0.00000000e+00 1.67916888e+00 2.75372980e+00
 1.04044973e+01 6.04707855e-01 0.00000000e+00 4.42624141e-03
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
