# Regularization Assignments

Use the model below as a basis for comparison to your regularized models.

In [0]:
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import numpy as np
from sklearn.metrics import r2_score as r2
from sklearn.metrics import mean_absolute_error as mae


# Load the raw computers dataset from the relative Data directory.
computers = pd.read_csv(filepath_or_buffer="../Data/Computers.csv")

# Preview the last five rows as a quick sanity check on the load.
computers.tail(n=5)

In [0]:
# Feature engineering: add squared and cubed `hd` terms, then one-hot encode
# the non-numeric columns.
# `dtype=int` keeps the dummy columns numeric: pandas >= 2.0 emits bool dummies
# by default, which turns the mixed int/bool design matrix into an object
# array and makes `sm.OLS` reject it further down.
# `drop_first=True` drops one level per encoded column so the dummies are not
# collinear with the intercept that is added later.
computers_eng = pd.get_dummies(
    computers.assign(
        hd2=computers["hd"] ** 2,
        hd3=computers["hd"] ** 3,
    ),
    drop_first=True,
    dtype=int,
)

In [0]:
from sklearn.model_selection import train_test_split

# Design matrix: every engineered column except the target, plus an explicit
# intercept column for statsmodels.
X = sm.add_constant(computers_eng.drop(columns="price"))
# The target is the natural log of price rather than the raw price.
y = computers["price"].pipe(np.log)

# Hold out 20% of the rows as a test set. From here on `X` / `y` refer to the
# training split only; the fixed seed keeps the split reproducible.
X, X_test, y, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=12345,
)

In [0]:
# Baseline: ordinary least squares on the unscaled training data.
# (Call model.summary() for the full coefficient table.)
model = sm.OLS(endog=y, exog=X).fit()

# In-sample fit quality, the reference point for the regularized models.
print(
    f"Training R2: {r2(y, model.predict(X))}",
    f"Training MAE: {mae(y, model.predict(X))}",
    sep="\n",
)

In [0]:
# Out-of-sample performance of the OLS baseline on the held-out split.
print(
    f"Test R2: {r2(y_test, model.predict(X_test))}",
    f"Test MAE: {mae(y_test, model.predict(X_test))}",
    sep="\n",
)

## Assignment 1: Ridge Regression

Fit a ridge regression model using `RidgeCV` and compare the accuracy to the model above.

Don't forget to standardize your data!

In [0]:
# standardization

In [0]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column mean and standard deviation from the training split
# only, so nothing from the test split influences the scaling.
std = StandardScaler().fit(X.values)
X_tr = std.transform(X.values)
# Reuse the training statistics for the held-out rows.
X_te = std.transform(X_test.values)
# NOTE(review): X still contains the intercept column added by
# sm.add_constant; a constant column should come out of the scaler as all
# zeros, so any coefficient fitted on it carries no information - confirm.

In [0]:
from sklearn.linear_model import RidgeCV

# Candidate penalties: 200 values evenly spaced on a log scale, 1e-3 to 1e3.
n_alphas = 200
alphas = np.logspace(-3, 3, num=n_alphas)

# Pick alpha by 5-fold cross-validation on the standardized training data.
ridge_model = RidgeCV(alphas=alphas, cv=5).fit(X_tr, y)

# Training R2, training MAE, and the selected alpha, one per line.
print(
    ridge_model.score(X_tr, y),
    mae(y, ridge_model.predict(X_tr)),
    ridge_model.alpha_,
    sep="\n",
)

In [0]:
# Held-out R2 and MAE for the ridge model, one per line.
print(
    ridge_model.score(X_te, y_test),
    mae(y_test, ridge_model.predict(X_te)),
    sep="\n",
)

## Assignment 2: Lasso Regression

Fit a lasso regression model using LassoCV and compare the accuracy to the models above! 

Did any coefficient values drop to zero?

In [0]:
from sklearn.linear_model import LassoCV

# Same penalty grid as for ridge: 200 log-spaced values from 1e-3 to 1e3.
n_alphas = 200
alphas = np.logspace(-3, 3, num=n_alphas)

# Pick alpha by 5-fold cross-validation on the standardized training data.
lasso_model = LassoCV(alphas=alphas, cv=5).fit(X_tr, y)

# Training R2, training MAE, and the selected alpha, one per line.
print(
    lasso_model.score(X_tr, y),
    mae(y, lasso_model.predict(X_tr)),
    lasso_model.alpha_,
    sep="\n",
)

In [0]:
# Held-out R2 and MAE for the lasso model. MAE is reported alongside R2, as
# it is for the ridge model, so the two can be compared on both metrics.
print(lasso_model.score(X_te, y_test))
print(mae(y_test, lasso_model.predict(X_te)))

In [0]:
# Pair each feature name with its fitted lasso coefficient so that features
# shrunk to exactly zero are easy to spot.
# The intercept column added by sm.add_constant is skipped: it is constant, so
# after standardization it should carry no variation and its coefficient is
# zero regardless of the penalty. Listing it would look like a feature the
# lasso had eliminated.
[
    (name, coef)
    for name, coef in zip(X.columns, lasso_model.coef_)
    if name != "const"
]

## Assignment 3: Elastic Net Regression

Fit an elastic net regression model using ElasticNetCV and compare the accuracy to the models above! 

What was the optimal L1 Ratio?

In [0]:
from sklearn.linear_model import ElasticNetCV

# Search grid: 200 log-spaced penalties from 1e-3 to 1e3, crossed with 10
# L1 ratios from 0.01 (almost pure ridge) to 1 (pure lasso).
alphas = np.logspace(-3, 3, num=200)
l1_ratios = np.linspace(0.01, 1, 10)

# Pick alpha and the L1 ratio jointly by 5-fold cross-validation on the
# standardized training data.
enet_model = ElasticNetCV(alphas=alphas, l1_ratio=l1_ratios, cv=5)
enet_model.fit(X_tr, y)

# Report the same metrics as the baseline (R2 and MAE on both splits), with
# labels so train and test numbers cannot be confused, then the selected
# hyperparameters.
print(f"Training R2: {enet_model.score(X_tr, y)}")
print(f"Training MAE: {mae(y, enet_model.predict(X_tr))}")
print(f"Test R2: {enet_model.score(X_te, y_test)}")
print(f"Test MAE: {mae(y_test, enet_model.predict(X_te))}")
print(f"Alpha: {enet_model.alpha_}")
print(f"L1 ratio: {enet_model.l1_ratio_}")