In [1]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso, LassoCV
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler

For the Regularization models, I first loaded the feature selected dataframe and my y target dataframe from my Modelling notebook. I then performed the train/test/split function and fit_transformed the X_train split and transformed the X_validation split.

In [2]:
# Load necessary dataframes

X_train_df = pd.read_csv('../data/X_train_df.csv')
y = pd.read_csv('../data/y.csv')

In [3]:
# Train/test/split, scale, and fit_transform/transform the data

X_train, X_validation, y_train, y_validation = train_test_split(X_train_df, y, random_state=42)
sc = StandardScaler()            

scalar_columns = list(X_train.select_dtypes(exclude='object'))

Z_train = sc.fit_transform(X_train[scalar_columns])
Z_validation = sc.transform(X_validation[scalar_columns]) 

I created my alphas, instantiated and cross-validated the RidgeCV model, and fit my RidgeCV model. I checked the RidgeCV alpha and printed the test scores. I then repeated the process for my LassoCV model and printed the RidgeCV and LassoCV coefficients.

In [4]:
# Create alphas, instantiate and cross-validate, and fit

r_alphas = np.logspace(0, 5, 100)   # Get 100 values from 10^0 to 10^5
ridge_cv = RidgeCV(alphas=r_alphas, scoring='r2', cv=5)   # 5-fold CV
ridge_cv.fit(Z_train, y_train);

In [5]:
# Find the optimal alpha value

ridge_cv.alpha_
# ridge_cv.best_score_ 

1.0

In [6]:
# Print RidgeCV scores

print('Training RidgeCV score: ', ridge_cv.score(Z_train, y_train))
print('Test RidgeCV score: ', ridge_cv.score(Z_validation, y_validation))

Training RidgeCV score:  0.9242596217712693
Test RidgeCV score:  0.9232546738290729


In [8]:
# Create alphas, instantiate and cross-validate, and fit

l_alphas = np.logspace(-3, 0, 100)   # 10^-3 = .001 up to 10^0 = 1
lasso_cv = LassoCV(alphas=l_alphas, cv=5, max_iter=50000)
lasso_cv.fit(Z_train, y_train.values.flatten());

In [9]:
# Find the optimal alpha value

lasso_cv.alpha_

1.0

In [10]:
# Print LassoCV scores

print('Training LassoCV score: ', lasso_cv.score(Z_train, y_train))
print('Test LassoCV score: ', lasso_cv.score(Z_validation, y_validation))

Training LassoCV score:  0.9245471133972424
Test LassoCV score:  0.9224535039445056


In [11]:
ridge_cv.coef_

array([[ 1.55430668e+03, -8.50544057e+03,  6.92355164e+03,
         9.21319124e+03,  2.75386970e+03,  1.72216415e+04,
         2.56468193e+03,  1.93652120e+04,  3.11869307e+03,
        -2.66988312e+04,  5.44076093e+04, -3.05182730e+04,
         7.99593296e+03,  1.53947315e+03,  1.09782283e+04,
        -3.42976435e+03, -3.63080432e+04,  8.35107278e+03,
         6.08299001e+04,  1.57837642e+03, -8.67519154e+03,
         1.76521024e+02,  2.34888406e+03,  1.89800582e+03,
         1.22445807e+03,  3.14573737e+02,  3.69368835e+03,
         5.33577508e+01,  3.38522781e+02,  7.33336007e+02,
        -2.48081856e+02,  4.51931509e+02, -2.53610319e+02,
         2.32664996e+03,  5.36315158e+03,  5.94118680e+03,
        -4.04279671e+02,  1.04742740e+03,  6.26810060e+02,
         9.97758299e+02,  1.26139386e+03,  5.48230302e+03,
         6.36596163e+02, -3.78017724e+02,  1.07836049e+03,
         1.37540165e+03, -8.03784824e+02, -3.78710649e+02,
        -1.62577462e+03, -4.48512814e+03, -2.20231496e+0

In [12]:
lasso_cv.coef_

array([ 1.55079649e+03, -8.31248346e+03,  6.93281858e+03,  9.32871969e+03,
        2.69725203e+03,  2.62737847e+04,  2.28454686e+03,  2.26303049e+04,
        3.11868910e+03, -2.89012503e+04,  5.82095124e+04, -5.12788306e+04,
        8.17994716e+03,  1.55517481e+03,  1.08184654e+04, -2.89760063e+03,
       -4.48835455e+04,  8.12703037e+03,  8.02059481e+04,  1.41132413e+03,
       -1.10070632e+04,  1.66741465e+02,  2.38405806e+03,  1.93565702e+03,
        1.22739180e+03,  3.86803996e+02,  3.73569243e+03,  2.43314811e+01,
        5.10786180e+02,  7.31948841e+02, -3.38506468e+02,  5.08007927e+02,
       -2.01049491e+02,  2.27727748e+03,  5.20964270e+03,  5.93970359e+03,
       -3.17231994e+02,  1.01011524e+03,  6.28797912e+02,  1.00446790e+03,
        1.18769283e+03,  5.29119862e+03,  6.96185992e+02, -4.40088324e+02,
        1.10255444e+03,  1.40033955e+03, -7.90592434e+02, -3.69364991e+02,
       -1.58547354e+03, -4.50539462e+03, -2.20997153e+03, -3.01160567e+03,
       -3.88952909e+02, -