In [59]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import math
import seaborn as sns
import sklearn
from sklearn import linear_model
from sklearn import preprocessing
%matplotlib inline
sns.set_style('white')

In [60]:
# Load the ISLR "Default" data set from the Rdatasets mirror, then discard
# the first CSV column (the exported index column) and every row that
# contains a missing value.
df = pd.read_csv(
    'https://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Default.csv'
)
df = df.iloc[:, 1:]
df = df.dropna()

In [61]:
# Encode the two Yes/No columns as 1/0 indicators.
for yes_no_col in ('default', 'student'):
    df[yes_no_col] = np.where(df[yes_no_col] == 'Yes', 1, 0)

# Scale every column (indicators and income included) and rebuild the
# frame with the original column labels.
# NOTE(review): scaling happens on the full frame before the train/test
# split performed later, so test rows contribute to the scaling statistics.
# NOTE(review): this cell overwrites `df`; re-running it without reloading
# the data would recode the already-scaled columns to all zeros.
names = df.columns
df = pd.DataFrame(preprocessing.scale(df), columns=names)

In [62]:
# Split the rows in their existing order (no shuffling): the first half
# becomes the training frame and the remainder becomes the test frame.
trainsize = df.shape[0] // 2
df_train = df.iloc[:trainsize].copy()
df_test = df.iloc[trainsize:].copy()

# Training target is income as a single-column 2-D array; the training
# features are every column other than income.
Y_train = df_train['income'].values.reshape(-1, 1)
X_train = df_train.loc[:, df_train.columns != 'income']

In [63]:
# Engineer extra features on the training frame: pairwise interactions
# between balance, student and default, plus the square root, square and
# cube of balance.
# The powers are taken of balance + 100; presumably the offset keeps the
# base positive so the square root is defined (balance was scaled
# earlier) -- TODO confirm the choice of 100.
# NOTE(review): the squared and cubed terms are not re-scaled, so they sit
# on a far larger scale than the remaining columns.
balance_col = df_train['balance']
student_col = df_train['student']
default_col = df_train['default']
balance_offset = balance_col + 100

df_train['balance_student'] = balance_col * student_col
df_train['balance_default'] = balance_col * default_col
df_train['student_default'] = student_col * default_col
df_train['balance_sqrt'] = balance_offset ** .5
df_train['balance2'] = balance_offset ** 2
df_train['balance3'] = balance_offset ** 3

# Wider training feature matrix: every column except the income target.
X_train2 = df_train.loc[:, df_train.columns != 'income']

In [64]:
# Build the held-out target and the feature matrix for the simpler model
# from the test frame (no model is scored in this cell).
# The features are whatever columns df_test holds at this point, minus
# income, so this must run before engineered columns are added to df_test.
test_income = df_test['income']
Y_test = test_income.values.reshape(-1, 1)
X_test = df_test.loc[:, df_test.columns != 'income']

In [65]:
# Add to the test frame the same engineered features that were built for
# the training frame (interactions plus square root, square and cube of
# balance + 100), then assemble the wider test feature matrix.
# No model is scored in this cell.
test_balance = df_test['balance']
test_student = df_test['student']
test_default = df_test['default']
test_balance_offset = test_balance + 100

df_test['balance_student'] = test_balance * test_student
df_test['balance_default'] = test_balance * test_default
df_test['student_default'] = test_student * test_default
df_test['balance_sqrt'] = test_balance_offset ** .5
df_test['balance2'] = test_balance_offset ** 2
df_test['balance3'] = test_balance_offset ** 3

# Every column except the income target.
X_test2 = df_test.loc[:, df_test.columns != 'income']

In [66]:
# Lasso with alpha = .35 on the basic (few-feature) training matrix.
# Prints the training R² followed by the coefficient estimates with the
# intercept appended as the last entry.
lass = linear_model.Lasso(alpha=.35)
lassfit = lass.fit(X_train, Y_train)

print('R² for the model with few features:')
print(lass.score(X_train, Y_train))

print('\nParameter estimates for the model with few features:')
origparams = np.append(lassfit.coef_, lassfit.intercept_)
print(origparams)

R² for the model with few features:
0.450062579301185

Parameter estimates for the model with few features:
[-0.         -0.40657726 -0.          0.00114596]


In [67]:
# Lasso with the same alpha (.35) on the engineered (many-feature)
# training matrix. Prints the training R² followed by the coefficient
# estimates with the intercept appended as the last entry.
lassBig = linear_model.Lasso(alpha=.35)
lassBig.fit(X_train2, Y_train)

print('\nR² for the model with many features:')
print(lassBig.score(X_train2, Y_train))

print('\nParameter estimates for the model with many features:')
origparams = np.append(lassBig.coef_, lassBig.intercept_)
print(origparams)


R² for the model with many features:
0.44363376712897096

Parameter estimates for the model with many features:
[ 0.00000000e+00 -3.89351238e-01  0.00000000e+00 -0.00000000e+00
  0.00000000e+00 -0.00000000e+00  0.00000000e+00 -2.77688887e-04
 -7.09158792e-07  3.48711577e+00]


In [68]:
# Held-out R² for the two lasso fits, printed in order: the few-feature
# model first, then the many-feature model.
for fitted_model, test_features in ((lass, X_test), (lassBig, X_test2)):
    print(fitted_model.score(test_features, Y_test))

0.44553225151184195
0.4380466345914476


## Regularization parameter: Lasso

The $\lambda$ for lasso can vary between 0 (no penalty, acts like OLS) and infinity. If $\lambda$ is too large, all parameters will be set to zero.

Create a plot below of how $R^2$ varies across different values of $\lambda$ for ridge and lasso regression. Use logic and code similar to the ridge regression demonstration above, and base your plot on the X_train2 feature set.

Do lasso and ridge yield the same $R^2$ for a given lambda value?

Submit your work and discuss the results with your mentor.

In [69]:
# Ridge regression on the basic feature set for a range of penalties.
# For each alpha: fit, then print the training R² and the coefficients.
print('\nR-squared simple model, range of alpha: 10 to 0.01')
alphas = [10, 5, .5, .05, .01]

for alpha_value in alphas:
    # No intercept term is fitted for these ridge models.
    ridgeregr = linear_model.Ridge(alpha=alpha_value, fit_intercept=False)
    ridgeregr.fit(X_train, Y_train)
    train_r2 = ridgeregr.score(X_train, Y_train)
    # Y_train is 2-D, so coef_ is 2-D as well; keep its first (only) row.
    origparams = ridgeregr.coef_[0]

    print('R2 Score for alpha {}'.format(alpha_value))
    print(train_r2)
    print(origparams)
    print('\n')


R-squared simple model, range of alpha: 10 to 0.01
R2 Score for alpha 10
0.5738739164402877
[ 1.36988466e-02 -7.57859433e-01 -3.25298557e-04]


R2 Score for alpha 5
0.5738757447223357
[ 1.36831110e-02 -7.58661482e-01 -1.42519678e-04]


R2 Score for alpha 0.5
0.573876349838232
[ 1.36687345e-02 -7.59384859e-01  2.27104494e-05]


R2 Score for alpha 0.05
0.5738763559016591
[ 1.36672856e-02 -7.59457276e-01  3.92715937e-05]


R2 Score for alpha 0.01
0.5738763559604679
[ 1.36671568e-02 -7.59463714e-01  4.07440315e-05]




In [70]:
# Ridge regression on the engineered (many-feature) set for a range of
# penalties. For each alpha: fit, then print the training R² and the
# coefficients.
# NOTE(review): this feature set includes unscaled squared and cubed
# balance terms, which may be what triggers the ill-conditioning warnings
# seen in this cell's output -- confirm.
print('\nR-squared complex model, range of alpha: 10 to 0.01')
alphas = [10, 5, .5, .05, .01]

for alpha_value in alphas:
    # No intercept term is fitted for these ridge models.
    ridgeregr = linear_model.Ridge(alpha=alpha_value, fit_intercept=False)
    ridgeregr.fit(X_train2, Y_train)
    train_r2 = ridgeregr.score(X_train2, Y_train)
    # Y_train is 2-D, so coef_ is 2-D as well; keep its first (only) row.
    origparams = ridgeregr.coef_[0]

    print('R2 Score for alpha {}'.format(alpha_value))
    print(train_r2)
    print(origparams)
    print('\n')


R-squared complex model, range of alpha: 10 to 0.01
R2 Score for alpha 10
0.5739464289613794
[-2.18345205e-03 -7.57156891e-01  4.77049994e-02 -3.66908529e-03
  9.52109450e-03 -3.78154074e-03 -4.47224226e-03  4.83414405e-04
 -4.79227772e-06]


R2 Score for alpha 5
0.5739545553547272
[-2.75035289e-03 -7.58006613e-01  8.20932503e-02 -3.25822468e-03
  1.00153856e-02 -3.90435470e-03 -7.69484415e-03  8.32936629e-04
 -8.25484783e-06]


R2 Score for alpha 0.5
0.573971000824615
[-4.78907258e-03 -7.58839870e-01  2.29990088e-01 -2.00381206e-03
  1.19925826e-02 -4.30877426e-03 -2.14959225e-02  2.34255200e-03
 -2.32109719e-05]


R2 Score for alpha 0.05
0.5739723222459411
[-5.45589463e-03 -7.58943559e-01  2.80310564e-01 -1.61027419e-03
  1.26546909e-02 -4.43823944e-03 -2.54469442e-02  2.85474057e-03
 -2.82926495e-05]


R2 Score for alpha 0.01
0.5739723413444707
[-5.52924236e-03 -7.58953395e-01  2.86176923e-01 -1.56712339e-03
  1.27276745e-02 -4.45246087e-03 -2.25713517e-02  2.90612076e-03
 -2.88351

Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number/precision: 9.244323806585906e-17 / 1.1102230246251565e-16
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number/precision: 9.12542022922663e-18 / 1.1102230246251565e-16
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number/precision: 1.8225496232404727e-18 / 1.1102230246251565e-16


There is virtually no change in $R^2$ between the simple and complex ridge models, regardless of the alpha chosen.

In [71]:
# Lasso regression on the basic feature set for a range of penalties.
# For each alpha: fit, then print the training R² and the coefficients
# with the intercept appended as the last entry.
# NOTE(review): the loop rebinds `lass`, the same name used for the
# alpha = .35 model that the earlier test-score cell reads.
print('\nR-squared simple model, range of alpha: .01 to 1')
alphas = [.01, .05, .35, .75, 1]

for alpha_value in alphas:
    lass = linear_model.Lasso(alpha=alpha_value)
    lassfit = lass.fit(X_train, Y_train)
    train_r2 = lass.score(X_train, Y_train)
    origparams = np.append(lassfit.coef_, lassfit.intercept_)

    print('R2 Score for alpha {}'.format(alpha_value))
    print(train_r2)
    print(origparams)
    print('\n')


R-squared simple model, range of alpha: .01 to 1
R2 Score for alpha 0.01
0.5736726279636906
[ 0.00369631 -0.7489349   0.         -0.00129277]


R2 Score for alpha 0.05
0.5711587609212556
[ 0.         -0.70850468 -0.         -0.00097388]


R2 Score for alpha 0.35
0.450062579301185
[-0.         -0.40657726 -0.          0.00114596]


R2 Score for alpha 0.75
0.006043246694259263
[-0.         -0.00400737 -0.          0.00397242]


R2 Score for alpha 1
0.0
[-0.         -0.         -0.          0.00400056]




In [72]:
# Lasso regression on the engineered (many-feature) set for a range of
# penalties. For each alpha: fit, then print the training R² and the
# coefficients with the intercept appended as the last entry.
# NOTE(review): the loop rebinds `lass`, the same name used for the
# alpha = .35 model that the earlier test-score cell reads.
print('\nR-squared complex model, range of alpha: .01 to 1')

alphas = [.01, .05, .35, .75, 1]

for alpha_value in alphas:
    lass = linear_model.Lasso(alpha=alpha_value)
    lassfit = lass.fit(X_train2, Y_train)
    train_r2 = lass.score(X_train2, Y_train)
    origparams = np.append(lassfit.coef_, lassfit.intercept_)

    print('R2 Score for alpha {}'.format(alpha_value))
    print(train_r2)
    print(origparams)
    print('\n')


R-squared complex model, range of alpha: .01 to 1
R2 Score for alpha 0.01
0.5737681044618193
[ 0.00000000e+00 -7.49175475e-01  0.00000000e+00 -0.00000000e+00
  4.64868670e-03 -0.00000000e+00  0.00000000e+00 -0.00000000e+00
 -1.45217294e-09 -1.52452354e-03]


R2 Score for alpha 0.05
0.5710532985531875
[ 0.00000000e+00 -7.07044072e-01 -0.00000000e+00 -0.00000000e+00
  0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -2.89911443e-05
 -2.44078084e-08  3.13333305e-01]


R2 Score for alpha 0.35
0.44363376712897096
[ 0.00000000e+00 -3.89351238e-01  0.00000000e+00 -0.00000000e+00
  0.00000000e+00 -0.00000000e+00  0.00000000e+00 -2.77688887e-04
 -7.09158792e-07  3.48711577e+00]


R2 Score for alpha 0.75
0.026834134834571755
[ 0.00000000e+00 -0.00000000e+00  0.00000000e+00 -0.00000000e+00
  0.00000000e+00 -0.00000000e+00  0.00000000e+00  0.00000000e+00
 -5.42660058e-06  5.43077199e+00]


R2 Score for alpha 1
0.026834134351719777
[ 0.00000000e+00 -0.00000000e+00  0.00000000e+00 -0.00000000e+00
  0

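As expected, ridge and lasso respond very differently across the alpha values tried: the ridge $R^2$ is essentially flat, while the lasso $R^2$ falls toward zero as alpha grows. With alpha = 0.01, the lasso results are very similar for the simple and complex models, most likely because lasso sets the coefficients of most of the added features to zero. Note that scikit-learn scales the two penalties differently (the lasso loss is divided by twice the number of samples, the ridge loss is not), so the same alpha value is not directly comparable between the two models.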