## Implementing Lasso/Ridge
We will use Lasso and Ridge to help us with feature selection.

In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [24]:
## first load the data
coffee = pd.read_csv('../data/one_hot_coffee.csv')
coffee = coffee.copy()

In [25]:
## next perform the train test split
coffee_train, coffee_test = train_test_split(coffee,
                                            shuffle=True,
                                            random_state=47,
                                            test_size = .2)

In [26]:
## make a baseline
baseline = coffee['rating'].mean()
print(baseline)

90.4599101988454


In [27]:
## import the LinearRegression object
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

In [28]:
predictors = ['region_africa_arabia', 'region_caribbean',
       'region_central_america', 'region_hawaii', 'region_asia_pacific',
       'region_south_america', 'type_espresso', 'type_organic',
       'type_fair_trade', 'type_decaffeinated', 'type_pod_capsule',
       'type_blend', 'type_estate', 'Light', 'Medium-Light', 'Medium', 'Medium-Dark', 'Dark', 'Very Dark']

Now we make the Lasso and Ridge Models.

In [30]:
## set values for alpha
alphas = [0.000001,.00001,.0001,.001,.01,.1,.25,.5,.75,1,5,10,25,50,100,1000,10000]

## These will hold our coefficient estimates
ridge_coefs = np.empty((len(alphas),len(predictors)))
lasso_coefs = np.empty((len(alphas),len(predictors)))

## These will hold our mse estimates
ridge_mse = np.empty(len(alphas))
lasso_mse = np.empty(len(alphas))

## These will hold our mae estimates
ridge_mae = np.empty(len(alphas))
lasso_mae = np.empty(len(alphas))

## for each alpha value
for i in range(len(alphas)):
    ## set up the ridge pipeline
    ## first scale
    ## then make polynomial features
    ## then fit the ridge regression model
    ## max_iter=5000000
    ridge_pipe = Pipeline([('scale' ,StandardScaler()),
                              ('ridge',Ridge(alpha=alphas[i], max_iter=5000000))])
    
    ## set up the lasso pipeline
    ## same steps as with ridge
    lasso_pipe = Pipeline([('scale' ,StandardScaler()),
                              ('lasso',Lasso(alpha=alphas[i], max_iter=5000000))])
    
    
    ## fit the ridge
    ridge_pipe.fit(coffee_train[predictors], 
        coffee_train['rating'])
    
    ## fit the lasso
    lasso_pipe.fit(coffee_train[predictors], 
        coffee_train['rating'])

    ## make the predictions
    ridge_preds = ridge_pipe.predict(coffee_train[predictors])
    lasso_preds = lasso_pipe.predict(coffee_train[predictors])
    
    ridge_mse[i] = mean_squared_error(coffee_train['rating'], ridge_preds)
    lasso_mse[i] = mean_squared_error(coffee_train['rating'], lasso_preds)
    
    ridge_mae[i] = mean_absolute_error(coffee_train['rating'], ridge_preds)
    lasso_mae[i] = mean_absolute_error(coffee_train['rating'], lasso_preds)
    
    # record the coefficients
    ridge_coefs[i,:] = ridge_pipe['ridge'].coef_
    lasso_coefs[i,:] = lasso_pipe['lasso'].coef_

In [31]:
print("Ridge Coefficients")

pd.DataFrame(np.round(ridge_coefs,8),
            columns = [str(i) for i in predictors],
            index = ["alpha=" + str(a) for a in alphas])

Ridge Coefficients


Unnamed: 0,region_africa_arabia,region_caribbean,region_central_america,region_hawaii,region_asia_pacific,region_south_america,type_espresso,type_organic,type_fair_trade,type_decaffeinated,type_pod_capsule,type_blend,type_estate,Light,Medium-Light,Medium,Medium-Dark,Dark,Very Dark
alpha=1e-06,0.792444,-0.119954,0.158731,0.125271,0.058175,0.021456,0.611138,-0.009858,0.022518,0.025203,-0.329883,0.063579,0.204739,0.54081,0.867151,0.203126,-0.770579,-0.939555,-0.807664
alpha=1e-05,0.792444,-0.119954,0.158731,0.125271,0.058175,0.021456,0.611138,-0.009858,0.022518,0.025203,-0.329883,0.063579,0.204739,0.540807,0.867147,0.203122,-0.770582,-0.939557,-0.807666
alpha=0.0001,0.792444,-0.119954,0.158731,0.125271,0.058175,0.021456,0.611138,-0.009858,0.022518,0.025203,-0.329883,0.063579,0.204739,0.540807,0.867146,0.203121,-0.770582,-0.939557,-0.807666
alpha=0.001,0.792444,-0.119954,0.158731,0.125271,0.058175,0.021456,0.611138,-0.009858,0.022518,0.025203,-0.329883,0.063579,0.204739,0.540807,0.867146,0.203121,-0.770582,-0.939557,-0.807666
alpha=0.01,0.792443,-0.119954,0.15873,0.12527,0.058174,0.021455,0.611135,-0.009858,0.022518,0.025203,-0.329883,0.063579,0.204739,0.540805,0.867145,0.203121,-0.770581,-0.939555,-0.807664
alpha=0.1,0.792427,-0.119953,0.158725,0.125266,0.058166,0.021454,0.611113,-0.009854,0.022514,0.025199,-0.329881,0.063575,0.204739,0.540792,0.86713,0.203115,-0.770566,-0.939535,-0.807648
alpha=0.25,0.7924,-0.119952,0.158716,0.125259,0.058151,0.021451,0.611074,-0.009849,0.022507,0.025192,-0.329879,0.063569,0.204738,0.540769,0.867105,0.203105,-0.77054,-0.939501,-0.80762
alpha=0.5,0.792356,-0.11995,0.158701,0.125247,0.058127,0.021447,0.611011,-0.00984,0.022496,0.02518,-0.329875,0.063558,0.204737,0.540731,0.867064,0.20309,-0.770498,-0.939445,-0.807574
alpha=0.75,0.792313,-0.119948,0.158686,0.125235,0.058102,0.021442,0.610947,-0.00983,0.022485,0.025168,-0.329871,0.063548,0.204736,0.540693,0.867023,0.203074,-0.770456,-0.939389,-0.807528
alpha=1,0.792269,-0.119946,0.158671,0.125224,0.058078,0.021438,0.610884,-0.009821,0.022474,0.025156,-0.329867,0.063538,0.204735,0.540655,0.866982,0.203058,-0.770414,-0.939334,-0.807483


In [32]:
print("Lasso Coefficients")

pd.DataFrame(np.round(lasso_coefs,8),
            columns = [str(i) for i in predictors],
            index = ["alpha=" + str(a) for a in alphas])

Lasso Coefficients


Unnamed: 0,region_africa_arabia,region_caribbean,region_central_america,region_hawaii,region_asia_pacific,region_south_america,type_espresso,type_organic,type_fair_trade,type_decaffeinated,type_pod_capsule,type_blend,type_estate,Light,Medium-Light,Medium,Medium-Dark,Dark,Very Dark
alpha=1e-06,0.792442,-0.119953,0.158729,0.125269,0.058173,0.021454,0.611136,-0.009855,0.022515,0.025202,-0.329882,0.063577,0.204739,0.415294,0.652198,0.0,-0.937623,-1.041868,-0.895178
alpha=1e-05,0.792422,-0.119947,0.15871,0.125257,0.058156,0.021437,0.611121,-0.009825,0.022489,0.025192,-0.329875,0.063563,0.204737,0.415288,0.652197,0.0,-0.937615,-1.041861,-0.895171
alpha=0.0001,0.792217,-0.11989,0.158524,0.125132,0.057983,0.021268,0.610971,-0.009528,0.022228,0.025084,-0.3298,0.063415,0.204715,0.415232,0.652188,0.0,-0.937532,-1.041794,-0.895106
alpha=0.001,0.790183,-0.119321,0.156672,0.123888,0.056255,0.019583,0.609468,-0.006551,0.019613,0.024013,-0.329061,0.061944,0.204498,0.414655,0.652085,0.0,-0.936712,-1.041121,-0.894456
alpha=0.01,0.771266,-0.113419,0.138963,0.111934,0.039788,0.004205,0.594681,0.0,0.008332,0.013984,-0.322059,0.047296,0.202443,0.409136,0.651001,0.0,-0.928487,-1.033661,-0.888201
alpha=0.1,0.660913,-0.038171,0.044332,0.01172,0.0,-0.0,0.458317,0.0,0.0,0.0,-0.26665,0.0,0.156291,0.345191,0.624818,0.0,-0.849108,-0.955504,-0.818095
alpha=0.25,0.531129,-0.0,0.0,0.0,-0.0,0.0,0.237347,0.0,-0.0,-0.0,-0.176617,-0.0,0.041052,0.229174,0.56621,0.0,-0.715542,-0.822693,-0.695896
alpha=0.5,0.378995,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.010699,-0.0,0.0,0.013895,0.439966,0.0,-0.49208,-0.593254,-0.48179
alpha=0.75,0.223406,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.395815,0.0,-0.225424,-0.325239,-0.219038
alpha=1,0.05872,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.333413,0.0,-0.0,-0.066309,-0.0


In [33]:
ridge_mse

array([ 9.93090465,  9.93090465,  9.93090465,  9.93090465,  9.93090465,
        9.93090466,  9.93090467,  9.93090472,  9.93090481,  9.93090493,
        9.93091167,  9.93093264,  9.93107817,  9.93158956,  9.93357334,
       10.10843021, 12.41981606])

In [34]:
lasso_mse

array([ 9.93090465,  9.93090466,  9.9309049 ,  9.93092969,  9.93283636,
       10.03865206, 10.45554411, 11.62670354, 12.89997951, 14.51292517,
       15.56493585, 15.56493585, 15.56493585, 15.56493585, 15.56493585,
       15.56493585, 15.56493585])

In [35]:
ridge_mae

array([2.10398867, 2.10398867, 2.10398866, 2.10398861, 2.10398811,
       2.10398307, 2.10397466, 2.10396065, 2.10394665, 2.10393265,
       2.10370933, 2.10344005, 2.10264029, 2.10132279, 2.09885695,
       2.09753653, 2.41785249])

In [36]:
lasso_mae

array([2.10398842, 2.10398614, 2.10396331, 2.1037419 , 2.10116953,
       2.09581888, 2.15344924, 2.32298239, 2.49281961, 2.69696473,
       2.85465915, 2.85465915, 2.85465915, 2.85465915, 2.85465915,
       2.85465915, 2.85465915])

So now we will use cross validation on the alpha values of 0.25, 0.1, and 0.01.  

In [61]:
splits = 5
kfold = KFold(n_splits=splits, shuffle=True, random_state=47)

## These will hold our overall mse estimates
ridge_av_mse = np.empty(len(alphas))
lasso_av_mse = np.empty(len(alphas))

## These will hold our overall mae estimates
ridge_av_mae = np.empty(len(alphas))
lasso_av_mae = np.empty(len(alphas))

## These will hold our coefficient estimates
ridge_av_coefs = np.empty((len(alphas),len(predictors)))
lasso_av_coefs = np.empty((len(alphas),len(predictors)))
    
## reset values for alpha
alphas = []
alphas = [.01,.1,.25]

k=0

## for each alpha value
for i in range(len(alphas)):
    
    j = 0
    
    #The ridge pipeline
    ridge_pipe = Pipeline([('scale' ,StandardScaler()),
                              ('ridge',Ridge(alpha=alphas[i], max_iter=5000000))])
    
    #the lasso pipeline
    lasso_pipe = Pipeline([('scale' ,StandardScaler()),
                              ('lasso',Lasso(alpha=alphas[i], max_iter=5000000))])
    
    #tables for lasso scores
    lasso_mse = np.empty(splits)
    lasso_mae = np.empty(splits)
        
    #tables for ridge scores
    ridge_mse = np.empty(splits)
    ridge_mae = np.empty(splits)
    
    ##tables for coefficient estimates
    ridge_coefs = np.empty((splits,len(predictors)))
    lasso_coefs = np.empty((splits,len(predictors)))
    
    for train_index, test_index in kfold.split(coffee_train):
        
        
        coffee_train_train = coffee_train.iloc[train_index]
        coffee_holdout = coffee_train.iloc[test_index]
        
        ## fit the ridge
        ridge_pipe.fit(coffee_train_train[predictors], 
        coffee_train_train['rating'])

        ## fit the lasso
        lasso_pipe.fit(coffee_train_train[predictors], 
        coffee_train_train['rating'])
        
        ## make the predictions
        ridge_preds = ridge_pipe.predict(coffee_holdout[predictors])
        lasso_preds = lasso_pipe.predict(coffee_holdout[predictors])
            
        ridge_mse[j] = mean_squared_error(coffee_holdout['rating'], ridge_preds)
        lasso_mse[j] = mean_squared_error(coffee_holdout['rating'], lasso_preds)
        
        ridge_mae[j] = mean_absolute_error(coffee_holdout['rating'], ridge_preds)
        lasso_mae[j] = mean_absolute_error(coffee_holdout['rating'], lasso_preds)
        
        # record the coefficients
        ridge_coefs[j,:] = ridge_pipe['ridge'].coef_
        lasso_coefs[j,:] = lasso_pipe['lasso'].coef_
    
        j = j+1
    

    ## These will hold our overall mse estimates
    ridge_av_mse[k] = np.mean(ridge_mse)
    lasso_av_mse[k] = np.mean(lasso_mse)

    ## These will hold our overall mae estimates
    ridge_av_mae[k] = np.mean(ridge_mae)
    lasso_av_mae[k] = np.mean(lasso_mae)

    ## These will hold our coefficient estimates
    ridge_av_coefs[k,:] = np.mean(ridge_coefs, axis=0)
    lasso_av_coefs[k,:] = np.mean(lasso_coefs, axis=0)
    
    k=k+1

In [62]:
print("CV Ridge Coefficients")

pd.DataFrame(np.round(ridge_av_coefs,8),
            columns = [str(i) for i in predictors],
            index = ["alpha=" + str(a) for a in alphas])

CV Ridge Coefficients


Unnamed: 0,region_africa_arabia,region_caribbean,region_central_america,region_hawaii,region_asia_pacific,region_south_america,type_espresso,type_organic,type_fair_trade,type_decaffeinated,type_pod_capsule,type_blend,type_estate,Light,Medium-Light,Medium,Medium-Dark,Dark,Very Dark
alpha=0.01,0.791482,-0.120439,0.157934,0.124943,0.058082,0.021213,0.611055,-0.009003,0.021763,0.026422,-0.33141,0.063468,0.204616,0.541016,0.867201,0.203264,-0.771167,-0.939185,-0.807586
alpha=0.1,0.791462,-0.120439,0.157928,0.124938,0.058071,0.021211,0.611027,-0.008999,0.021758,0.026416,-0.331408,0.063464,0.204616,0.540999,0.867182,0.203257,-0.771148,-0.93916,-0.807566
alpha=0.25,0.79143,-0.120437,0.157916,0.124929,0.058053,0.021208,0.610979,-0.008992,0.02175,0.026407,-0.331405,0.063456,0.204615,0.54097,0.867152,0.203245,-0.771116,-0.939118,-0.807531


In [49]:
print("CV Ridge MSE")

pd.DataFrame(np.round(ridge_av_mse,8),
            index = ["alpha=" + str(a) for a in alphas])

CV Ridge MSE


Unnamed: 0,0
alpha=0.01,10.06537
alpha=0.1,10.065366
alpha=0.25,10.065358


In [50]:
print("CV Ridge MAE")

pd.DataFrame(np.round(ridge_av_mae,8),
            index = ["alpha=" + str(a) for a in alphas])

CV Ridge MAE


Unnamed: 0,0
alpha=0.01,2.1184
alpha=0.1,2.118394
alpha=0.25,2.118383


In [42]:
print("CV Lasso Coefficients")

pd.DataFrame(np.round(lasso_av_coefs,8),
            columns = [str(i) for i in predictors],
            index = ["alpha=" + str(a) for a in alphas])

CV Lasso Coefficients


Unnamed: 0,region_africa_arabia,region_caribbean,region_central_america,region_hawaii,region_asia_pacific,region_south_america,type_espresso,type_organic,type_fair_trade,type_decaffeinated,type_pod_capsule,type_blend,type_estate,Light,Medium-Light,Medium,Medium-Dark,Dark,Very Dark
alpha=0.01,0.773994,-0.113027,0.142356,0.113023,0.041442,0.016078,0.595496,-0.003034,0.01413,0.01834,-0.323709,0.04827,0.199933,0.408722,0.649985,0.0,-0.928688,-1.032676,-0.887459
alpha=0.1,0.66058,-0.03832,0.044232,0.015138,0.0,0.0,0.458025,0.0,0.0,0.0,-0.267004,0.0,0.155451,0.345396,0.625126,0.0,-0.849087,-0.954651,-0.817816
alpha=0.25,0.530491,0.0,0.0,0.0,0.0,0.0,0.236831,0.0,0.0,0.0,-0.176901,0.0,0.040297,0.229519,0.566558,0.0,-0.715332,-0.821898,-0.695813


In [51]:
print("CV Lasso MSE")

pd.DataFrame(np.round(lasso_av_mse,8),
            index = ["alpha=" + str(a) for a in alphas])

CV Lasso MSE


Unnamed: 0,0
alpha=0.01,10.058861
alpha=0.1,10.136754
alpha=0.25,10.546513


In [52]:
print("CV Lasso MAE")

pd.DataFrame(np.round(lasso_av_mae,8),
            index = ["alpha=" + str(a) for a in alphas])

CV Lasso MAE


Unnamed: 0,0
alpha=0.01,2.114303
alpha=0.1,2.10622
alpha=0.25,2.161104


We will choose alpha of 0.1 to test on the final test set.

In [56]:
#The ridge pipeline
ridge_pipe = Pipeline([('scale' ,StandardScaler()),
                        ('ridge',Ridge(alpha=0.1, max_iter=5000000))])
    
#the lasso pipeline
lasso_pipe = Pipeline([('scale' ,StandardScaler()),
                       ('lasso',Lasso(alpha=0.1, max_iter=5000000))])

## fit the ridge
ridge_pipe.fit(coffee_train[predictors], 
    coffee_train['rating'])

## fit the lasso
lasso_pipe.fit(coffee_train[predictors], 
    coffee_train['rating'])

Pipeline(steps=[('scale', StandardScaler()),
                ('lasso', Lasso(alpha=0.1, max_iter=5000000))])

In [57]:
ridge_pred = ridge_pipe.predict(coffee_test[predictors])
ridge_test_mse = mean_squared_error(coffee_test.rating,ridge_pred)
ridge_test_mae = mean_absolute_error(coffee_test.rating,ridge_pred)

print("The average cross validation mean squared error for ridge is", ridge_test_mse)
print("The average cross validation mean absolute error for ridge is", ridge_test_mae)

The average cross validation mean squared error for ridge is 8.943361418129262
The average cross validation mean absolute error for ridge is 2.081775438504316


In [58]:
lasso_pred = lasso_pipe.predict(coffee_test[predictors])
lasso_test_mse = mean_squared_error(coffee_test.rating,lasso_pred)
lasso_test_mae = mean_absolute_error(coffee_test.rating,lasso_pred)

print("The average cross validation mean squared error for lasso is", lasso_test_mse)
print("The average cross validation mean absolute error for lasso is", lasso_test_mae)

The average cross validation mean squared error for lasso is 9.044395823190106
The average cross validation mean absolute error for lasso is 2.075085591696913


Saving test results

In [63]:
import csv

In [65]:
with open('testing_results.csv', mode='a') as coffee_file:
    results_writer = csv.writer(coffee_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    
    results_writer.writerow(['Ridge', ridge_test_mse, ridge_test_mae])
    results_writer.writerow(['Lasso', lasso_test_mse, lasso_test_mae])