# Entry 39 notebook - Lasso Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import openml

from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import ElasticNet

In [2]:
auto_mpg = openml.datasets.get_dataset(42372).get_data()[0]
auto_mpg.head()

Unnamed: 0,Displacement,Horse_power,Weight,Acceleration,Model_year,Mpg
0,91.0,70.0,1955.0,20.5,71.0,26.0
1,232.0,100.0,2789.0,15.0,73.0,18.0
2,350.0,145.0,4055.0,12.0,76.0,13.0
3,318.0,140.0,4080.0,13.7,78.0,17.5
4,113.0,95.0,2372.0,15.0,70.0,24.0


In [3]:
def split_data(df, target, train_size):
    y = df[[target]]
    X = df.drop(target, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=train_size, random_state=12)
    return X_train, X_test, y_train, y_test

In [4]:
X_train, X_test, y_train, y_test = split_data(auto_mpg, 'Mpg', 0.8)
pipe = make_pipeline(SimpleImputer(strategy='median'), StandardScaler(), ElasticNet())
pipe.fit(X_train, y_train)
pipe.score(X_test, y_test)

0.748656575045042

In [5]:
cross_validate(pipe, X_train, y_train, cv=10, scoring=['explained_variance', 'r2', 'max_error',
                                                      'neg_median_absolute_error', 'neg_mean_absolute_error',
                                                      'neg_mean_squared_error', 'neg_mean_squared_log_error',
                                                      'neg_root_mean_squared_error', 'neg_mean_poisson_deviance',
                                                      'neg_mean_gamma_deviance'])

{'fit_time': array([0.01099205, 0.01199174, 0.00699544, 0.00799203, 0.00999475,
        0.00716829, 0.00599527, 0.00899792, 0.00956964, 0.00999522]),
 'score_time': array([0.01498961, 0.01913381, 0.01750183, 0.01699018, 0.01798701,
        0.01898837, 0.0159905 , 0.01099086, 0.03796315, 0.01798892]),
 'test_explained_variance': array([0.65048302, 0.47063489, 0.69618037, 0.64759492, 0.87852473,
        0.76435978, 0.93630181, 0.63013235, 0.86372202, 0.83696177]),
 'test_r2': array([0.63322411, 0.47042485, 0.6901664 , 0.54200774, 0.86463222,
        0.76174482, 0.93485207, 0.62876206, 0.86125317, 0.74557887]),
 'test_max_error': array([ -7.90780651, -13.07912594, -12.91698944,  -8.48917329,
         -4.8456542 ,  -6.40767489,  -2.7299637 , -13.5322625 ,
         -3.35542075,  -7.40746159]),
 'test_neg_median_absolute_error': array([-2.71238679, -3.18974191, -2.78260305, -1.16096015, -1.31678013,
        -2.17059464, -0.39783983, -3.28309021, -2.97856855, -2.23837921]),
 'test_neg_mean_ab