In [13]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline

In [3]:
# Load the diabetes dataset directly as a (features, target) pair of arrays.
X, y = load_diabetes(return_X_y=True)

In [7]:
# Preview the first five rows of the feature matrix.
X[:5]

array([[ 0.03807591,  0.05068012,  0.06169621,  0.02187239, -0.0442235 ,
        -0.03482076, -0.04340085, -0.00259226,  0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, -0.02632753, -0.00844872,
        -0.01916334,  0.07441156, -0.03949338, -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, -0.00567042, -0.04559945,
        -0.03419447, -0.03235593, -0.00259226,  0.00286131, -0.02593034],
       [-0.08906294, -0.04464164, -0.01159501, -0.03665608,  0.01219057,
         0.02499059, -0.03603757,  0.03430886,  0.02268774, -0.00936191],
       [ 0.00538306, -0.04464164, -0.03638469,  0.02187239,  0.00393485,
         0.01559614,  0.00814208, -0.00259226, -0.03198764, -0.04664087]])

In [8]:
# Preview the first five target values.
y[:5]

array([151.,  75., 141., 206., 135.])

In [9]:
# Inspect the complete dataset bundle (it holds 'data', 'target' and further keys).
# NOTE(review): displaying the whole bundle produces a very long output;
# consider showing only load_diabetes().keys() instead.
load_diabetes()

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [11]:
# Names of the dataset's feature columns, read from the dataset bundle.
features = load_diabetes().feature_names
features

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [12]:
# Hold out 30% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [21]:
# Standardise the features, then fit a Lasso regressor on the scaled values.
# The step names ("scaler", "model") are what parameter grids refer to,
# e.g. "model__alpha" targets the Lasso's alpha.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("model", Lasso()),
    ]
)

In [22]:
# Grid-search the Lasso regularisation strength with 5-fold cross-validation.
#
# The grid is 0.1, 0.2, ..., 2.9 (29 candidates). It is built from an integer
# range divided by 10 rather than np.arange(0.1, 3, 0.1): a floating-point
# step accumulates rounding error (candidates such as 0.30000000000000004)
# and makes the number of generated values sensitive to that error.
search = GridSearchCV(
    pipeline,
    {"model__alpha": np.arange(1, 30) / 10},
    cv=5,
    # Scores are negated mean squared errors, so values closer to 0 are better.
    scoring="neg_mean_squared_error",
    # Prints one line per (fold, candidate) fit while searching.
    verbose=3,
)

In [23]:
# Run the cross-validated grid search using the training split only.
search.fit(X_train,y_train)

Fitting 5 folds for each of 29 candidates, totalling 145 fits
[CV 1/5] END ..............model__alpha=0.1;, score=-3129.370 total time=   0.0s
[CV 2/5] END ..............model__alpha=0.1;, score=-3292.959 total time=   0.0s
[CV 3/5] END ..............model__alpha=0.1;, score=-2840.933 total time=   0.0s
[CV 4/5] END ..............model__alpha=0.1;, score=-3053.636 total time=   0.0s
[CV 5/5] END ..............model__alpha=0.1;, score=-3515.712 total time=   0.0s
[CV 1/5] END ..............model__alpha=0.2;, score=-3161.543 total time=   0.0s
[CV 2/5] END ..............model__alpha=0.2;, score=-3280.634 total time=   0.0s
[CV 3/5] END ..............model__alpha=0.2;, score=-2856.722 total time=   0.0s
[CV 4/5] END ..............model__alpha=0.2;, score=-3054.048 total time=   0.0s
[CV 5/5] END ..............model__alpha=0.2;, score=-3501.771 total time=   0.0s
[CV 1/5] END model__alpha=0.30000000000000004;, score=-3170.054 total time=   0.0s
[CV 2/5] END model__alpha=0.30000000000000004

[CV 5/5] END model__alpha=2.9000000000000004;, score=-3399.778 total time=   0.0s


In [24]:
# Parameter setting that achieved the best cross-validation score.
# NOTE(review): if the selected alpha is 0.1, it sits on the lower edge of the
# searched grid, so the true optimum may lie below it — consider extending the
# grid towards smaller alphas.
search.best_params_

{'model__alpha': 0.1}

In [25]:
# The pipeline configured with the winning parameter setting.
search.best_estimator_

In [26]:
# Cross-validation score of the best candidate. The search was scored with
# "neg_mean_squared_error", so this is a negated MSE (closer to 0 is better).
search.best_score_

-3166.522087748828

In [27]:
# The Lasso step of the winning pipeline, looked up by its step name.
search.best_estimator_.named_steps["model"]

In [31]:
# Coefficients learned by the Lasso step of the winning pipeline.
coef = search.best_estimator_.named_steps["model"].coef_
coef

array([  1.364918  , -12.21558692,  26.45121861,  18.40929882,
       -30.54131232,  14.55719971,   0.        ,  11.74486066,
        26.79441432,   2.06055063])

In [39]:
# Boolean mask: True where the Lasso coefficient is non-zero.
# NOTE(review): the execution counts of this cell and the following ones are
# out of order (39, 38, 33, 34) — restart the kernel and run all cells to
# confirm the notebook reproduces top to bottom.
coef!=0

array([ True,  True,  True,  True,  True,  True, False,  True,  True,
        True])

In [38]:
# Feature names as a NumPy array, which (unlike a list) supports boolean-mask indexing.
np.array(features)

array(['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'],
      dtype='<U3')

In [33]:
# Features the Lasso kept, i.e. those with a non-zero coefficient.
np.array(features)[coef!= 0]

array(['age', 'sex', 'bmi', 'bp', 's1', 's2', 's4', 's5', 's6'],
      dtype='<U3')

In [34]:
# Features the Lasso dropped, i.e. those whose coefficient is exactly zero.
np.array(features)[coef == 0]

array(['s3'], dtype='<U3')