In [1]:
import random

import numpy.random as npr
import numpy as np
from sklearn.datasets import load_boston
from sklearn.feature_selection import f_regression, RFECV
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.model_selection import train_test_split

random.seed(0)

In [2]:
X, y = load_boston(return_X_y = True)

## Boston Data

```
7. Attribute Information:

    1. CRIM      per capita crime rate by town
    2. ZN        proportion of residential land zoned for lots over 
                 25,000 sq.ft.
    3. INDUS     proportion of non-retail business acres per town
    4. CHAS      Charles River dummy variable (= 1 if tract bounds 
                 river; 0 otherwise)
    5. NOX       nitric oxides concentration (parts per 10 million)
    6. RM        average number of rooms per dwelling
    7. AGE       proportion of owner-occupied units built prior to 1940
    8. DIS       weighted distances to five Boston employment centres
    9. RAD       index of accessibility to radial highways
    10. TAX      full-value property-tax rate per $10,000
    11. PTRATIO  pupil-teacher ratio by town
    12. B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks 
                 by town
    13. LSTAT    % lower status of the population
    14. MEDV     Median value of owner-occupied homes in $1000's
```

In [3]:
X.shape, y.shape

((506, 13), (506,))

## Add a whole bunch of useless features

In [4]:
noise = npr.normal(size=(506, 100),)
noisy_X = np.hstack([X, noise])

In [5]:
train_X, test_X, train_y, test_y = split = train_test_split(noisy_X, y, test_size = 0.25)

## Recursive Feature Elimination

In [6]:
estimator = LinearRegression()
rfe = RFECV(estimator, verbose = 1, scoring = 'neg_mean_squared_error')
rfe.fit_transform(train_X, train_y)

Fitting estimator with 88 features.
Fitting estimator with 87 features.
Fitting estimator with 86 features.
Fitting estimator with 85 features.
Fitting estimator with 84 features.
Fitting estimator with 83 features.
Fitting estimator with 82 features.
Fitting estimator with 81 features.
Fitting estimator with 80 features.
Fitting estimator with 79 features.
Fitting estimator with 78 features.
Fitting estimator with 77 features.
Fitting estimator with 76 features.
Fitting estimator with 75 features.
Fitting estimator with 74 features.
Fitting estimator with 73 features.
Fitting estimator with 72 features.
Fitting estimator with 71 features.
Fitting estimator with 70 features.
Fitting estimator with 69 features.
Fitting estimator with 68 features.
Fitting estimator with 67 features.
Fitting estimator with 66 features.
Fitting estimator with 65 features.
Fitting estimator with 64 features.
Fitting estimator with 63 features.
Fitting estimator with 62 features.
Fitting estimator with 61 fe

array([[  0.00000000e+00,   5.32000000e-01,   5.76200000e+00, ...,
          2.02000000e+01,   1.04200000e+01,  -1.16043078e+00],
       [  0.00000000e+00,   4.72000000e-01,   6.61600000e+00, ...,
          1.84000000e+01,   8.93000000e+00,   6.99376070e-02],
       [  0.00000000e+00,   5.24000000e-01,   6.17200000e+00, ...,
          1.52000000e+01,   1.91500000e+01,  -6.04773738e-01],
       ..., 
       [  0.00000000e+00,   4.64000000e-01,   5.85600000e+00, ...,
          1.86000000e+01,   1.30000000e+01,  -1.48871438e+00],
       [  0.00000000e+00,   6.05000000e-01,   7.92900000e+00, ...,
          1.47000000e+01,   3.70000000e+00,   1.87758251e-01],
       [  0.00000000e+00,   6.24000000e-01,   5.82200000e+00, ...,
          2.12000000e+01,   1.50300000e+01,  -1.01222372e-02]])

In [7]:
rfe.n_features_

7

In [8]:
lasso = LassoCV()
lasso.fit(train_X, train_y)

LassoCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True,
    max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
    precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
    verbose=False)

In [9]:
len(lasso.coef_.nonzero()[0])

11

In [10]:
lasso.coef_

array([-0.08276184,  0.06377317, -0.        ,  0.        , -0.        ,
        1.60334002,  0.01764207, -0.73870784,  0.33275231, -0.018117  ,
       -0.76398387,  0.00730439, -0.69271043, -0.        ,  0.        ,
       -0.        ,  0.        , -0.        ,  0.        , -0.        ,
       -0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        , -0.        ,
        0.        , -0.        ,  0.        , -0.        ,  0.        ,
       -0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
       -0.        , -0.        ,  0.        , -0.        ,  0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
       -0.        ,  0.        ,  0.        , -0.        ,  0.        ,
       -0.        ,  0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

In [11]:
lasso.score(test_X, test_y)

0.65644805859480837

In [12]:
rfe.score(test_X, test_y)

0.68749227852424388

## Takeaways

If you have a lot of features, just use