In [1]:
import numpy.random as npr
import numpy as np
from sklearn.datasets import load_boston
from sklearn.feature_selection import f_regression, RFECV
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.model_selection import train_test_split

In [2]:
# Load the Boston housing data as a (feature matrix, target vector) pair.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs against an older pinned release -- confirm the environment.
X, y = load_boston(return_X_y = True)

## Boston Data

```
7. Attribute Information:

    1. CRIM      per capita crime rate by town
    2. ZN        proportion of residential land zoned for lots over 
                 25,000 sq.ft.
    3. INDUS     proportion of non-retail business acres per town
    4. CHAS      Charles River dummy variable (= 1 if tract bounds 
                 river; 0 otherwise)
    5. NOX       nitric oxides concentration (parts per 10 million)
    6. RM        average number of rooms per dwelling
    7. AGE       proportion of owner-occupied units built prior to 1940
    8. DIS       weighted distances to five Boston employment centres
    9. RAD       index of accessibility to radial highways
    10. TAX      full-value property-tax rate per $10,000
    11. PTRATIO  pupil-teacher ratio by town
    12. B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks 
                 by town
    13. LSTAT    % lower status of the population
    14. MEDV     Median value of owner-occupied homes in $1000's
```

In [3]:
# Sanity-check the dimensions of the feature matrix and the target vector.
X.shape, y.shape

((506, 13), (506,))

## Add 100 useless (pure-noise) features

In [4]:
# Append 100 columns of standard-normal noise to the real features so the
# feature-selection methods below have something to reject.
# A dedicated, seeded generator makes the noise (and every downstream number)
# identical on each Restart & Run All; the row count is taken from X instead of
# being hard-coded so the cell keeps working if the dataset size changes.
noise_rng = npr.RandomState(0)
noise = noise_rng.normal(size=(X.shape[0], 100))
noisy_X = np.hstack([X, noise])

In [None]:
# Hold out a quarter of the rows for evaluation; random_state is pinned so the
# same partition is produced on every run.
split = train_test_split(noisy_X, y, test_size=0.25, random_state=0)
train_X, test_X, train_y, test_y = split

## Recursive Feature Elimination

In [None]:
# Recursive feature elimination with cross-validation, wrapped around a plain
# linear regression; candidate feature subsets are compared by negative MSE.
estimator = LinearRegression()
rfe = RFECV(estimator, verbose=1, scoring='neg_mean_squared_error')
# Only the fitted selector is used by the later cells, so call fit() rather than
# fit_transform(), which would also build and echo a reduced copy of train_X.
rfe.fit(train_X, train_y)

In [None]:
# How many features the fitted RFECV selector kept (13 real + 100 noise went in).
rfe.n_features_

In [None]:
# Cross-validated Lasso on the same noisy training split, as an alternative
# way of discarding the uninformative columns.
lasso = LassoCV()
lasso.fit(X=train_X, y=train_y)

In [None]:
# Count the coefficients the Lasso left non-zero, i.e. the features it kept.
np.flatnonzero(lasso.coef_).size

In [None]:
# Evaluate the fitted Lasso on the held-out rows.
lasso.score(test_X, test_y)

In [None]:
# Evaluate the RFECV-selected model on the same held-out rows for comparison.
# NOTE(review): presumably this reports the wrapped estimator's default score,
# not the neg-MSE metric used during selection -- confirm against the RFECV docs.
rfe.score(test_X, test_y)

## Takeaways

If you have a lot of features, just use