In [4]:

# Recursive Feature Elimination
'''Given an external estimator that assigns weights to features, RFE selects features by recursively considering smaller sets of features. 
First, the estimator is trained on the initial set of features and the importance of each feature is obtained either through a coef_ attribute 
or through a feature_importances_ attribute. Then, the least important features are pruned from the current set of features. 
That procedure is recursively repeated on the pruned set until the desired number of features to select is eventually reached.
'''

from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.svm import SVR

# Synthetic regression problem (Friedman #1): 50 samples, 10 features, fixed seed
# so the printed mask/ranking are reproducible.
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)

# Linear-kernel SVR is the external estimator whose weights RFE uses to rank features.
estimator = SVR(kernel="linear")

# Keep 5 features, dropping one feature per elimination round (step=1).
# n_features_to_select is passed by keyword: newer scikit-learn releases accept
# only `estimator` positionally and raise TypeError for `RFE(estimator, 5, ...)`.
selector = RFE(estimator, n_features_to_select=5, step=1)
selector = selector.fit(X, y)

print(selector.support_)  # Boolean mask: True for each selected feature

print(selector.ranking_)  # Rank per feature; selected features have rank 1


[ True  True  True  True  True False False False False False]
[1 1 1 1 1 6 4 3 2 5]


In [6]:

# Feature Extraction with RFE
from pandas import read_csv
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# load data
# NOTE: despite the variable name, this is a relative local file path, not a URL;
# the CSV must be present in the notebook's working directory.
url = "pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

# Column names are supplied explicitly — assumes the file has no header row (TODO confirm).
dataframe = read_csv(url, names=names)

array = dataframe.values

# First eight columns are the input features; the ninth ('class') is the target.
X = array[:,0:8]
Y = array[:,8]

# feature extraction
# NOTE(review): with default LogisticRegression settings on unscaled data, recent
# scikit-learn versions may emit a ConvergenceWarning — consider scaling the
# features or raising max_iter. Left unchanged here to avoid altering results.
model = LogisticRegression()

# Select the 3 top-ranked features. n_features_to_select is passed by keyword:
# newer scikit-learn releases accept only `estimator` positionally and raise
# TypeError for `RFE(model, 3)`.
rfe = RFE(model, n_features_to_select=3)

fit = rfe.fit(X, Y)

print("Num Features:", fit.n_features_)
print("Selected Features: ",  fit.support_)
print("Feature Ranking: ", fit.ranking_)

Num Features: 3
Selected Features:  [ True False False False False  True  True False]
Feature Ranking:  [1 2 3 5 6 1 1 4]
