In [1]:
from sklearn.datasets import samples_generator
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesClassifier

In [2]:
X, y = samples_generator.make_classification(n_samples=150,
        n_features=25, n_classes=3, n_informative=6,
        n_redundant=0, random_state=7)

print(X)
print(y)

[[ 1.01856035 -0.1850947   0.33953529 ... -0.89722827  2.43356744
  -0.69119524]
 [-0.27783108  0.99897481  0.53479038 ...  0.22153721  1.2023667
   0.30568521]
 [ 0.03110801  1.14511831  0.28923459 ... -1.05191555  0.09786165
  -0.65803802]
 ...
 [-0.26170774  0.83418331  0.9540871  ...  0.43996139  1.29141862
   0.58900801]
 [ 0.09728851 -2.19374974  0.82561751 ...  0.19169576  0.38328131
   1.45836114]
 [-0.11176114 -0.44320815 -0.8666994  ...  1.00486657  0.38439339
   1.38943378]]
[0 2 2 0 2 0 2 1 0 1 1 2 1 0 2 2 1 0 0 1 0 1 0 1 2 2 0 0 1 0 1 2 1 0 2 2 1
 1 2 2 2 0 0 0 2 1 1 2 1 0 1 2 2 1 2 0 2 2 0 2 2 0 1 0 2 1 1 1 1 2 0 1 0 2
 0 0 1 2 2 0 0 1 0 2 2 0 0 0 2 2 2 1 2 0 2 0 2 0 0 0 1 1 1 1 2 2 2 2 0 1 1
 0 2 1 1 0 1 1 1 1 0 0 0 1 2 0 0 0 2 1 2 0 0 1 0 1 1 0 1 1 1 1 2 2 0 1 1 0
 2 2]


In [3]:
k_best_selector = SelectKBest(f_regression, k=9)
classifier = ExtraTreesClassifier(n_estimators=60, max_depth=4)

processor_pipeline = Pipeline([('selector', k_best_selector),
                                 ('erf', classifier)])

processor_pipeline.set_params(selector__k=7, erf__n_estimators=30)

processor_pipeline.fit(X, y)

output = processor_pipeline.predict(X)
print("Predicted output:\n", output)

print("\nScore:", processor_pipeline.score(X, y))

# 特徴選択ブロックの状態を得る
status = processor_pipeline.named_steps['selector'].get_support()

# 選択された特徴の番号を取得して表示する
selected = [i for i, x in enumerate(status) if x]
print("\nIndices of selected features:", ','.join([str(x) for x in selected]))


Predicted output:
 [1 2 2 0 2 0 2 1 0 1 1 2 0 0 2 2 1 0 0 1 0 2 1 1 2 2 0 0 1 2 1 2 1 0 2 2 1
 1 2 2 2 0 1 2 2 1 1 2 1 0 1 2 2 2 2 0 2 2 0 2 2 0 1 0 2 1 0 1 1 2 0 1 0 2
 0 0 1 2 2 0 0 1 2 2 2 0 0 0 2 2 2 1 2 0 2 1 2 1 0 0 1 1 1 1 2 1 0 2 0 1 1
 0 2 1 1 0 1 1 1 1 0 0 0 1 2 0 0 0 2 1 2 0 0 1 0 1 1 0 1 1 1 2 2 2 1 1 2 0
 2 2]

Score: 0.8866666666666667

Indices of selected features: 4,7,8,12,14,17,22
