Skip to content

Commit

Permalink
update ensemble doc
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Apr 21, 2016
1 parent eb26261 commit cc6c525
Showing 1 changed file with 87 additions and 16 deletions.
103 changes: 87 additions & 16 deletions docs/sources/user_guide/classifier/EnsembleVoteClassifier.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"output_type": "stream",
"text": [
"Sebastian Raschka \n",
"last updated: 2016-04-19 \n",
"last updated: 2016-04-20 \n",
"\n",
"CPython 3.5.1\n",
"IPython 4.0.3\n",
Expand Down Expand Up @@ -493,7 +493,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 2,
"metadata": {
"collapsed": false
},
Expand All @@ -502,18 +502,18 @@
"name": "stdout",
"output_type": "stream",
"text": [
"0.953 (+/-0.013) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 1, 'randomforestclassifier__n_estimators': 20}\n",
"0.947 (+/-0.017) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 1, 'randomforestclassifier__n_estimators': 200}\n",
"0.953 (+/-0.013) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 2, 'randomforestclassifier__n_estimators': 20}\n",
"0.947 (+/-0.017) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 2, 'randomforestclassifier__n_estimators': 200}\n",
"0.953 (+/-0.013) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 3, 'randomforestclassifier__n_estimators': 20}\n",
"0.953 (+/-0.017) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 3, 'randomforestclassifier__n_estimators': 200}\n",
"0.947 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 1, 'randomforestclassifier__n_estimators': 20}\n",
"0.953 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 1, 'randomforestclassifier__n_estimators': 200}\n",
"0.947 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 2, 'randomforestclassifier__n_estimators': 20}\n",
"0.947 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 2, 'randomforestclassifier__n_estimators': 200}\n",
"0.960 (+/-0.012) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 3, 'randomforestclassifier__n_estimators': 20}\n",
"0.953 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 3, 'randomforestclassifier__n_estimators': 200}\n"
"0.953 (+/-0.013) for {'pipeline__sfs__k_features': 1, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 20}\n",
"0.947 (+/-0.017) for {'pipeline__sfs__k_features': 1, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 200}\n",
"0.953 (+/-0.013) for {'pipeline__sfs__k_features': 2, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 20}\n",
"0.947 (+/-0.017) for {'pipeline__sfs__k_features': 2, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 200}\n",
"0.953 (+/-0.013) for {'pipeline__sfs__k_features': 3, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 20}\n",
"0.953 (+/-0.017) for {'pipeline__sfs__k_features': 3, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 200}\n",
"0.947 (+/-0.017) for {'pipeline__sfs__k_features': 1, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 20}\n",
"0.953 (+/-0.017) for {'pipeline__sfs__k_features': 1, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 200}\n",
"0.947 (+/-0.017) for {'pipeline__sfs__k_features': 2, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 20}\n",
"0.947 (+/-0.017) for {'pipeline__sfs__k_features': 2, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 200}\n",
"0.960 (+/-0.012) for {'pipeline__sfs__k_features': 3, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 20}\n",
"0.953 (+/-0.017) for {'pipeline__sfs__k_features': 3, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 200}\n"
]
}
],
Expand Down Expand Up @@ -640,7 +640,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 3,
"metadata": {
"collapsed": false
},
Expand All @@ -651,7 +651,7 @@
"array([0, 1, 2])"
]
},
"execution_count": 44,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -668,6 +668,77 @@
"eclf.fit(X, y).predict(X[[1, 51, 149]])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Furthermore, we can fit the `SequentialFeatureSelector` separately, outside the grid search hyperparameter optimization pipeline. Here, we determine the best features first, and then we construct a pipeline that passes these \"fixed\" best feature indices as the columns for the `ColumnSelector`:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Features: 2/2"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best features (1, 3)\n"
]
}
],
"source": [
"sfs1 = SequentialFeatureSelector(clf1, \n",
" k_features=2,\n",
" forward=True, \n",
" floating=False, \n",
" scoring='accuracy',\n",
" print_progress=True,\n",
" cv=0)\n",
"\n",
"sfs1.fit(X, y)\n",
"\n",
"print('Best features', sfs1.k_feature_idx_)\n",
"\n",
"col_sel = ColumnSelector(cols=sfs1.k_feature_idx_)\n",
"\n",
"clf1_pipe = Pipeline([('sel', col_sel),\n",
" ('logreg', clf1)])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 1, 2])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eclf = EnsembleVoteClassifier(clfs=[clf1_pipe, clf2, clf3], voting='soft')\n",
"eclf.fit(X, y).predict(X[[1, 51, 149]])"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down

0 comments on commit cc6c525

Please sign in to comment.