update ensemble doc

rasbt · Apr 21, 2016 · cc6c525 · cc6c525
1 parent eb26261
commit cc6c525
Showing 1 changed file with 87 additions and 16 deletions.
diff --git a/docs/sources/user_guide/classifier/EnsembleVoteClassifier.ipynb b/docs/sources/user_guide/classifier/EnsembleVoteClassifier.ipynb
@@ -25,7 +25,7 @@
      "output_type": "stream",
      "text": [
       "Sebastian Raschka \n",
-      "last updated: 2016-04-19 \n",
+      "last updated: 2016-04-20 \n",
       "\n",
       "CPython 3.5.1\n",
       "IPython 4.0.3\n",
@@ -493,7 +493,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 2,
    "metadata": {
     "collapsed": false
    },
@@ -502,18 +502,18 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.953 (+/-0.013) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 1, 'randomforestclassifier__n_estimators': 20}\n",
-      "0.947 (+/-0.017) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 1, 'randomforestclassifier__n_estimators': 200}\n",
-      "0.953 (+/-0.013) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 2, 'randomforestclassifier__n_estimators': 20}\n",
-      "0.947 (+/-0.017) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 2, 'randomforestclassifier__n_estimators': 200}\n",
-      "0.953 (+/-0.013) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 3, 'randomforestclassifier__n_estimators': 20}\n",
-      "0.953 (+/-0.017) for {'pipeline__logreg__C': 1.0, 'pipeline__sfs__k_features': 3, 'randomforestclassifier__n_estimators': 200}\n",
-      "0.947 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 1, 'randomforestclassifier__n_estimators': 20}\n",
-      "0.953 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 1, 'randomforestclassifier__n_estimators': 200}\n",
-      "0.947 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 2, 'randomforestclassifier__n_estimators': 20}\n",
-      "0.947 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 2, 'randomforestclassifier__n_estimators': 200}\n",
-      "0.960 (+/-0.012) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 3, 'randomforestclassifier__n_estimators': 20}\n",
-      "0.953 (+/-0.017) for {'pipeline__logreg__C': 100.0, 'pipeline__sfs__k_features': 3, 'randomforestclassifier__n_estimators': 200}\n"
+      "0.953 (+/-0.013) for {'pipeline__sfs__k_features': 1, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 20}\n",
+      "0.947 (+/-0.017) for {'pipeline__sfs__k_features': 1, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 200}\n",
+      "0.953 (+/-0.013) for {'pipeline__sfs__k_features': 2, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 20}\n",
+      "0.947 (+/-0.017) for {'pipeline__sfs__k_features': 2, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 200}\n",
+      "0.953 (+/-0.013) for {'pipeline__sfs__k_features': 3, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 20}\n",
+      "0.953 (+/-0.017) for {'pipeline__sfs__k_features': 3, 'pipeline__logreg__C': 1.0, 'randomforestclassifier__n_estimators': 200}\n",
+      "0.947 (+/-0.017) for {'pipeline__sfs__k_features': 1, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 20}\n",
+      "0.953 (+/-0.017) for {'pipeline__sfs__k_features': 1, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 200}\n",
+      "0.947 (+/-0.017) for {'pipeline__sfs__k_features': 2, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 20}\n",
+      "0.947 (+/-0.017) for {'pipeline__sfs__k_features': 2, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 200}\n",
+      "0.960 (+/-0.012) for {'pipeline__sfs__k_features': 3, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 20}\n",
+      "0.953 (+/-0.017) for {'pipeline__sfs__k_features': 3, 'pipeline__logreg__C': 100.0, 'randomforestclassifier__n_estimators': 200}\n"
      ]
     }
    ],
@@ -640,7 +640,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 3,
    "metadata": {
     "collapsed": false
    },
@@ -651,7 +651,7 @@
        "array([0, 1, 2])"
       ]
      },
-     "execution_count": 44,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -668,6 +668,77 @@
     "eclf.fit(X, y).predict(X[[1, 51, 149]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Furthermore, we can fit the `SequentialFeatureSelector` separately, outside the grid search hyperparameter optimization pipeline. Here, we first determine the best features and then construct a pipeline that passes the indices of these \"fixed\" best features to the `ColumnSelector`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Features: 2/2"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Best features (1, 3)\n"
+     ]
+    }
+   ],
+   "source": [
+    "sfs1 = SequentialFeatureSelector(clf1, \n",
+    "                                 k_features=2,\n",
+    "                                 forward=True, \n",
+    "                                 floating=False, \n",
+    "                                 scoring='accuracy',\n",
+    "                                 print_progress=True,\n",
+    "                                 cv=0)\n",
+    "\n",
+    "sfs1.fit(X, y)\n",
+    "\n",
+    "print('Best features', sfs1.k_feature_idx_)\n",
+    "\n",
+    "col_sel = ColumnSelector(cols=sfs1.k_feature_idx_)\n",
+    "\n",
+    "clf1_pipe = Pipeline([('sel', col_sel),\n",
+    "                      ('logreg', clf1)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 1, 2])"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "eclf = EnsembleVoteClassifier(clfs=[clf1_pipe, clf2, clf3], voting='soft')\n",
+    "eclf.fit(X, y).predict(X[[1, 51, 149]])"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},