Merge pull request #48 from guitarmind/master

Fixed the "tuple index out of range" error, the unit test, and the example notebook
scikit-learn-contrib · Jan 31, 2019 · eaad6a3 · eaad6a3
2 parents 47f6cd4 + badc33e
commit eaad6a3
Show file tree

Hide file tree

Showing 3 changed files with 52 additions and 38 deletions.
diff --git a/boruta/boruta_py.py b/boruta/boruta_py.py
@@ -333,7 +333,7 @@ def _fit(self, X, y):
         imp_history_rejected = imp_history[1:, not_selected] * -1
 
         # update rank for not_selected features
-        if not_selected.shape[0] > 0 and not_selected.shape[1] > 0:
+        if not_selected.shape[0] > 0:
                 # calculate ranks in each iteration, then median of ranks across feats
                 iter_ranks = self._nanrankdata(imp_history_rejected, axis=1)
                 rank_medians = np.nanmedian(iter_ranks, axis=0)

diff --git a/boruta/examples/Madalon_Data_Set.ipynb b/boruta/examples/Madalon_Data_Set.ipynb
@@ -30,9 +30,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
@@ -53,21 +51,18 @@
     "    # URLS for dataset via UCI\n",
     "    train_data_url='https://archive.ics.uci.edu/ml/machine-learning-databases/madelon/MADELON/madelon_train.data'\n",
     "    train_label_url='https://archive.ics.uci.edu/ml/machine-learning-databases/madelon/MADELON/madelon_train.labels'\n",
-    "        \n",
-    "    \n",
+    "\n",
     "    X_data = pd.read_csv(train_data_url, sep=\" \", header=None)\n",
     "    y_data = pd.read_csv(train_label_url, sep=\" \", header=None)\n",
-    "    data = X_data.ix[:,0:499]\n",
-    "    data['target'] = y_data[0] \n",
+    "    data = X_data.loc[:, :499]\n",
+    "    data['target'] = y_data[0]\n",
     "    return data"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "data = load_data()"
@@ -76,9 +71,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -267,13 +260,11 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "y=data.pop('target')\n",
-    "X=data.copy()"
+    "y = data.pop('target')\n",
+    "X = data.copy().values"
    ]
   },
   {
@@ -293,9 +284,9 @@
    },
    "outputs": [],
    "source": [
-    "rf = RandomForestClassifier(n_jobs=-1, class_weight='auto', max_depth=7)\n",
-    "# define Boruta feature selection method\n",
-    "feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2)"
+    "rf = RandomForestClassifier(n_jobs=-1, class_weight=None, max_depth=7, random_state=0)\n",
+    "# Define Boruta feature selection method\n",
+    "feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=0)"
    ]
   },
   {
@@ -308,12 +299,10 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "feat_selector.fit(X,y)"
+    "feat_selector.fit(X, y)"
    ]
   },
   {
@@ -328,15 +317,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "# check selected features\n",
+    "# Check selected features\n",
     "print(feat_selector.support_)\n",
-    "#select the chosen features from our dataframe.\n",
-    "selected = X.ix[:,feat_selector.support_]\n",
+    "# Select the chosen features from our dataframe.\n",
+    "selected = X[:, feat_selector.support_]\n",
     "print (\"\")\n",
     "print (\"Selected Feature Matrix Shape\")\n",
     "print (selected.shape)"
@@ -352,9 +339,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "feat_selector.ranking_"
@@ -386,9 +371,38 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.6.5"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/boruta/test/unit_tests.py b/boruta/test/unit_tests.py
@@ -40,7 +40,7 @@ def test_if_boruta_extracts_relevant_features(self):
         bt.fit(X, y)
 
         # make sure that only all the relevant features are returned
-        self.assertItemsEqual(range(5), list(np.where(bt.support_)[0]))
+        self.assertListEqual(list(range(5)), list(np.where(bt.support_)[0]))
 
 
 if __name__ == '__main__':