Skip to content

Commit

Permalink
Merge pull request #48 from guitarmind/master
Browse files: browse the repository at this point in the history
Fixed "Tuple Index Out of range error", unit test and example notebook
  • Loading branch information
danielhomola committed Jan 31, 2019
2 parents 47f6cd4 + badc33e commit eaad6a3
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 38 deletions.
2 changes: 1 addition & 1 deletion boruta/boruta_py.py
Expand Up @@ -333,7 +333,7 @@ def _fit(self, X, y):
imp_history_rejected = imp_history[1:, not_selected] * -1

# update rank for not_selected features
if not_selected.shape[0] > 0 and not_selected.shape[1] > 0:
if not_selected.shape[0] > 0:
# calculate ranks in each iteration, then median of ranks across feats
iter_ranks = self._nanrankdata(imp_history_rejected, axis=1)
rank_medians = np.nanmedian(iter_ranks, axis=0)
Expand Down
86 changes: 50 additions & 36 deletions boruta/examples/Madalon_Data_Set.ipynb
Expand Up @@ -30,9 +30,7 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
Expand All @@ -53,21 +51,18 @@
" # URLS for dataset via UCI\n",
" train_data_url='https://archive.ics.uci.edu/ml/machine-learning-databases/madelon/MADELON/madelon_train.data'\n",
" train_label_url='https://archive.ics.uci.edu/ml/machine-learning-databases/madelon/MADELON/madelon_train.labels'\n",
" \n",
" \n",
"\n",
" X_data = pd.read_csv(train_data_url, sep=\" \", header=None)\n",
" y_data = pd.read_csv(train_label_url, sep=\" \", header=None)\n",
" data = X_data.ix[:,0:499]\n",
" data['target'] = y_data[0] \n",
" data = X_data.loc[:, :499]\n",
" data['target'] = y_data[0]\n",
" return data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"data = load_data()"
Expand All @@ -76,9 +71,7 @@
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -267,13 +260,11 @@
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"y=data.pop('target')\n",
"X=data.copy()"
"y = data.pop('target')\n",
"X = data.copy().values"
]
},
{
Expand All @@ -293,9 +284,9 @@
},
"outputs": [],
"source": [
"rf = RandomForestClassifier(n_jobs=-1, class_weight='auto', max_depth=7)\n",
"# define Boruta feature selection method\n",
"feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2)"
"rf = RandomForestClassifier(n_jobs=-1, class_weight=None, max_depth=7, random_state=0)\n",
"# Define Boruta feature selection method\n",
"feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=0)"
]
},
{
Expand All @@ -308,12 +299,10 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"feat_selector.fit(X,y)"
"feat_selector.fit(X, y)"
]
},
{
Expand All @@ -328,15 +317,13 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# check selected features\n",
"# Check selected features\n",
"print(feat_selector.support_)\n",
"#select the chosen features from our dataframe.\n",
"selected = X.ix[:,feat_selector.support_]\n",
"# Select the chosen features from our dataframe.\n",
"selected = X[:, feat_selector.support_]\n",
"print (\"\")\n",
"print (\"Selected Feature Matrix Shape\")\n",
"print (selected.shape)"
Expand All @@ -352,9 +339,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"feat_selector.ranking_"
Expand Down Expand Up @@ -386,9 +371,38 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
"version": "3.6.5"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 1
}
2 changes: 1 addition & 1 deletion boruta/test/unit_tests.py
Expand Up @@ -40,7 +40,7 @@ def test_if_boruta_extracts_relevant_features(self):
bt.fit(X, y)

# make sure that all the relevant features, and only those, are returned
self.assertItemsEqual(range(5), list(np.where(bt.support_)[0]))
self.assertListEqual(list(range(5)), list(np.where(bt.support_)[0]))


if __name__ == '__main__':
Expand Down

0 comments on commit eaad6a3

Please sign in to comment.