Skip to content

Commit

Permalink
bugfix, feature_importance_permutation can now accept functions as me…
Browse files Browse the repository at this point in the history
…tric argument
  • Loading branch information
rasbt committed May 10, 2019
1 parent ec2658c commit ab920e4
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/sources/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The CHANGELOG for the current development version is available at
- Fix unreadable labels in `plot_confusion_matrix` for imbalanced datasets if `show_absolute=True` and `show_normed=True`. ([#504](https://github.com/rasbt/mlxtend/pull/504))
- Raises a more informative error if a `SparseDataFrame` is passed to `apriori` and the dataframe has integer column names that don't start with `0` due to current limitations of the `SparseDataFrame` implementation in pandas. ([#503](https://github.com/rasbt/mlxtend/pull/503))
- SequentialFeatureSelector now supports DataFrame as input for all operating modes (forward/backward/floating). [#506](https://github.com/rasbt/mlxtend/pull/506)
- `mlxtend.evaluate.feature_importance_permutation` now correctly accepts scoring functions with proper function signature as `metric` argument. [#528](https://github.com/rasbt/mlxtend/pull/528)

### Version 0.15.0 (01-19-2019)

Expand Down
3 changes: 3 additions & 0 deletions mlxtend/evaluate/feature_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ def score_func(y_true, y_pred):
def score_func(y_true, y_pred):
return np.mean(y_true == y_pred)

else:
score_func = metric

rng = np.random.RandomState(seed)

mean_importance_vals = np.zeros(X.shape[1])
Expand Down
62 changes: 62 additions & 0 deletions mlxtend/evaluate/tests/test_feature_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from mlxtend.utils import assert_raises
from mlxtend.evaluate import feature_importance_permutation

Expand Down Expand Up @@ -113,6 +115,66 @@ def test_regression():
assert sum(imp_vals[3:]) <= 0.01


def test_regression_custom_r2():
    """Permutation importance with a user-supplied metric (sklearn r2_score).

    Verifies that passing a scoring *function* (rather than the string
    'r2') as the ``metric`` argument yields sensible importances: the two
    informative features rank high, the uninformative ones near zero.
    """
    features, target = make_regression(n_samples=1000,
                                       n_features=5,
                                       n_informative=2,
                                       n_targets=1,
                                       random_state=123,
                                       shuffle=False)

    feat_train, feat_test, tgt_train, tgt_test = train_test_split(
        features, target, test_size=0.3, random_state=123)

    model = SVR(kernel='rbf', gamma='auto')
    model.fit(feat_train, tgt_train)

    imp_vals, imp_all = feature_importance_permutation(
        predict_method=model.predict,
        X=feat_test,
        y=tgt_test,
        metric=r2_score,
        num_rounds=1,
        seed=123)

    # One importance value per feature; one column per round (num_rounds=1).
    assert imp_vals.shape == (feat_train.shape[1], )
    assert imp_all.shape == (feat_train.shape[1], 1)
    # The two informative features carry clearly positive importance.
    assert imp_vals[0] > 0.2
    assert imp_vals[1] > 0.2
    # The noise features contribute (almost) nothing.
    assert sum(imp_vals[3:]) <= 0.01


def test_regression_custom_mse():
    """Permutation importance with sklearn's mean_squared_error as metric.

    MSE is a loss (lower is better), so permuting an informative feature
    *increases* the metric and the importance delta comes out negative.
    After normalizing by the largest magnitude, the most informative
    feature should sit at exactly -1.
    """
    features, target = make_regression(n_samples=1000,
                                       n_features=5,
                                       n_informative=2,
                                       n_targets=1,
                                       random_state=123,
                                       shuffle=False)

    feat_train, feat_test, tgt_train, tgt_test = train_test_split(
        features, target, test_size=0.3, random_state=123)

    model = SVR(kernel='rbf', gamma='auto')
    model.fit(feat_train, tgt_train)

    imp_vals, imp_all = feature_importance_permutation(
        predict_method=model.predict,
        X=feat_test,
        y=tgt_test,
        metric=mean_squared_error,
        num_rounds=1,
        seed=123)

    # Scale so the strongest (most negative) importance has magnitude 1.
    norm_imp_vals = imp_vals / np.abs(imp_vals).max()

    # One importance value per feature; one column per round (num_rounds=1).
    assert imp_vals.shape == (feat_train.shape[1], )
    assert imp_all.shape == (feat_train.shape[1], 1)
    # Feature 0 dominates and, being a loss-based delta, is negative.
    assert norm_imp_vals[0] == -1.


def test_n_rounds():

X, y = make_classification(n_samples=1000,
Expand Down

0 comments on commit ab920e4

Please sign in to comment.