Skip to content

Commit

Permalink
bugfix, feature_importance_permutation can now accept functions as me…
Browse files Browse the repository at this point in the history
…tric argument
  • Loading branch information
rasbt committed May 10, 2019
1 parent ec2658c commit ab920e4
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/sources/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The CHANGELOG for the current development version is available at
- Fix unreadable labels in `plot_confusion_matrix` for imbalanced datasets if `show_absolute=True` and `show_normed=True`. ([#504](https://github.com/rasbt/mlxtend/pull/504))
- Raises a more informative error if a `SparseDataFrame` is passed to `apriori` and the dataframe has integer column names that don't start with `0` due to current limitations of the `SparseDataFrame` implementation in pandas. ([#503](https://github.com/rasbt/mlxtend/pull/503))
- SequentialFeatureSelector now supports DataFrame as input for all operating modes (forward/backward/floating). [#506](https://github.com/rasbt/mlxtend/pull/506)
- `mlxtend.evaluate.feature_importance_permutation` now correctly accepts scoring functions with proper function signature as `metric` argument. [#528](https://github.com/rasbt/mlxtend/pull/528)

### Version 0.15.0 (01-19-2019)

Expand Down
3 changes: 3 additions & 0 deletions mlxtend/evaluate/feature_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ def score_func(y_true, y_pred):
def score_func(y_true, y_pred):
return np.mean(y_true == y_pred)

else:
score_func = metric

rng = np.random.RandomState(seed)

mean_importance_vals = np.zeros(X.shape[1])
Expand Down
62 changes: 62 additions & 0 deletions mlxtend/evaluate/tests/test_feature_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from mlxtend.utils import assert_raises
from mlxtend.evaluate import feature_importance_permutation

Expand Down Expand Up @@ -113,6 +115,66 @@ def test_regression():
assert sum(imp_vals[3:]) <= 0.01


def test_regression_custom_r2():
    """Permutation importance with a user-supplied metric (sklearn r2_score).

    Verifies that passing a scoring *function* (rather than the string
    'r2') as the ``metric`` argument yields sensible importances: the two
    informative features rank high, the uninformative ones near zero.
    """
    features, target = make_regression(n_samples=1000,
                                       n_features=5,
                                       n_informative=2,
                                       n_targets=1,
                                       random_state=123,
                                       shuffle=False)

    feat_train, feat_test, tgt_train, tgt_test = train_test_split(
        features, target, test_size=0.3, random_state=123)

    model = SVR(kernel='rbf', gamma='auto')
    model.fit(feat_train, tgt_train)

    imp_vals, imp_all = feature_importance_permutation(
        predict_method=model.predict,
        X=feat_test,
        y=tgt_test,
        metric=r2_score,
        num_rounds=1,
        seed=123)

    # One importance value per feature; one column per round (num_rounds=1).
    assert imp_vals.shape == (feat_train.shape[1], )
    assert imp_all.shape == (feat_train.shape[1], 1)
    # The two informative features carry clearly positive importance.
    assert imp_vals[0] > 0.2
    assert imp_vals[1] > 0.2
    # The noise features contribute (almost) nothing.
    assert sum(imp_vals[3:]) <= 0.01


def test_regression_custom_mse():
    """Permutation importance with sklearn's mean_squared_error as metric.

    MSE is a loss (lower is better), so permuting an informative feature
    *increases* the metric and the importance delta comes out negative.
    After normalizing by the largest magnitude, the most informative
    feature should sit at exactly -1.
    """
    features, target = make_regression(n_samples=1000,
                                       n_features=5,
                                       n_informative=2,
                                       n_targets=1,
                                       random_state=123,
                                       shuffle=False)

    feat_train, feat_test, tgt_train, tgt_test = train_test_split(
        features, target, test_size=0.3, random_state=123)

    model = SVR(kernel='rbf', gamma='auto')
    model.fit(feat_train, tgt_train)

    imp_vals, imp_all = feature_importance_permutation(
        predict_method=model.predict,
        X=feat_test,
        y=tgt_test,
        metric=mean_squared_error,
        num_rounds=1,
        seed=123)

    # Scale so the strongest (most negative) importance has magnitude 1.
    norm_imp_vals = imp_vals / np.abs(imp_vals).max()

    # One importance value per feature; one column per round (num_rounds=1).
    assert imp_vals.shape == (feat_train.shape[1], )
    assert imp_all.shape == (feat_train.shape[1], 1)
    # Feature 0 dominates and, being a loss-based delta, is negative.
    assert norm_imp_vals[0] == -1.


def test_n_rounds():

X, y = make_classification(n_samples=1000,
Expand Down

0 comments on commit ab920e4

Please sign in to comment.