scikit-learn · amanp10 · Dec 11, 2016 · Dec 12, 2016 · Dec 12, 2016 · Dec 13, 2016
diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
@@ -567,6 +567,7 @@ From text
    feature_selection.chi2
    feature_selection.f_classif
    feature_selection.f_regression
+   feature_selection.featurewise_scorer
    feature_selection.mutual_info_classif
    feature_selection.mutual_info_regression
 

diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst
@@ -114,6 +114,35 @@ samples for accurate estimation.
 
     * :ref:`sphx_glr_auto_examples_feature_selection_plot_f_test_vs_mi.py`
 
+Wrapper for using SciPy score functions
+---------------------------------------
+
+The score functions in `scipy.stats` operate on a single feature vector, i.e.
+they compare one column of the input samples ``X`` with the target vector
+``y``. The feature selection algorithms in this module, in contrast, expect a
+scoring function that takes the whole matrix ``X`` together with the target
+vector ``y`` and returns one score per feature. As a result, score functions
+from `scipy.stats` cannot be passed directly to selectors such as
+:class:`SelectKBest`.
+:func:`featurewise_scorer` wraps scoring functions such as `spearmanr` or
+`pearsonr` from the `scipy.stats` module so that they can be used with
+selectors like :class:`SelectKBest` or :class:`SelectPercentile`.
+
+The following example illustrates its usage:
+
+  >>> from sklearn.feature_selection import featurewise_scorer, SelectKBest
+  >>> from scipy.stats import spearmanr
+  >>> from sklearn.datasets import make_classification
+  >>> X, y = make_classification(random_state=0)
+  >>> skb = SelectKBest(featurewise_scorer(spearmanr, axis=0), k=10)
+  >>> skb.fit(X, y) #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+  SelectKBest(k=10, score_func=...)
+  >>> new_X = skb.transform(X)
+
+By default this wrapper returns the absolute value of each score, i.e. a
+score of -1 is treated the same as +1. To keep the signed score values, set
+``absolute_score=False``.
+
 .. _rfe:
 
 Recursive feature elimination

diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py
@@ -24,6 +24,8 @@
 
 from .mutual_info_ import mutual_info_regression, mutual_info_classif
 
+from .base import featurewise_scorer
+
 
 __all__ = ['GenericUnivariateSelect',
            'RFE',
@@ -39,5 +41,6 @@
            'f_classif',
            'f_oneway',
            'f_regression',
+           'featurewise_scorer',
            'mutual_info_classif',
            'mutual_info_regression']
diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py
@@ -11,7 +11,7 @@
 from scipy.sparse import issparse, csc_matrix
 
 from ..base import TransformerMixin
-from ..utils import check_array, safe_mask
+from ..utils import check_array, check_X_y, safe_mask
 from ..externals import six
 
 
@@ -120,3 +120,76 @@ def inverse_transform(self, X):
         Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
         Xt[:, support] = X
         return Xt
+
+
+def featurewise_scorer(score_func, absolute_score=True, **kwargs):
+    """Wrap a per-feature score function for use with univariate selectors.
+
+    Parameters
+    ----------
+    score_func : callable
+        Function taking two 1-d arrays (feature vector and target vector) and
+        returning a pair of values (score, p-value) or just a score.
+    absolute_score : bool, optional (default=True)
+        If True, the absolute value of each score is returned, which is
+        useful when using correlation coefficients.
+    **kwargs : keyword arguments
+        Extra keyword arguments forwarded to `score_func` on every call.
+
+    Returns
+    -------
+    call_scorer : callable
+        Function with signature ``call_scorer(X, y)`` that applies
+        `score_func` to each column of ``X`` and ``y``. It returns
+        ``(scores, p_vals)``, two arrays of shape (n_features,), when
+        `score_func` returns tuples, and only ``scores`` otherwise.
+
+    Notes
+    -----
+    This wrapper makes scoring functions such as `spearmanr` or `pearsonr`
+    from the `scipy.stats` module usable with feature selection algorithms
+    like `SelectKBest`. Scores are returned as absolute values by default,
+    i.e. a score of -1 ranks the same as +1. Set `absolute_score=False` to
+    keep the signed values. Columns of a sparse ``X`` are densified one at a
+    time before being passed to `score_func`.
+
+    Examples
+    --------
+    >>> from sklearn.feature_selection import featurewise_scorer, SelectKBest
+    >>> from scipy.stats import spearmanr
+    >>> from sklearn.datasets import make_classification
+    >>> X, y = make_classification(random_state=0)
+    >>> skb = SelectKBest(featurewise_scorer(spearmanr, axis=0), k=10)
+    >>> skb.fit(X, y) #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    SelectKBest(k=10, score_func=...)
+    >>> new_X = skb.transform(X)
+
+    """
+    def call_scorer(X, y):
+        X, y = check_X_y(X, y, ('csr', 'csc'), multi_output=True)
+        sparse_input = issparse(X)
+        scores = []
+        p_vals = []
+
+        for i in six.moves.range(X.shape[1]):
+            column = X[:, i]
+            if sparse_input:
+                # Slicing a sparse matrix yields an (n_samples, 1) sparse
+                # matrix; score functions expect a dense 1-d vector.
+                column = column.toarray().ravel()
+            score_func_ret = score_func(column, y, **kwargs)
+
+            if isinstance(score_func_ret, tuple):
+                score, p_val = score_func_ret
+                p_vals.append(p_val)
+            else:
+                score = score_func_ret
+
+            scores.append(abs(score) if absolute_score else score)
+
+        scores = np.asarray(scores)
+        if p_vals:
+            return scores, np.asarray(p_vals)
+        return scores
+
+    return call_scorer
diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py
@@ -1,10 +1,13 @@
 import numpy as np
 from scipy import sparse as sp
+from scipy.stats import spearmanr
 
 from numpy.testing import assert_array_equal
 
+from sklearn.datasets import make_classification
 from sklearn.base import BaseEstimator
-from sklearn.feature_selection.base import SelectorMixin
+from sklearn.feature_selection.base import featurewise_scorer, SelectorMixin
+from sklearn.feature_selection import SelectKBest
 from sklearn.utils import check_array
 from sklearn.utils.testing import assert_raises, assert_equal
 
@@ -113,3 +116,37 @@ def test_get_support():
     sel.fit(X, y)
     assert_array_equal(support, sel.get_support())
     assert_array_equal(support_inds, sel.get_support(indices=True))
+
+
+def test_featurewise_scorer():
+    # Exercise featurewise_scorer with a scipy.stats correlation function.
+    X, y = make_classification(random_state=0)
+    wrapped_spearman = featurewise_scorer(spearmanr, axis=0)
+
+    # A wrapped spearmanr must be usable as SelectKBest's score_func.
+    selector = SelectKBest(wrapped_spearman, k=10).fit(X, y)
+    assert_equal(selector.transform(X).shape[1], 10)
+
+    # A score function returning only scores yields the same scores.
+    scores_only = featurewise_scorer(
+        lambda *args, **kwargs: spearmanr(*args, **kwargs)[0], axis=0)(X, y)
+    abs_scores, _ = wrapped_spearman(X, y)
+    assert_array_equal(scores_only, abs_scores)
+
+    # absolute_score=False keeps the sign; magnitudes must still match.
+    signed = featurewise_scorer(spearmanr, absolute_score=False, axis=0)
+    signed_scores, _ = signed(X, y)
+    assert_array_equal(abs(signed_scores), abs_scores)
+
+
+def test_featurewise_scorer_list_input():
+    # Plain Python lists must give the same result as numpy arrays.
+    X_arr, y_arr = make_classification(random_state=0)
+    scorer = featurewise_scorer(spearmanr, axis=0)
+
+    scores_from_arrays, pvals_from_arrays = scorer(X_arr, y_arr)
+    scores_from_lists, pvals_from_lists = scorer(X_arr.tolist(),
+                                                 y_arr.tolist())
+
+    assert_array_equal(scores_from_arrays, scores_from_lists)
+    assert_array_equal(pvals_from_arrays, pvals_from_lists)