-
-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MRG] Issue #6673: Make a wrapper around functions that score an individual feature #8038
Changes from all commits
a491eb1
d5bcc43
29c62d6
3c6ce4d
9ebd8eb
08a459c
d402b9c
08129f7
c7341ed
dec0bfd
aef38db
92660d5
6becbfe
00b93e7
b57f9fe
f642178
30363f2
efd93d5
3ed84e1
08d3373
5ff8aae
5651f75
30ebf96
4a2f098
7222458
d23d9fd
5f4ccbf
ea7e362
4ea8a8d
0fef58c
becef97
ea794a9
28cff94
b91a5b7
79b9418
8dd85d7
100649c
9520d69
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ | |
from scipy.sparse import issparse, csc_matrix | ||
|
||
from ..base import TransformerMixin | ||
from ..utils import check_array, safe_mask | ||
from ..utils import check_array, check_X_y, safe_mask | ||
from ..externals import six | ||
|
||
|
||
|
@@ -120,3 +120,76 @@ def inverse_transform(self, X): | |
Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype) | ||
Xt[:, support] = X | ||
return Xt | ||
|
||
|
||
def featurewise_scorer(score_func, absolute_score=True, **kwargs):
    """Wrap a single-feature score function into a feature-wise scorer.

    The returned callable compares each column of `X` to `y` by calling
    `score_func` once per feature, and gathers the per-feature results into
    arrays.

    Parameters
    ----------
    score_func : callable
        Function taking two 1-d arrays (feature vector and target vector) and
        returning a pair of values (score, p-value) or just a score.

    absolute_score : bool, optional (default=True)
        If True, the absolute value of each score is returned, which is
        useful when using correlation coefficients (a score of +1 is then
        treated the same as -1). Set to False to keep the signed scores.

    kwargs : keyword arguments
        Extra keyword arguments forwarded unchanged to `score_func` on every
        call.

    Returns
    -------
    scorer : callable
        Function with signature ``scorer(X, y)``. It applies `score_func` to
        every column of `X` together with `y` and returns:

        - ``scores`` : array, shape (n_features,), when `score_func` returns
          just a score;
        - ``(scores, p_vals)`` : pair of arrays, each of shape
          (n_features,), when `score_func` returns a (score, p-value) tuple.

        Whether p-values are returned therefore depends on `score_func`.

    Notes
    -----
    This wrapper is meant for scoring functions like `spearmanr` or
    `pearsonr` from the `scipy.stats` module, so that they can be used with
    feature selection algorithms like `SelectKBest`.

    Sparse `X` (CSR or CSC) is accepted; each column is converted to a dense
    1-d array before it is handed to `score_func`.

    Examples
    --------
    >>> from sklearn.feature_selection import featurewise_scorer, SelectKBest
    >>> from scipy.stats import spearmanr
    >>> from sklearn.datasets import make_classification
    >>> X, y = make_classification(random_state=0)
    >>> skb = SelectKBest(featurewise_scorer(spearmanr, axis=0), k=10)
    >>> skb.fit(X, y) #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    SelectKBest(k=10, score_func=...)
    >>> new_X = skb.transform(X)

    """
    def call_scorer(X, y):
        X, y = check_X_y(X, y, ('csr', 'csc'), multi_output=True)

        sparse_input = issparse(X)
        if sparse_input:
            # Columns are extracted one by one below; CSC is the format
            # suited to column slicing.
            X = X.tocsc()

        scores = []
        p_vals = []

        for i in six.moves.range(X.shape[1]):
            feature = X[:, i]
            if sparse_input:
                # Slicing a sparse matrix yields an (n_samples, 1) sparse
                # matrix, but score_func is documented to take a 1-d array.
                feature = feature.toarray().ravel()

            score_func_ret = score_func(feature, y, **kwargs)

            # Tuple results (including namedtuples) carry a p-value next to
            # the score; anything else is treated as a bare score.
            if isinstance(score_func_ret, tuple):
                score, p_val = score_func_ret
                p_vals.append(p_val)
            else:
                score = score_func_ret

            if absolute_score:
                score = abs(score)
            scores.append(score)

        scores = np.asarray(scores)
        if len(p_vals) > 0:
            return (scores, np.asarray(p_vals))
        return scores

    return call_scorer
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,13 @@ | ||
import numpy as np | ||
from scipy import sparse as sp | ||
from scipy.stats import spearmanr | ||
|
||
from numpy.testing import assert_array_equal | ||
|
||
from sklearn.datasets import make_classification | ||
from sklearn.base import BaseEstimator | ||
from sklearn.feature_selection.base import SelectorMixin | ||
from sklearn.feature_selection.base import featurewise_scorer, SelectorMixin | ||
from sklearn.feature_selection import SelectKBest | ||
from sklearn.utils import check_array | ||
from sklearn.utils.testing import assert_raises, assert_equal | ||
|
||
|
@@ -113,3 +116,37 @@ def test_get_support(): | |
sel.fit(X, y) | ||
assert_array_equal(support, sel.get_support()) | ||
assert_array_equal(support_inds, sel.get_support(indices=True)) | ||
|
||
|
||
def test_featurewise_scorer():
    """Exercise featurewise_scorer with SelectKBest, with a score-only
    function, and with the absolute_score flag."""
    X, y = make_classification(random_state=0)

    # The wrapped spearmanr must be accepted as a SelectKBest score function
    # and lead to the requested number of selected features.
    selector = SelectKBest(featurewise_scorer(spearmanr, axis=0), k=10)
    selector.fit(X, y)
    X_selected = selector.transform(X)
    assert_equal(X_selected.shape[1], 10)

    # A custom score function that returns only the score (no p-value) must
    # give the same scores as the (score, p-value) variant.
    def spearman_score_only(*args, **kwargs):
        return spearmanr(*args, **kwargs)[0]

    scores_only = featurewise_scorer(spearman_score_only, axis=0)(X, y)
    scores_abs, _ = featurewise_scorer(spearmanr, axis=0)(X, y)
    assert_array_equal(scores_only, scores_abs)

    # With absolute_score=False the signed scores are returned; taking their
    # absolute value must recover the default output.
    signed_scorer = featurewise_scorer(spearmanr, absolute_score=False,
                                       axis=0)
    scores_signed, _ = signed_scorer(X, y)
    assert_array_equal(abs(scores_signed), scores_abs)
|
||
|
||
def test_featurewise_scorer_list_input():
    """featurewise_scorer must give identical results for array and list
    inputs."""
    X, y = make_classification(random_state=0)

    # Reference result computed from the ndarray inputs.
    expected_scores, expected_pvals = featurewise_scorer(
        spearmanr, axis=0)(X, y)

    # Same data, passed as plain (nested) Python lists.
    X_as_list = X.tolist()
    y_as_list = y.tolist()
    list_scores, list_pvals = featurewise_scorer(
        spearmanr, axis=0)(X_as_list, y_as_list)

    assert_array_equal(expected_scores, list_scores)
    assert_array_equal(expected_pvals, list_pvals)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Clarify that it compares each column of X to y