Skip to content

Commit

Permalink
FIX Split data using _safe_split in _permutation_test_score (scikit-le…
Browse files Browse the repository at this point in the history
…arn#5697)

Squashed commits:
[94fd9f4] split data using _safe_split in _permutation_test_score
[522053b] adding test case test_permutation_test_score_pandas() to check if permutation_test_score plays nice with pandas dataframe/series
[21b23ce] running test_permutation_test_score_pandas on iris data to prevent warnings.
[15a48bf] adding safe_indexing to _shuffle function
[9ea5c9e] adding test case test_permutation_test_score_pandas() to check if permutation_test_score plays nice with pandas dataframe/series
[3cf5e8f] split data using _safe_split in _permutation_test_score to fix error when using Pandas DataFrame/Series
  • Loading branch information
Stijn Tonk authored and raghavrv committed Jan 5, 2017
1 parent c76e8dd commit 90c57fc
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 6 deletions.
8 changes: 5 additions & 3 deletions sklearn/cross_validation.py
Expand Up @@ -1756,8 +1756,10 @@ def _permutation_test_score(estimator, X, y, cv, scorer):
"""Auxiliary function for permutation_test_score"""
avg_score = []
for train, test in cv:
estimator.fit(X[train], y[train])
avg_score.append(scorer(estimator, X[test], y[test]))
X_train, y_train = _safe_split(estimator, X, y, train)
X_test, y_test = _safe_split(estimator, X, y, test, train)
estimator.fit(X_train, y_train)
avg_score.append(scorer(estimator, X_test, y_test))
return np.mean(avg_score)


Expand All @@ -1770,7 +1772,7 @@ def _shuffle(y, labels, random_state):
for label in np.unique(labels):
this_mask = (labels == label)
ind[this_mask] = random_state.permutation(ind[this_mask])
return y[ind]
return safe_indexing(y, ind)


def check_cv(cv, X=None, y=None, classifier=False):
Expand Down
8 changes: 5 additions & 3 deletions sklearn/model_selection/_validation.py
Expand Up @@ -688,8 +688,10 @@ def _permutation_test_score(estimator, X, y, groups, cv, scorer):
"""Auxiliary function for permutation_test_score"""
avg_score = []
for train, test in cv.split(X, y, groups):
estimator.fit(X[train], y[train])
avg_score.append(scorer(estimator, X[test], y[test]))
X_train, y_train = _safe_split(estimator, X, y, train)
X_test, y_test = _safe_split(estimator, X, y, test, train)
estimator.fit(X_train, y_train)
avg_score.append(scorer(estimator, X_test, y_test))
return np.mean(avg_score)


Expand All @@ -702,7 +704,7 @@ def _shuffle(y, groups, random_state):
for group in np.unique(groups):
this_mask = (groups == group)
indices[this_mask] = random_state.permutation(indices[this_mask])
return y[indices]
return safe_indexing(y, indices)


def learning_curve(estimator, X, y, groups=None,
Expand Down
19 changes: 19 additions & 0 deletions sklearn/model_selection/tests/test_validation.py
Expand Up @@ -1079,3 +1079,22 @@ def test_score_memmap():
break
except WindowsError:
sleep(1.)


def test_permutation_test_score_pandas():
    """Check that permutation_test_score doesn't destroy pandas containers.

    The test always runs with ``MockDataFrame`` for both X and y, and
    additionally with pandas ``DataFrame`` (X) / ``Series`` (y) when pandas
    can be imported. For each pair, a ``CheckingClassifier`` is given
    ``check_X`` / ``check_y`` callables that test the container type of the
    data it receives.
    """
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
    except ImportError:
        # pandas is optional: fall back to the mock container only.
        pass
    else:
        types.append((Series, DataFrame))

    # The raw data does not depend on the container type, so load it once
    # rather than on every loop iteration.
    iris = load_iris()
    X, y = iris.data, iris.target

    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)

        def check_df(x):
            return isinstance(x, InputFeatureType)

        def check_series(x):
            return isinstance(x, TargetType)

        # NOTE(review): CheckingClassifier presumably applies these checks
        # to the data passed to fit/score -- confirm against its definition.
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        permutation_test_score(clf, X_df, y_ser)

0 comments on commit 90c57fc

Please sign in to comment.