From 367f04949560726631e43dc4737d3e9b79e7b799 Mon Sep 17 00:00:00 2001 From: dukebody Date: Sat, 19 Dec 2015 21:53:06 +0100 Subject: [PATCH] Deprecate custom CV shims in documentation and code. Refs #11. --- README.rst | 10 ++++++++-- sklearn_pandas/cross_validation.py | 15 +++++++++++++++ tests/test_dataframe_mapper.py | 25 +++++++++++++++++++++++++ tox.ini | 25 ++++++++++++------------- 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/README.rst b/README.rst index 636b574..63c97b4 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ This module provides a bridge between `Scikit-Learn `__ for details). It is deprecated and will likely be dropped in ``sklearn-pandas==2.0``. Installation ------------ @@ -174,7 +174,7 @@ The stacking of the sparse features is done without ever densifying them. Cross-Validation ---------------- -Now that we can combine features from pandas DataFrames, we may want to use cross-validation to see whether our model works. Scikit-learn provides features for cross-validation, but they expect numpy data structures and won't work with ``DataFrameMapper``. +Now that we can combine features from pandas DataFrames, we may want to use cross-validation to see whether our model works. ``scikit-learn<0.16.0`` provided features for cross-validation, but they expected numpy data structures and wouldn't work with ``DataFrameMapper``. To get around this, sklearn-pandas provides a wrapper on sklearn's ``cross_val_score`` function which passes a pandas DataFrame to the estimator rather than a numpy array:: @@ -190,6 +190,12 @@ Sklearn-pandas' ``cross_val_score`` function provides exactly the same interface Changelog --------- +Development +*********** + +* Deprecate custom cross-validation shim classes. 
+ + 1.1.0 (2015-12-06) ******************* diff --git a/sklearn_pandas/cross_validation.py b/sklearn_pandas/cross_validation.py index 9cd8cbe..2e5d6f9 100644 --- a/sklearn_pandas/cross_validation.py +++ b/sklearn_pandas/cross_validation.py @@ -1,13 +1,24 @@ +import warnings from sklearn import cross_validation from sklearn import grid_search +DEPRECATION_MSG = ''' + Custom cross-validation compatibility shims are no longer needed for + scikit-learn>=0.16.0 and will be dropped in sklearn-pandas==2.0. +''' + def cross_val_score(model, X, *args, **kwargs): + warnings.warn(DEPRECATION_MSG, DeprecationWarning) X = DataWrapper(X) return cross_validation.cross_val_score(model, X, *args, **kwargs) class GridSearchCV(grid_search.GridSearchCV): + def __init__(self, *args, **kwargs): + warnings.warn(DEPRECATION_MSG, DeprecationWarning) + super(GridSearchCV, self).__init__(*args, **kwargs) + def fit(self, X, *params, **kwparams): return super(GridSearchCV, self).fit(DataWrapper(X), *params, **kwparams) @@ -17,6 +28,10 @@ def predict(self, X, *params, **kwparams): try: class RandomizedSearchCV(grid_search.RandomizedSearchCV): + def __init__(self, *args, **kwargs): + warnings.warn(DEPRECATION_MSG, DeprecationWarning) + super(RandomizedSearchCV, self).__init__(*args, **kwargs) + def fit(self, X, *params, **kwparams): return super(RandomizedSearchCV, self).fit(DataWrapper(X), *params, **kwparams) diff --git a/tests/test_dataframe_mapper.py b/tests/test_dataframe_mapper.py index 223a314..fa697b1 100644 --- a/tests/test_dataframe_mapper.py +++ b/tests/test_dataframe_mapper.py @@ -1,4 +1,5 @@ import pytest +from pkg_resources import parse_version # In py3, mock is included with the unittest standard library # In py2, it's a separate package @@ -10,6 +11,8 @@ from pandas import DataFrame import pandas as pd from scipy import sparse +from sklearn import __version__ as sklearn_version +from sklearn.cross_validation import cross_val_score as sklearn_cv_score from sklearn.datasets import 
load_iris from sklearn.pipeline import Pipeline from sklearn.svm import SVC @@ -277,3 +280,26 @@ def test_with_car_dataframe(cars_dataframe): labels = cars_dataframe["model"] scores = cross_val_score(pipeline, data, labels) assert scores.mean() > 0.30 + + +@pytest.mark.skipif(parse_version(sklearn_version) < parse_version('0.16'), + reason='requires scikit-learn>=0.16') +def test_direct_cross_validation(iris_dataframe): + """ + Starting with sklearn>=0.16.0 we no longer need CV wrappers for dataframes. + See https://github.com/paulgb/sklearn-pandas/issues/11 + """ + pipeline = Pipeline([ + ("preprocess", DataFrameMapper([ + ("petal length (cm)", None), + ("petal width (cm)", None), + ("sepal length (cm)", None), + ("sepal width (cm)", None), + ])), + ("classify", SVC(kernel='linear')) + ]) + data = iris_dataframe.drop("species", axis=1) + labels = iris_dataframe["species"] + scores = sklearn_cv_score(pipeline, data, labels) + assert scores.mean() > 0.96 + assert (scores.std() * 2) < 0.04 diff --git a/tox.ini b/tox.ini index a3c2ff2..7258d73 100644 --- a/tox.ini +++ b/tox.ini @@ -3,18 +3,17 @@ envlist = py27, py34 [testenv] deps = - pip==7.0.1 - pytest==2.7.1 - setuptools==16.0 - wheel==0.24.0 - flake8==2.4.1 - py27: mock==1.3.0 + pip==7.0.1 + pytest==2.7.1 + setuptools==16.0 + wheel==0.24.0 + flake8==2.4.1 + py27: mock==1.3.0 commands = - pip install numpy --no-index - pip install scipy --no-index - pip install pandas --no-index - pip install scikit-learn --no-index - python setup.py develop - flake8 tests - py.test + pip install numpy --no-index + pip install scipy --no-index + pip install pandas --no-index + pip install scikit-learn --no-index + flake8 tests + py.test