0.20.4: Tests failing due to scikit-learn deprecation warnings #7860

sanjayankur31 opened this issue Jun 2, 2020 · 2 comments


Describe the bug

On Fedora 33, we have two tests failing due to deprecation warnings from scikit-learn.

This was observed with mne-python 0.20.4; I'm now looking at updating the Fedora package to 0.20.5.

Steps to reproduce

Run tests: pytest

Expected results

Tests don't fail due to the deprecation warnings.

Actual results

Tests fail. Complete log attached.

Additional information

Full failure log:

=================================== FAILURES ===================================
________________________________ test_get_coef _________________________________

    def test_get_coef():
        """Test getting linear coefficients (filters/patterns) from estimators."""
        from sklearn.base import TransformerMixin, BaseEstimator
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn import svm
        from sklearn.linear_model import Ridge, LinearRegression
        from sklearn.model_selection import GridSearchCV

        lm_classification = LinearModel()
        assert (is_classifier(lm_classification))

        lm_regression = LinearModel(Ridge())
        assert (is_regressor(lm_regression))

        parameters = {'kernel': ['linear'], 'C': [1, 10]}
        lm_gs_classification = LinearModel(
            GridSearchCV(svm.SVC(), parameters, cv=2, refit=True, n_jobs=1))
        assert (is_classifier(lm_gs_classification))

        lm_gs_regression = LinearModel(
            GridSearchCV(svm.SVR(), parameters, cv=2, refit=True, n_jobs=1))
        assert (is_regressor(lm_gs_regression))

        # Define a classifier, an invertible transformer and an non-invertible one.

        class Clf(BaseEstimator):
            def fit(self, X, y):
                return self

        class NoInv(TransformerMixin):
            def fit(self, X, y):
                return self

            def transform(self, X):
                return X

        class Inv(NoInv):
            def inverse_transform(self, X):
                return X

        X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)

        # I. Test inverse function

        # Check that we retrieve the right number of inverse functions even if
        # there are nested pipelines
        good_estimators = [
            (1, make_pipeline(Inv(), Clf())),
            (2, make_pipeline(Inv(), Inv(), Clf())),
            (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
        ]

        for expected_n, est in good_estimators:
            est.fit(X, y)
            assert (expected_n == len(_get_inverse_funcs(est)))

        bad_estimators = [
            Clf(),  # no preprocessing
            Inv(),  # final estimator isn't classifier
            make_pipeline(NoInv(), Clf()),  # first step isn't invertible
            make_pipeline(Inv(), make_pipeline(
                Inv(), NoInv()), Clf()),  # nested step isn't invertible
        ]
        for est in bad_estimators:
            est.fit(X, y)
            invs = _get_inverse_funcs(est)
            assert_equal(invs, list())

        # II. Test get coef for classification/regression estimators and pipelines
        rng = np.random.RandomState(0)
        for clf in (lm_regression,
                    make_pipeline(StandardScaler(), lm_classification),
                    make_pipeline(StandardScaler(), lm_gs_regression)):

            # generate some categorical/continuous data
            # according to the type of estimator.
            if is_classifier(clf):
                n, n_features = 1000, 3
                X = rng.rand(n, n_features)
                y = np.arange(n) % 2
            else:
                X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)
                y = np.ravel(y)

            clf.fit(X, y)

            # Retrieve final linear model
            filters = get_coef(clf, 'filters_', False)
            if hasattr(clf, 'steps'):
                if hasattr(clf.steps[-1][-1].model, 'best_estimator_'):
                    # Linear Model with GridSearchCV
                    coefs = clf.steps[-1][-1].model.best_estimator_.coef_
                else:
                    # Standard Linear Model
                    coefs = clf.steps[-1][-1].model.coef_
            else:
                if hasattr(clf.model, 'best_estimator_'):
                    # Linear Model with GridSearchCV
                    coefs = clf.model.best_estimator_.coef_
                else:
                    # Standard Linear Model
                    coefs = clf.model.coef_
            if coefs.ndim == 2 and coefs.shape[0] == 1:
                coefs = coefs[0]
            assert_array_equal(filters, coefs)
            patterns = get_coef(clf, 'patterns_', False)
            assert (filters[0] != patterns[0])
            n_chans = X.shape[1]
            assert_array_equal(filters.shape, patterns.shape, [n_chans, n_chans])

        # Inverse transform linear model
        filters_inv = get_coef(clf, 'filters_', True)
        assert (filters[0] != filters_inv[0])
        patterns_inv = get_coef(clf, 'patterns_', True)
        assert (patterns[0] != patterns_inv[0])

        # Check with search_light and combination of preprocessing ending with sl:
        slider = SlidingEstimator(make_pipeline(StandardScaler(), lm_regression))
        X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
>       clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)

A          = array([[-0.42231482],
       [ 0.10471403],
       [ 0.22805333]])
BaseEstimator = <class 'sklearn.base.BaseEstimator'>
Clf        = <class 'mne.decoding.tests.test_base.test_get_coef.<locals>.Clf'>
GridSearchCV = <class 'sklearn.model_selection._search.GridSearchCV'>
Inv        = <class 'mne.decoding.tests.test_base.test_get_coef.<locals>.Inv'>
LinearRegression = <class 'sklearn.linear_model._base.LinearRegression'>
NoInv      = <class 'mne.decoding.tests.test_base.test_get_coef.<locals>.NoInv'>
Ridge      = <class 'sklearn.linear_model._ridge.Ridge'>
StandardScaler = <class 'sklearn.preprocessing._data.StandardScaler'>
TransformerMixin = <class 'sklearn.base.TransformerMixin'>
X          = array([[[-0.51151882,  0.51151882],
        [ 1.46567234, -1.46567234],
        [-0.53831733,  0.53831733]],


       [[-0.32358505,  0.32358505],
        [ 2.03200796, -2.03200796],
        [-0.52246677,  0.52246677]]])
bad_estimators = [Clf(), <mne.decoding.tests.test_base.test_get_coef.<locals>.Inv object at 0x7ffb445a3370>, Pipeline(steps=[('noinv',
....decoding.tests.test_base.test_get_coef.<locals>.NoInv object at 0x7ffb445a3220>)])),
                ('clf', Clf())])]
clf        = Pipeline(steps=[('standardscaler', StandardScaler()),
                 LinearModel(model=GridSearchCV(cv=2, estimator=SVR(), n_jobs=1,
             param_grid={'C': [1, 10], 'kernel': ['linear']})))])
coefs      = array([-1.96881649,  0.3818861 ,  0.83271859])
est        = Pipeline(steps=[('inv',
                 <mne.decoding.tests.test_base.test_get_coef.<locals>.Inv object at 0x7ffb445a...e.decoding.tests.test_base.test_get_coef.<locals>.NoInv object at 0x7ffb445a3220>)])),
                ('clf', Clf())])
expected_n = 3
filters    = array([-1.96881649,  0.3818861 ,  0.83271859])
filters_inv = array([-2.56903749,  0.9679832 ,  1.49243367])
good_estimators = [(1, Pipeline(steps=[('inv',
                 <mne.decoding.tests.test_base.test_get_coef.<locals>.Inv object at 0x7ff...e.decoding.tests.test_base.test_get_coef.<locals>.Inv object at 0x7ffb445a3490>)])),
                ('clf', Clf())]))]
invs       = []
lm_classification = LinearModel(model=LogisticRegression(solver='liblinear'))
lm_gs_classification = LinearModel(model=GridSearchCV(cv=2, estimator=SVC(), n_jobs=1,
             param_grid={'C': [1, 10], 'kernel': ['linear']}))
lm_gs_regression = LinearModel(model=GridSearchCV(cv=2, estimator=SVR(), n_jobs=1,
             param_grid={'C': [1, 10], 'kernel': ['linear']}))
lm_regression = LinearModel(model=Ridge())
make_pipeline = <function make_pipeline at 0x7ffb44610a60>
n          = 1000
n_chans    = 3
n_features = 3
parameters = {'C': [1, 10], 'kernel': ['linear']}
patterns   = array([-2.48265066,  1.06274699,  1.88762667])
patterns_inv = array([-3.4191915 ,  1.68874717,  2.77278389])
rng        = RandomState(MT19937) at 0x7FFB487EBB40
slider     = <SlidingEstimator(base_estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearmodel', LinearModel(model=Ridge()))]),
         n_jobs=1, scoring=None, verbose=None)>
svm        = <module 'sklearn.svm' from '/usr/lib64/python3.9/site-packages/sklearn/svm/'>
y          = array([ 3.89402630e+00,  7.99337228e+00,  6.78081920e+00, -2.45890531e+00,
        3.80097555e+00,  2.23597243e-01,  3...4990e+00,  7.08108388e-02,  1.02111595e+00,
       -3.01212899e+00, -4.47927084e-01,  2.52089705e+00,  3.61384968e+00])

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
mne/decoding/ in __init__
    self._scaler = StandardScaler(self.with_mean, self.with_std)
        StandardScaler = <class 'sklearn.preprocessing._data.StandardScaler'>
        info       = None
        scalings   = 'mean'
        self       = Scaler(info=None, scalings='mean', with_mean=True, with_std=True)
        with_mean  = True
        with_std   = True
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

args = (<[FutureWarning('From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an ...e attribute. Previously it would return None.') raised in repr()] StandardScaler object at 0x7ffb445664f0>, True, True)
kwargs = {}, extra_args = 2, args_msg = ['copy=True', 'with_mean=True']

    def inner_f(*args, **kwargs):
        extra_args = len(args) - len(all_args)
        if extra_args > 0:
            # ignore first 'self' argument for instance methods
            args_msg = ['{}={}'.format(name, arg)
                        for name, arg in zip(kwonly_args[:extra_args],
                                             args[-extra_args:])]
>           warnings.warn("Pass {} as keyword args. From version 0.25 "
                          "passing these as positional arguments will "
                          "result in an error".format(", ".join(args_msg)),
                          FutureWarning)
E           FutureWarning: Pass copy=True, with_mean=True as keyword args. From version 0.25 passing these as positional arguments will result in an error

all_args   = ['self']
args       = (<[FutureWarning('From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an ...e attribute. Previously it would return None.') raised in repr()] StandardScaler object at 0x7ffb445664f0>, True, True)
args_msg   = ['copy=True', 'with_mean=True']
extra_args = 2
f          = <function StandardScaler.__init__ at 0x7ffb49156310>
kwargs     = {}
kwonly_args = ['copy', 'with_mean', 'with_std']
sig        = <Signature (self, *, copy=True, with_mean=True, with_std=True)>

/usr/lib64/python3.9/site-packages/sklearn/utils/ FutureWarning
_________________________________ test_scaler __________________________________

    def test_scaler():
        """Test methods of Scaler."""
        raw = io.read_raw_fif(raw_fname)
        events = read_events(event_name)
        picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                           eog=False, exclude='bads')
        picks = picks[1:13:3]

        epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                        baseline=(None, 0), preload=True)
        epochs_data = epochs.get_data()
        y = epochs.events[:, -1]

        methods = (None, dict(mag=5, grad=10, eeg=20), 'mean', 'median')
        infos = (epochs.info, epochs.info, None, None)
        epochs_data_t = epochs_data.transpose([1, 0, 2])
        for method, info in zip(methods, infos):
            if method in ('mean', 'median') and not check_version('sklearn'):
                with pytest.raises(ImportError, match='No module'):
                    Scaler(info, method)
>           scaler = Scaler(info, method)

X          = array([[[ 2.45472304e-11,  1.49036756e-11, -4.38343400e-12, ...,
         -1.40269888e-11, -2.36705436e-11, -4.3834340...[ 2.00840976e-11, -8.84656681e-12,  7.96988001e-13, ...,
          5.86583168e-11,  2.97276524e-11,  2.97276524e-11]]])
X2         = array([[[ 2.45472304e-11,  1.49036756e-11, -4.38343400e-12, ...,
         -1.40269888e-11, -2.36705436e-11, -4.3834340...[ 2.00840976e-11, -8.84656681e-12,  7.96988001e-13, ...,
          5.86583168e-11,  2.97276524e-11,  2.97276524e-11]]])
Xi         = array([[[ 2.45472304e-12,  1.49036756e-12, -4.38343400e-13, ...,
         -1.40269888e-12, -2.36705436e-12, -4.3834340...[ 2.00840976e-12, -8.84656681e-13,  7.96988001e-14, ...,
          5.86583168e-12,  2.97276524e-12,  2.97276524e-12]]])
epochs     = <Epochs  |   14 events (all good), -0.199795 - 0.499488 sec, baseline [None, 0], ~3.2 MB, data loaded,
 'aud_l': 7
 'vis_l': 7>
epochs_data = array([[[ 2.45472304e-12,  1.49036756e-12, -4.38343400e-13, ...,
         -1.40269888e-12, -2.36705436e-12, -4.3834340...[ 2.00840976e-12, -8.84656681e-13,  7.96988001e-14, ...,
          5.86583168e-12,  2.97276524e-12,  2.97276524e-12]]])
epochs_data_t = array([[[ 2.45472304e-12,  1.49036756e-12, -4.38343400e-13, ...,
         -1.40269888e-12, -2.36705436e-12, -4.3834340...[ 2.00840976e-12, -8.84656681e-13,  7.96988001e-14, ...,
          5.86583168e-12,  2.97276524e-12,  2.97276524e-12]]])
events     = array([[27977,     0,     2],
       [28345,     0,     3],
       [28771,     0,     1],
       [29219,     0,     4]...      [38711,     0,     1],
       [39130,     0,     4],
       [39563,     0,     2],
       [39926,     0,     3]])
info       = None
infos      = (<Info | 20 non-empty values
 acq_pars: ACQch001 110113 ACQch002 110112 ACQch003 110111 ACQch004 110122 ...
 bads: []
 proj_id: 1 item (ndarray)
 proj_name: test
 projs: PCA-v1: on, PCA-v2: on, PCA-v3: on
 sfreq: 600.6 Hz
>, None, None)
key        = 'grad'
means      = array([0., 0., 0., 0.])
method     = 'mean'
methods    = (None, {'eeg': 20, 'grad': 10, 'mag': 5}, 'mean', 'median')
picks      = array([ 1,  4,  7, 10])
raw        = <Raw | test_raw.fif, 376 x 14400 (24.0 s), ~3.6 MB, data not loaded>
scaler     = Scaler(info=<Info | 20 non-empty values
 acq_pars: ACQch001 110113 ACQch002 110112 ACQch003 110111 ACQch004 110122 ......: on, PCA-v3: on
 sfreq: 600.6 Hz
    scalings={'mag': 5, 'grad': 10, 'eeg': 20}, with_mean=True,
sd         = {'eeg': 20, 'grad': 10, 'mag': 5}
stds       = array([0.1, 0.1, 0.1, 0.1])
y          = array([3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1])

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
mne/decoding/ in __init__
    self._scaler = StandardScaler(self.with_mean, self.with_std)
        StandardScaler = <class 'sklearn.preprocessing._data.StandardScaler'>
        info       = None
        scalings   = 'mean'
        self       = Scaler(info=None, scalings='mean', with_mean=True, with_std=True)
        with_mean  = True
        with_std   = True
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

args = (<[FutureWarning('From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an ...e attribute. Previously it would return None.') raised in repr()] StandardScaler object at 0x7ffb63c859d0>, True, True)
kwargs = {}, extra_args = 2, args_msg = ['copy=True', 'with_mean=True']

    def inner_f(*args, **kwargs):
        extra_args = len(args) - len(all_args)
        if extra_args > 0:
            # ignore first 'self' argument for instance methods
            args_msg = ['{}={}'.format(name, arg)
                        for name, arg in zip(kwonly_args[:extra_args],
                                             args[-extra_args:])]
>           warnings.warn("Pass {} as keyword args. From version 0.25 "
                          "passing these as positional arguments will "
                          "result in an error".format(", ".join(args_msg)),
                          FutureWarning)
E           FutureWarning: Pass copy=True, with_mean=True as keyword args. From version 0.25 passing these as positional arguments will result in an error

all_args   = ['self']
args       = (<[FutureWarning('From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an ...e attribute. Previously it would return None.') raised in repr()] StandardScaler object at 0x7ffb63c859d0>, True, True)
args_msg   = ['copy=True', 'with_mean=True']
extra_args = 2
f          = <function StandardScaler.__init__ at 0x7ffb49156310>
kwargs     = {}
kwonly_args = ['copy', 'with_mean', 'with_std']
sig        = <Signature (self, *, copy=True, with_mean=True, with_std=True)>

/usr/lib64/python3.9/site-packages/sklearn/utils/ FutureWarning
----------------------------- Captured stdout call -----------------------------
Opening raw data file /builddir/build/BUILD/mne-python-0.20.4/mne/decoding/tests/../../io/tests/data/test_raw.fif...
    Read a total of 3 projection items:
        PCA-v1 (1 x 102)  idle
        PCA-v2 (1 x 102)  idle
        PCA-v3 (1 x 102)  idle
    Range : 25800 ... 40199 =     42.956 ...    66.930 secs
Current compensation grade : 0
15 matching events found
Applying baseline correction (mode: mean)
Not setting metadata
3 projection items activated
Loading data for 15 events and 421 original time points ...
1 bad epochs dropped

cbrnr commented Jun 3, 2020

This is already fixed in master (not sure about 0.20.5 though).

Great. I'll test these with each new release when I update the Fedora package. Thanks.

