Skip to content

Commit

Permalink
REL backport fixes for 0.23.2 (#18068)
Browse files Browse the repository at this point in the history
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
Co-authored-by: Lucy Liu <jliu176@gmail.com>
Co-authored-by: Kevin Markham <justmarkham@users.noreply.github.com>
Co-authored-by: Juan Carlos Alfaro Jiménez <JuanCarlos.Alfaro@uclm.es>
Co-authored-by: Forrest Koch <forrest.c.koch@gmail.com>
Co-authored-by: Chiara Marmo <cmarmo@users.noreply.github.com>
Co-authored-by: Swier <swierh@users.noreply.github.com>
Co-authored-by: t-kusanagi2 <61999178+t-kusanagi2@users.noreply.github.com>
Co-authored-by: Markus Rempfler <markus.rempfler@tum.de>
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
Co-authored-by: Bruno Charron <bcharron@gmx.com>
Co-authored-by: Bruno Charron <bruno@charron.email>
Co-authored-by: amy12xx <meezamanda@yahoo.com>
Co-authored-by: Allan <allanbutler9@gmail.com>
Co-authored-by: Roman Yurchak <rth.yurchak@gmail.com>
Co-authored-by: Hirofumi Suzuki <hs-nazuna@users.noreply.github.com>
Co-authored-by: Loïc Estève <loic.esteve@ymail.com>
Co-authored-by: Joel Nothman <joel.nothman@gmail.com>
Co-authored-by: Charles Patel <charlespatel07@gmail.com>
  • Loading branch information
21 people committed Aug 4, 2020
1 parent 24e7be0 commit 947f542
Show file tree
Hide file tree
Showing 60 changed files with 895 additions and 528 deletions.
5 changes: 1 addition & 4 deletions .binder/requirements.txt
@@ -1,8 +1,5 @@
--extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
--pre
matplotlib
scikit-image
pandas
sphinx-gallery
scikit-learn

scikit-learn>=0.23,<0.24
3 changes: 2 additions & 1 deletion build_tools/azure/test_script.cmd
Expand Up @@ -10,7 +10,8 @@ mkdir %TMP_FOLDER%
cd %TMP_FOLDER%

if "%CHECK_WARNINGS%" == "true" (
set PYTEST_ARGS=%PYTEST_ARGS% -Werror::DeprecationWarning -Werror::FutureWarning
REM numpy 1.19.0's tostring() deprecation is ignored until scipy and joblib remove its usage
set PYTEST_ARGS=%PYTEST_ARGS% -Werror::DeprecationWarning -Werror::FutureWarning -Wignore:tostring:DeprecationWarning
)

if "%COVERAGE%" == "true" (
Expand Down
3 changes: 2 additions & 1 deletion build_tools/azure/test_script.sh
Expand Up @@ -29,7 +29,8 @@ if [[ "$COVERAGE" == "true" ]]; then
fi

if [[ -n "$CHECK_WARNINGS" ]]; then
TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning"
# numpy 1.19.0's tostring() deprecation is ignored until scipy and joblib remove its usage
TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning -Wignore:tostring:DeprecationWarning"
fi

if [[ "$PYTHON_VERSION" == "*" ]]; then
Expand Down
7 changes: 3 additions & 4 deletions conftest.py
Expand Up @@ -7,7 +7,6 @@

import platform
import sys
from distutils.version import LooseVersion
import os

import pytest
Expand All @@ -17,10 +16,11 @@
from sklearn.utils import _IS_32BIT
from sklearn.externals import _pilutil
from sklearn._build_utils.deprecated_modules import _DEPRECATED_MODULES
from sklearn.utils.fixes import np_version, parse_version

PYTEST_MIN_VERSION = '3.3.0'

if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION:
if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION):
raise ImportError('Your version of pytest is too old, you should have '
'at least pytest >= {} installed.'
.format(PYTEST_MIN_VERSION))
Expand Down Expand Up @@ -54,8 +54,7 @@ def pytest_collection_modifyitems(config, items):
# run doctests only for numpy >= 1.14.
skip_doctests = False
try:
import numpy as np
if LooseVersion(np.__version__) < LooseVersion('1.14'):
if np_version < parse_version('1.14'):
reason = 'doctests are only run for numpy >= 1.14'
skip_doctests = True
elif _IS_32BIT:
Expand Down
2 changes: 1 addition & 1 deletion doc/conf.py
Expand Up @@ -80,7 +80,7 @@

# General information about the project.
project = 'scikit-learn'
copyright = '2007 - 2019, scikit-learn developers (BSD License)'
copyright = '2007 - 2020, scikit-learn developers (BSD License)'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
Expand Down
4 changes: 2 additions & 2 deletions doc/modules/linear_model.rst
Expand Up @@ -997,8 +997,8 @@ Examples of use cases include:
* Risk modeling / insurance policy pricing: number of claim events /
policyholder per year (Poisson), cost per event (Gamma), total cost per
policyholder per year (Tweedie / Compound Poisson Gamma).
* Predictive maintenance: number of production interruption events per year:
Poisson, duration of interruption: Gamma, total interruption time per year
* Predictive maintenance: number of production interruption events per year
(Poisson), duration of interruption (Gamma), total interruption time per year
(Tweedie / Compound Poisson Gamma).


Expand Down
2 changes: 1 addition & 1 deletion doc/modules/model_persistence.rst
Expand Up @@ -20,7 +20,7 @@ Persistence example
-------------------

It is possible to save a model in scikit-learn by using Python's built-in
persistence model, namely `pickle <https://docs.python.org/2/library/pickle.html>`_::
persistence model, namely `pickle <https://docs.python.org/3/library/pickle.html>`_::

>>> from sklearn import svm
>>> from sklearn import datasets
Expand Down
4 changes: 4 additions & 0 deletions doc/templates/index.html
Expand Up @@ -155,6 +155,10 @@ <h4 class="sk-landing-call-header">News</h4>
<ul class="sk-landing-call-list list-unstyled">
<li><strong>On-going development:</strong>
<a href="https://scikit-learn.org/dev/whats_new.html"><strong>What's new</strong> (Changelog)</a>
<li><strong>August 2020.</strong> scikit-learn 0.23.2 is available for download (<a href="whats_new/v0.23.html#version-0-23-2">Changelog</a>).
</li>
<li><strong>May 2020.</strong> scikit-learn 0.23.1 is available for download (<a href="whats_new/v0.23.html#version-0-23-1">Changelog</a>).
</li>
<li><strong>May 2020.</strong> scikit-learn 0.23.0 is available for download (<a href="whats_new/v0.23.html#version-0-23-0">Changelog</a>).
</li>
<li><strong>Scikit-learn from 0.23 requires Python 3.6 or greater.</strong>
Expand Down
2 changes: 1 addition & 1 deletion doc/themes/scikit-learn-modern/static/css/theme.css
Expand Up @@ -83,12 +83,12 @@ span.highlighted {
}

div.highlight {
padding: 0.2rem 0.5rem;
border: 1px solid #ddd;
margin-bottom: 1rem;
}

div.highlight pre {
padding: 0.2rem 0.5rem;
margin-bottom: 0;
line-height: 1.2rem;
}
Expand Down
127 changes: 121 additions & 6 deletions doc/whats_new/v0.23.rst
Expand Up @@ -2,6 +2,122 @@

.. currentmodule:: sklearn

.. _changes_0_23_2:

Version 0.23.2
==============

**August 3 2020**

Changed models
--------------

The following estimators and functions, when fit with the same data and
parameters, may produce different models from the previous version. This often
occurs due to changes in the modelling logic (bug fixes or enhancements), or in
random sampling procedures.

- |Fix| ``inertia_`` attribute of :class:`cluster.KMeans` and
:class:`cluster.MiniBatchKMeans`.

Details are listed in the changelog below.

(While we are trying to better inform users by providing this information, we
cannot ensure that this list is complete.)

Changelog
---------

:mod:`sklearn.cluster`
......................

- |Fix| Fixed a bug in :class:`cluster.KMeans` where rounding errors could
prevent convergence from being declared when `tol=0`. :pr:`17959` by
:user:`Jérémie du Boisberranger <jeremiedbb>`.

- |Fix| Fixed a bug in :class:`cluster.KMeans` and
:class:`cluster.MiniBatchKMeans` where the reported inertia was incorrectly
weighted by the sample weights. :pr:`17848` by
:user:`Jérémie du Boisberranger <jeremiedbb>`.

- |Fix| Fixed a bug in :class:`cluster.MeanShift` with `bin_seeding=True`. When
the estimated bandwidth is 0, the behavior is equivalent to
`bin_seeding=False`.
:pr:`17742` by :user:`Jeremie du Boisberranger <jeremiedbb>`.

- |Fix| Fixed a bug in :class:`cluster.AffinityPropagation` that
gave incorrect clusters when the array dtype is float32.
:pr:`17995` by :user:`Thomaz Santana <Wikilicious>` and
:user:`Amanda Dsouza <amy12xx>`.

:mod:`sklearn.decomposition`
............................

- |Fix| Fixed a bug in
:func:`decomposition.MiniBatchDictionaryLearning.partial_fit` which should
update the dictionary by iterating only once over a mini-batch.
:pr:`17433` by :user:`Chiara Marmo <cmarmo>`.

- |Fix| Avoid overflows on Windows in
:func:`decomposition.IncrementalPCA.partial_fit` for large ``batch_size`` and
``n_samples`` values.
:pr:`17985` by :user:`Alan Butler <aldee153>` and
:user:`Amanda Dsouza <amy12xx>`.

:mod:`sklearn.ensemble`
.......................

- |Fix| Fixed a bug in :class:`ensemble.MultinomialDeviance` where the
average of logloss was incorrectly calculated as the sum of logloss.
:pr:`17694` by :user:`Markus Rempfler <rempfler>` and
:user:`Tsutomu Kusanagi <t-kusanagi2>`.

- |Fix| Fixes :class:`ensemble.StackingClassifier` and
:class:`ensemble.StackingRegressor` compatibility with estimators that
do not define `n_features_in_`. :pr:`17357` by `Thomas Fan`_.

:mod:`sklearn.feature_extraction`
.................................

- |Fix| Fixes bug in :class:`feature_extraction.text.CountVectorizer` where
sample order invariance was broken when `max_features` was set and features
had the same count. :pr:`18016` by `Thomas Fan`_, `Roman Yurchak`_, and
`Joel Nothman`_.

:mod:`sklearn.linear_model`
...........................

- |Fix| :func:`linear_model.lars_path` does not overwrite `X` when
`copy_X=True` and `Gram='auto'`. :pr:`17914` by `Thomas Fan`_.

:mod:`sklearn.manifold`
.......................

- |Fix| Fixed a bug where :func:`metrics.pairwise_distances` would raise an
error if ``metric='seuclidean'`` and ``X`` is not type ``np.float64``.
:pr:`15730` by :user:`Forrest Koch <ForrestCKoch>`.

:mod:`sklearn.metrics`
......................

- |Fix| Fixed a bug in :func:`metrics.mean_squared_error` where the
average of multiple RMSE values was incorrectly calculated as the root of the
average of multiple MSE values.
:pr:`17309` by :user:`Swier Heeres <swierh>`.

:mod:`sklearn.pipeline`
.......................

- |Fix| :class:`pipeline.FeatureUnion` raises a deprecation warning when
`None` is included in `transformer_list`. :pr:`17360` by `Thomas Fan`_.

:mod:`sklearn.utils`
....................

- |Fix| Fix :func:`utils.estimator_checks.check_estimator` so that all test
cases support the `binary_only` estimator tag.
:pr:`17812` by :user:`Bruno Charron <brcharron>`.

.. _changes_0_23_1:

Version 0.23.1
Expand All @@ -23,6 +139,7 @@ Changelog
provided by the user were modified in place. :pr:`17204` by
:user:`Jeremie du Boisberranger <jeremiedbb>`.


Miscellaneous
.............

Expand All @@ -44,8 +161,6 @@ refer to

.. include:: changelog_legend.inc

Put the changes in their relevant module.

Enforcing keyword-only arguments
--------------------------------

Expand Down Expand Up @@ -171,7 +286,7 @@ Changelog
deprecated. It has no effect. :pr:`11950` by
:user:`Jeremie du Boisberranger <jeremiedbb>`.

- |API| The ``random_state`` parameter has been added to
- |API| The ``random_state`` parameter has been added to
:class:`cluster.AffinityPropagation`. :pr:`16801` by :user:`rcwoolston`
and :user:`Chiara Marmo <cmarmo>`.

Expand Down Expand Up @@ -364,7 +479,7 @@ Changelog
for each feature. :pr:`16403` by :user:`Narendra Mukherjee <narendramukherjee>`.

- |Enhancement| :class:`impute.SimpleImputer`, :class:`impute.KNNImputer`, and
:class:`impute.SimpleImputer` accepts pandas' nullable integer dtype with
:class:`impute.IterativeImputer` accept pandas' nullable integer dtype with
missing values. :pr:`16508` by `Thomas Fan`_.

:mod:`sklearn.inspection`
Expand Down Expand Up @@ -467,7 +582,7 @@ Changelog
an error when `y_true` and `y_pred` were length zero and `labels` was
not `None`. In addition, we raise an error when an empty list is given to
the `labels` parameter.
:pr:`16442` by `Kyle Parsons <parsons-kyle-89>`.
:pr:`16442` by :user:`Kyle Parsons <parsons-kyle-89>`.

- |API| Changed the formatting of values in
:meth:`metrics.ConfusionMatrixDisplay.plot` and
Expand All @@ -491,7 +606,7 @@ Changelog
:pr:`15622` by :user:`Gregory Morse <GregoryMorse>`.

- |Fix| :func:`model_selection.cross_val_predict` supports
`method="predict_proba"` when `y=None`.:pr:`15918` by
`method="predict_proba"` when `y=None`. :pr:`15918` by
:user:`Luca Kubin <lkubin>`.

- |Fix| :func:`model_selection.fit_grid_point` is deprecated in 0.23 and will
Expand Down
4 changes: 2 additions & 2 deletions examples/cluster/plot_coin_segmentation.py
Expand Up @@ -25,7 +25,6 @@
import time

import numpy as np
from distutils.version import LooseVersion
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt
import skimage
Expand All @@ -34,9 +33,10 @@

from sklearn.feature_extraction import image
from sklearn.cluster import spectral_clustering
from sklearn.utils.fixes import parse_version

# these were introduced in skimage-0.14
if LooseVersion(skimage.__version__) >= '0.14':
if parse_version(skimage.__version__) >= parse_version('0.14'):
rescale_params = {'anti_aliasing': False, 'multichannel': False}
else:
rescale_params = {}
Expand Down
4 changes: 2 additions & 2 deletions examples/cluster/plot_coin_ward_segmentation.py
Expand Up @@ -17,7 +17,6 @@
import time as time

import numpy as np
from distutils.version import LooseVersion
from scipy.ndimage.filters import gaussian_filter

import matplotlib.pyplot as plt
Expand All @@ -28,9 +27,10 @@

from sklearn.feature_extraction.image import grid_to_graph
from sklearn.cluster import AgglomerativeClustering
from sklearn.utils.fixes import parse_version

# these were introduced in skimage-0.14
if LooseVersion(skimage.__version__) >= '0.14':
if parse_version(skimage.__version__) >= parse_version('0.14'):
rescale_params = {'anti_aliasing': False, 'multichannel': False}
else:
rescale_params = {}
Expand Down
4 changes: 2 additions & 2 deletions examples/compose/plot_transformed_target.py
Expand Up @@ -21,7 +21,6 @@
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from distutils.version import LooseVersion

print(__doc__)

Expand All @@ -34,10 +33,11 @@
from sklearn.linear_model import RidgeCV
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import median_absolute_error, r2_score
from sklearn.utils.fixes import parse_version


# `normed` is being deprecated in favor of `density` in histograms
if LooseVersion(matplotlib.__version__) >= '2.1':
if parse_version(matplotlib.__version__) >= parse_version('2.1'):
density_param = {'density': True}
else:
density_param = {'normed': True}
Expand Down
5 changes: 2 additions & 3 deletions examples/decomposition/plot_sparse_coding.py
Expand Up @@ -16,12 +16,11 @@
"""
print(__doc__)

from distutils.version import LooseVersion

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import SparseCoder
from sklearn.utils.fixes import np_version, parse_version


def ricker_function(resolution, center, width):
Expand Down Expand Up @@ -68,7 +67,7 @@ def ricker_matrix(width, resolution, n_components):
('Lasso', 'lasso_lars', 2, None, 'turquoise'), ]
lw = 2
# Avoid FutureWarning about default value change when numpy >= 1.14
lstsq_rcond = None if LooseVersion(np.__version__) >= '1.14' else -1
lstsq_rcond = None if np_version >= parse_version('1.14') else -1

plt.figure(figsize=(13, 6))
for subplot, (D, title) in enumerate(zip((D_fixed, D_multi),
Expand Down
4 changes: 2 additions & 2 deletions examples/miscellaneous/plot_johnson_lindenstrauss_bound.py
Expand Up @@ -19,15 +19,15 @@
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from distutils.version import LooseVersion
from sklearn.random_projection import johnson_lindenstrauss_min_dim
from sklearn.random_projection import SparseRandomProjection
from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.datasets import load_digits
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.utils.fixes import parse_version

# `normed` is being deprecated in favor of `density` in histograms
if LooseVersion(matplotlib.__version__) >= '2.1':
if parse_version(matplotlib.__version__) >= parse_version('2.1'):
density_param = {'density': True}
else:
density_param = {'normed': True}
Expand Down

0 comments on commit 947f542

Please sign in to comment.