148 changes: 80 additions & 68 deletions _doc/notebooks/sklearn/decision_tree_logreg.ipynb

Large diffs are not rendered by default.

36 changes: 13 additions & 23 deletions _doc/notebooks/sklearn/kmeans_l1.ipynb

Large diffs are not rendered by default.

168 changes: 77 additions & 91 deletions _doc/notebooks/sklearn/piecewise_classification.ipynb

Large diffs are not rendered by default.

159 changes: 68 additions & 91 deletions _doc/notebooks/sklearn/piecewise_linear_regression.ipynb

Large diffs are not rendered by default.

100 changes: 50 additions & 50 deletions _doc/notebooks/sklearn/quantile_mlpregression.ipynb

Large diffs are not rendered by default.

166 changes: 89 additions & 77 deletions _doc/notebooks/sklearn/quantile_regression.ipynb

Large diffs are not rendered by default.

169 changes: 55 additions & 114 deletions _doc/notebooks/sklearn/regression_confidence_interval.ipynb

Large diffs are not rendered by default.

183 changes: 90 additions & 93 deletions _doc/notebooks/sklearn/sklearn_transformed_target.ipynb

Large diffs are not rendered by default.

292 changes: 145 additions & 147 deletions _doc/notebooks/sklearn/visualize_pipeline.ipynb

Large diffs are not rendered by default.

163 changes: 80 additions & 83 deletions _doc/notebooks/sklearn_c/piecewise_linear_regression_criterion.ipynb

Large diffs are not rendered by default.

70 changes: 35 additions & 35 deletions _doc/notebooks/tree/leave_neighbors.ipynb

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion _unittests/ut_documentation/test_nb_piecewise_c.py
@@ -6,7 +6,8 @@
import unittest
import sklearn
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import add_missing_development_version
from pyquickhelper.pycode import (
add_missing_development_version, skipif_appveyor)
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
from pyquickhelper.texthelper import compare_module_version
import mlinsights
@@ -19,6 +20,7 @@ def setUp(self):

@unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
reason="This notebook uses Criterion API changed in 0.21")
@skipif_appveyor('too long')
def test_notebook_piecewise(self):
fLOG(
__file__,
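Note: `skipif_appveyor` (here and in the two notebook tests below) skips these long notebook runs on AppVeyor. As a rough sketch of what such a guard does — assuming AppVeyor is detected through its standard `APPVEYOR` environment variable, and using hypothetical names rather than pyquickhelper's actual implementation:

```python
import os
import unittest


def skipif_appveyor_sketch(msg):
    """Hypothetical stand-in for pyquickhelper's skipif_appveyor:
    skip the decorated test when the suite runs on AppVeyor CI."""
    if os.environ.get("APPVEYOR", "").lower() in ("true", "1"):
        return unittest.skip(msg)
    return lambda func: func  # no-op everywhere else


class TestNotebookPiecewiseSketch(unittest.TestCase):
    @skipif_appveyor_sketch('too long')
    def test_notebook_piecewise(self):
        self.assertTrue(True)  # placeholder for the notebook execution
```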
4 changes: 3 additions & 1 deletion _unittests/ut_documentation/test_nb_poly.py
@@ -5,7 +5,8 @@
import os
import unittest
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import add_missing_development_version
from pyquickhelper.pycode import (
add_missing_development_version, skipif_appveyor)
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
import mlinsights

@@ -15,6 +16,7 @@ class TestNotebookPolynomialFeatures(unittest.TestCase):
def setUp(self):
add_missing_development_version(["jyquickhelper"], __file__, hide=True)

@skipif_appveyor('too long')
def test_notebook_poly(self):
fLOG(
__file__,
4 changes: 3 additions & 1 deletion _unittests/ut_documentation/test_nb_tsne.py
@@ -5,7 +5,8 @@
import os
import unittest
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import add_missing_development_version
from pyquickhelper.pycode import (
add_missing_development_version, skipif_appveyor)
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
import mlinsights

@@ -15,6 +16,7 @@ class TestNotebookTSNE(unittest.TestCase):
def setUp(self):
add_missing_development_version(["jyquickhelper"], __file__, hide=True)

@skipif_appveyor('too long')
def test_notebook_tnse(self):
fLOG(
__file__,
6 changes: 3 additions & 3 deletions _unittests/ut_mlmodel/test_kmeans_l1.py
@@ -32,7 +32,7 @@ def test_kmeans_l2_random(self):
def test_kmeans_l2_parallel(self):
iris = datasets.load_iris()
X = iris.data
clr = KMeansL1L2(4, n_jobs=2)
clr = KMeansL1L2(4)
clr.fit(X)
cls = set(clr.predict(X))
self.assertEqual({0, 1, 2, 3}, cls)
@@ -41,7 +41,7 @@ def test_kmeans_l2_small(self):
iris = datasets.load_iris()
X = iris.data
X = X[:6]
clr = KMeansL1L2(4, n_jobs=1)
clr = KMeansL1L2(4)
clr.fit(X)
cls = set(clr.predict(X))
self.assertEqual({0, 1, 2, 3}, cls)
@@ -50,7 +50,7 @@ def test_kmeans_l1_small(self):
iris = datasets.load_iris()
X = iris.data
X = X[:6]
clr = KMeansL1L2(4, norm='L1', n_jobs=1)
clr = KMeansL1L2(4, norm='L1')
clr.fit(X)
cls = set(clr.predict(X))
self.assertEqual({0, 1, 2, 3}, cls)
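Note: the `n_jobs` argument disappears from these constructors because recent scikit-learn releases dropped it from `KMeans` (deprecated in 0.23, removed by 1.0), so `KMeansL1L2` no longer accepts it either. A minimal sketch of the updated usage, following the calls in the tests above:

```python
from sklearn import datasets
from mlinsights.mlmodel import KMeansL1L2

X = datasets.load_iris().data

# No n_jobs anymore: only the number of clusters (and optionally the norm).
clr_l2 = KMeansL1L2(4)             # default L2 norm
clr_l1 = KMeansL1L2(4, norm='L1')  # L1 variant exercised above
clr_l2.fit(X)
print(set(clr_l2.predict(X)))      # expected: {0, 1, 2, 3}
```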
10 changes: 7 additions & 3 deletions _unittests/ut_mlmodel/test_kmeans_sklearn.py
@@ -11,7 +11,7 @@
assert_almost_equal, assert_raise_message)
from sklearn.metrics.cluster import v_measure_score
from sklearn.datasets import make_blobs
from pyquickhelper.pycode import ExtTestCase
from pyquickhelper.pycode import ExtTestCase, ignore_warnings
from pyquickhelper.texthelper.version_helper import compare_module_version
from mlinsights.mlmodel import KMeansL1L2

@@ -89,6 +89,7 @@ def do_test_kmeans_results(self, representation, algo, dtype, norm, sw):
assert_array_almost_equal(kmeans.cluster_centers_, expected_centers)
self.assertEqualArray(kmeans.n_iter_, expected_n_iter)

@ignore_warnings(UserWarning)
def test_kmeans_results(self):
for representation, algo in [('dense', 'full'),
('dense', 'elkan'),
@@ -121,6 +122,7 @@ def _check_fitted_model(self, km):
assert_raise_message(ValueError, "n_samples=1 should be >= n_clusters=%d"
% km.n_clusters, km.fit, [[0., 1.]])

@ignore_warnings(UserWarning)
def test_k_means_new_centers(self):
# Explore the part of the code where a new center is reassigned
X = np.array([[0, 0, 1, 1],
@@ -145,17 +147,19 @@ def test_k_means_new_centers(self):
1][this_labels]
np.testing.assert_array_equal(this_labels, labels)

@ignore_warnings(UserWarning)
def test_k_means_plus_plus_init_not_precomputed(self):
km = KMeansL1L2(
init="k-means++", n_clusters=TestKMeansL1L2Sklearn.n_clusters,
random_state=42, precompute_distances=False).fit(
random_state=42).fit(
TestKMeansL1L2Sklearn.X)
self._check_fitted_model(km)

@ignore_warnings(UserWarning)
def test_k_means_random_init_not_precomputed(self):
km = KMeansL1L2(
init="random", n_clusters=TestKMeansL1L2Sklearn.n_clusters,
random_state=42, precompute_distances=False).fit(
random_state=42).fit(
TestKMeansL1L2Sklearn.X)
self._check_fitted_model(km)

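Note: `ignore_warnings(UserWarning)` silences the warnings these older k-means code paths now emit. A hedged sketch of an equivalent decorator — an assumption about the behaviour, not pyquickhelper's actual code:

```python
import functools
import warnings


def ignore_warnings_sketch(category=Warning):
    """Hypothetical equivalent of pyquickhelper's ignore_warnings:
    run the wrapped test with the given warning category suppressed."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category)
                return func(*args, **kwargs)
        return wrapper
    return decorator
```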
4 changes: 2 additions & 2 deletions _unittests/ut_module/test_SKIP_code_style.py
@@ -16,7 +16,7 @@ def test_style_src(self):
check_pep8(src_, fLOG=fLOG,
pylint_ignore=('C0103', 'C1801', 'R0201', 'R1705', 'W0108', 'W0613',
'W0201', 'W0221', 'E0632', 'R1702', 'W0212', 'W0223',
'W0107', "R1720", 'R1732'),
'W0107', "R1720", 'R1732', 'C0209'),
skip=["categories_to_integers.py:174: W0640",
"E0401: Unable to import 'mlinsights.mlmodel.piecewise_tree_regression_criterion",
"setup.py:",
@@ -30,7 +30,7 @@ def test_style_test(self):
check_pep8(test, fLOG=fLOG, neg_pattern="temp_.*",
pylint_ignore=('C0103', 'C1801', 'R0201', 'R1705', 'W0108', 'W0613',
'C0111', 'W0107', 'C0111', 'R1702', 'C0415', "R1720",
'R1732'),
'R1732', 'C0209'),
skip=["Instance of 'tuple' has no",
"[E402] module level import",
"E0611: No name '_test_criterion_",
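Note: `C0209` is pylint's `consider-using-f-string` check (introduced in pylint 2.11); it is added to the ignore list because the package keeps %-style formatting, as in the `mlinsights/helpers/pipeline.py` change below. For illustration, the two forms the check distinguishes:

```python
shape, dtype = (150, 4), "float64"

# %-style formatting: flagged by C0209 (consider-using-f-string)
msg_percent = "shape=%r type=%r" % (shape, dtype)

# f-string: the form C0209 recommends instead
msg_fstring = f"shape={shape!r} type={dtype!r}"

assert msg_percent == msg_fstring
```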
10 changes: 10 additions & 0 deletions appveyor.yml
@@ -5,6 +5,15 @@ environment:
- PYTHON: "C:\\Python39-x64"
PYTHON_VERSION: "3.9.x"
PYTHON_ARCH: "64"
SKL: '>=1.0'
- PYTHON: "C:\\Python39-x64"
PYTHON_VERSION: "3.9.x"
PYTHON_ARCH: "64"
SKL: '==0.24.2'
- PYTHON: "C:\\Python38-x64"
PYTHON_VERSION: "3.8.x"
PYTHON_ARCH: "64"
SKL: '==0.23.2'
init:
- "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%"

@@ -17,6 +26,7 @@ install:
- "%PYTHON%\\Scripts\\pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio===0.7.2 -f https://download.pytorch.org/whl/torch_stable.html"
# other dependencies
- "%PYTHON%\\Scripts\\pip install -r requirements.txt --no-deps"
- "%PYTHON%\\Scripts\\pip install scikit-learn%SKL%"
build: off

before_test:
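Note: the new matrix entries pin scikit-learn per job through the `SKL` variable (`pip install scikit-learn%SKL%`), so the suite runs against `>=1.0`, `==0.24.2` and `==0.23.2`. Tests that depend on the installed release can branch on it the same way the notebook tests above do — a small sketch reusing `compare_module_version`:

```python
import sklearn
from pyquickhelper.texthelper import compare_module_version

# compare_module_version(a, b) < 0 means the installed version is older
# than the reference one (same idiom as in test_nb_piecewise_c.py above).
if compare_module_version(sklearn.__version__, "1.0") < 0:
    print("running against one of the pinned 0.2x releases")
else:
    print("running against scikit-learn >= 1.0")
```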
10 changes: 3 additions & 7 deletions azure-pipelines.yml
@@ -50,15 +50,13 @@ jobs:
- script: |
python -m pip install cibuildwheel
export CIBW_MANYLINUX_X86_64_IMAGE="manylinux_2_24"
export CIBW_BEFORE_BUILD="pip install ."
export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming"
export CIBW_BUILD="cp39-manylinux_x86_64"
python -m cibuildwheel --output-dir dist/wheelhouse39 --platform linux
displayName: 'Build Package manylinux_x_y'
- script: |
python -m pip install cibuildwheel
export CIBW_MANYLINUX_X86_64_IMAGE="manylinux2014"
export CIBW_BEFORE_BUILD="pip install ."
export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming"
export CIBW_BUILD="cp37-manylinux_x86_64 cp38-manylinux_x86_64 cp39-manylinux_x86_64"
python -m cibuildwheel --output-dir dist/wheelhouse --platform linux
@@ -93,9 +91,8 @@
displayName: 'Runs Unit Tests'
- script: |
python -m pip install cibuildwheel
set CIBW_BEFORE_BUILD="pip install ."
set CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming"
set CIBW_BUILD="cp37-win_amd64 cp38-win_amd64 cp39-win_amd64"
set CIBW_BEFORE_BUILD=pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming
set CIBW_BUILD=cp37-win_amd64 cp38-win_amd64 cp39-win_amd64
python -m cibuildwheel --output-dir dist/wheelhouse
displayName: 'Build Package many'
- task: PublishPipelineArtifact@0
@@ -157,11 +154,10 @@ jobs:
# export MACOSX_DEPLOYMENT_TARGET=10.13
python setup.py build_ext --inplace
displayName: 'Build package inplace'
- script: python -u setup.py unittests
- script: python -u setup.py unittests -d 15
displayName: 'Runs Unit Tests'
- script: |
python -m pip install cibuildwheel
export CIBW_BEFORE_BUILD="pip install ."
export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming"
export CIBW_BUILD="cp37-macosx_x86_64 cp38-macosx_x86_64 cp39-macosx_x86_64"
python -m cibuildwheel --output-dir dist/wheelhouse
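Note: `CIBW_BEFORE_BUILD` switches from `pip install .` to installing the build dependencies explicitly. cibuildwheel builds each wheel in a fresh environment, so Cython, pybind11 and numpy must be importable before `setup.py build_ext` runs; installing the package itself beforehand is not needed. A minimal sketch of the kind of build-time imports that motivate this (hypothetical module and file names, not the project's actual setup.py):

```python
# Sketch only: extensions compiled from .pyx sources need numpy and
# Cython available at build time, which CIBW_BEFORE_BUILD guarantees.
import numpy
from setuptools import Extension, setup
from Cython.Build import cythonize

ext = Extension(
    "pkg._criterion_sketch",               # hypothetical extension name
    ["pkg/_criterion_sketch.pyx"],         # hypothetical source file
    include_dirs=[numpy.get_include()],
)
setup(name="pkg", version="0.1", ext_modules=cythonize([ext]))
```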
9 changes: 5 additions & 4 deletions mlinsights/helpers/pipeline.py
@@ -36,8 +36,9 @@ class PassThrough:
"Unable to handle this specific case.")
elif hasattr(pipe, 'mapper') and pipe.mapper:
# azureml DataTransformer
for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
yield couple
for couple in enumerate_pipeline_models( # pragma: no cover
pipe.mapper, coor + (0,)): # pragma: no cover
yield couple # pragma: no cover
elif hasattr(pipe, 'built_features'): # pragma: no cover
# sklearn_pandas.dataframe_mapper.DataFrameMapper
for i, (columns, transformers, _) in enumerate(pipe.built_features):
@@ -138,9 +139,9 @@ def display(self, data, nrows):
rows = rows[:nrows]
rows.append('...')
if hasattr(data, 'shape'):
rows.insert(0, "shape={} type={}".format(data.shape, type(data)))
rows.insert(0, "shape=%r type=%r" % (data.shape, type(data)))
else:
rows.insert(0, "type={}".format(type(data)))
rows.insert(0, "type=%r" % type(data)) # pragma: no cover
return "\n".join(rows)


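Note: the azureml branch keeps an explicit loop so that every line can carry its own `# pragma: no cover` marker; functionally, the same branch could delegate with `yield from`. A sketch of the equivalent formulation (hypothetical helper name):

```python
from mlinsights.helpers.pipeline import enumerate_pipeline_models


def _enumerate_mapper_sketch(pipe, coor):
    """Hypothetical rewrite of the uncovered branch above."""
    if hasattr(pipe, 'mapper') and pipe.mapper:
        # Same behaviour as looping and re-yielding each couple.
        yield from enumerate_pipeline_models(pipe.mapper, coor + (0,))
```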
8 changes: 4 additions & 4 deletions mlinsights/mlbatch/cache_model.py
@@ -119,7 +119,7 @@ def create_cache(name):
@param name name
@return created cache
"""
global _caches # pylint: disable=W0603
global _caches # pylint: disable=W0603,W0602
if name in _caches:
raise RuntimeError( # pragma: no cover
"cache '{0}' already exists.".format(name))
@@ -135,7 +135,7 @@ def remove_cache(name):

@param name name
"""
global _caches # pylint: disable=W0603
global _caches # pylint: disable=W0603,W0602
del _caches[name]

@staticmethod
@@ -146,7 +146,7 @@ def get_cache(name):
@param name name
@return created cache
"""
global _caches # pylint: disable=W0603
global _caches # pylint: disable=W0603,W0602
return _caches[name]

@staticmethod
@@ -157,5 +157,5 @@ def has_cache(name):
@param name name
@return boolean
"""
global _caches # pylint: disable=W0603
global _caches # pylint: disable=W0603,W0602
return name in _caches
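Note: `W0602` (global-variable-not-assigned) fires because these methods only read or mutate the module-level `_caches` dictionary without ever rebinding it, so the `global` statements are technically redundant; the change silences the warning instead of removing them. A small illustration of the distinction:

```python
_caches = {}  # module-level registry, as in cache_model.py


def has_cache_sketch(name):
    return name in _caches      # read access: no global needed


def remove_cache_sketch(name):
    del _caches[name]           # in-place mutation: no global needed


def reset_caches_sketch():
    global _caches              # rebinding the name: global required
    _caches = {}
```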
8 changes: 1 addition & 7 deletions mlinsights/mlmodel/_kmeans_022.py
@@ -146,7 +146,7 @@ def _assign_labels_array(X, sample_weight, x_squared_norms, centers,


def _labels_inertia_skl(X, sample_weight, x_squared_norms, centers,
precompute_distances=True, distances=None):
distances=None):
"""E step of the K-means EM algorithm.
Compute the labels and the inertia of the given samples and centers.
This will compute the distances in-place.
@@ -160,8 +160,6 @@ def _labels_inertia_skl(X, sample_weight, x_squared_norms, centers,
computations.
:param centers: float array, shape (k, n_features)
The cluster centers.
:param precompute_distances: boolean, default: True
Precompute distances (faster but takes more memory).
:param distances: float array, shape (n_samples,)
Pre-allocated array to be filled in with each sample's distance
to the closest center.
@@ -183,10 +181,6 @@ def _labels_inertia_skl(X, sample_weight, x_squared_norms, centers,
X, sample_weight, x_squared_norms, centers, labels,
distances=distances)
else:
if precompute_distances:
return _labels_inertia_precompute_dense(
norm='l2', X=X, sample_weight=sample_weight,
centers=centers, distances=distances)
inertia = _assign_labels_array(
X, sample_weight, x_squared_norms, centers, labels,
distances=distances)
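Note: with `precompute_distances` removed, the dense branch of `_labels_inertia_skl` now goes straight to `_assign_labels_array` instead of optionally precomputing distances. A hedged sketch of a call with the trimmed signature, assuming the helper mirrors scikit-learn's internal `_labels_inertia` (it is a private function, so this is illustration only):

```python
import numpy as np
from mlinsights.mlmodel._kmeans_022 import _labels_inertia_skl

rng = np.random.RandomState(0)
X = rng.rand(20, 4)
sample_weight = np.ones(X.shape[0])
x_squared_norms = (X ** 2).sum(axis=1)
centers = X[:3].copy()
distances = np.zeros(X.shape[0])

# precompute_distances is no longer part of the signature.
result = _labels_inertia_skl(
    X, sample_weight, x_squared_norms, centers, distances=distances)
```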
16 changes: 5 additions & 11 deletions mlinsights/mlmodel/kmeans_constraint.py
@@ -52,8 +52,8 @@ class ConstraintKMeans(KMeans):
_strategy_value = {'distance', 'gain', 'weights'}

def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=500,
tol=0.0001, precompute_distances='deprecated', verbose=0,
random_state=None, copy_x=True, n_jobs=1, algorithm='auto',
tol=0.0001, verbose=0,
random_state=None, copy_x=True, algorithm='auto',
balanced_predictions=False, strategy='gain', kmeans0=True,
learning_rate=1., history=False):
"""
@@ -62,11 +62,9 @@ def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=500,
@param n_init used by :epkg:`k-means`
@param max_iter used by :epkg:`k-means`
@param tol used by :epkg:`k-means`
@param precompute_distances used by :epkg:`k-means`
@param verbose used by :epkg:`k-means`
@param random_state used by :epkg:`k-means`
@param copy_x used by :epkg:`k-means`
@param n_jobs used by :epkg:`k-means`
@param algorithm used by :epkg:`k-means`
@param balanced_predictions produced balanced prediction
or the regular ones
@@ -76,23 +74,19 @@ def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=500,
@param history keeps centers accress iterations
@param learning_rate learning rate, used by strategy `'weights'`
"""
self._n_threads = 1
KMeans.__init__(self, n_clusters=n_clusters, init=init, n_init=n_init,
max_iter=max_iter, tol=tol, precompute_distances=precompute_distances,
max_iter=max_iter, tol=tol,
verbose=verbose, random_state=random_state, copy_x=copy_x,
n_jobs=n_jobs, algorithm=algorithm)
algorithm=algorithm)
self.balanced_predictions = balanced_predictions
self.strategy = strategy
self.kmeans0 = kmeans0
self.history = history
self._n_threads = None
self.learning_rate = learning_rate
if strategy not in ConstraintKMeans._strategy_value:
raise ValueError('strategy must be in {0}'.format(
ConstraintKMeans._strategy_value))
if precompute_distances == 'deprecated':
km = KMeans()
if km.precompute_distances != precompute_distances:
self.precompute_distances = km.precompute_distances

def fit(self, X, y=None, sample_weight=None, fLOG=None):
"""
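Note: `ConstraintKMeans` likewise drops the deprecated `precompute_distances` and `n_jobs` constructor arguments before delegating to `KMeans.__init__`. A minimal construction sketch restricted to parameters listed in the docstring above (random data used only for illustration):

```python
import numpy as np
from mlinsights.mlmodel.kmeans_constraint import ConstraintKMeans

# Only arguments shown in the updated signature; the removed
# precompute_distances / n_jobs are simply not passed anymore.
km = ConstraintKMeans(
    n_clusters=4,
    strategy='gain',            # one of {'distance', 'gain', 'weights'}
    balanced_predictions=True,
    kmeans0=True,
    learning_rate=1.0,
)
X = np.random.RandomState(0).rand(100, 3)
km.fit(X)
print(km.predict(X).shape)      # one cluster label per sample
```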