148 changes: 80 additions & 68 deletions _doc/notebooks/sklearn/decision_tree_logreg.ipynb

Large diffs are not rendered by default.

36 changes: 13 additions & 23 deletions _doc/notebooks/sklearn/kmeans_l1.ipynb

Large diffs are not rendered by default.

168 changes: 77 additions & 91 deletions _doc/notebooks/sklearn/piecewise_classification.ipynb

Large diffs are not rendered by default.

159 changes: 68 additions & 91 deletions _doc/notebooks/sklearn/piecewise_linear_regression.ipynb

Large diffs are not rendered by default.

100 changes: 50 additions & 50 deletions _doc/notebooks/sklearn/quantile_mlpregression.ipynb

Large diffs are not rendered by default.

166 changes: 89 additions & 77 deletions _doc/notebooks/sklearn/quantile_regression.ipynb

Large diffs are not rendered by default.

169 changes: 55 additions & 114 deletions _doc/notebooks/sklearn/regression_confidence_interval.ipynb

Large diffs are not rendered by default.

183 changes: 90 additions & 93 deletions _doc/notebooks/sklearn/sklearn_transformed_target.ipynb

Large diffs are not rendered by default.

292 changes: 145 additions & 147 deletions _doc/notebooks/sklearn/visualize_pipeline.ipynb

Large diffs are not rendered by default.

163 changes: 80 additions & 83 deletions _doc/notebooks/sklearn_c/piecewise_linear_regression_criterion.ipynb

Large diffs are not rendered by default.

70 changes: 35 additions & 35 deletions _doc/notebooks/tree/leave_neighbors.ipynb

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion _unittests/ut_documentation/test_nb_piecewise_c.py
@@ -6,7 +6,8 @@
import unittest
import sklearn
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import add_missing_development_version
from pyquickhelper.pycode import (
add_missing_development_version, skipif_appveyor)
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
from pyquickhelper.texthelper import compare_module_version
import mlinsights
@@ -19,6 +20,7 @@ def setUp(self):

@unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
reason="This notebook uses Criterion API changed in 0.21")
@skipif_appveyor('too long')
def test_notebook_piecewise(self):
fLOG(
__file__,
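Note: `skipif_appveyor` (here and in the two notebook tests below) skips these long notebook runs on AppVeyor. As a rough sketch of what such a guard does — assuming AppVeyor is detected through its standard `APPVEYOR` environment variable, and using hypothetical names rather than pyquickhelper's actual implementation:

```python
import os
import unittest


def skipif_appveyor_sketch(msg):
    """Hypothetical stand-in for pyquickhelper's skipif_appveyor:
    skip the decorated test when the suite runs on AppVeyor CI."""
    if os.environ.get("APPVEYOR", "").lower() in ("true", "1"):
        return unittest.skip(msg)
    return lambda func: func  # no-op everywhere else


class TestNotebookPiecewiseSketch(unittest.TestCase):
    @skipif_appveyor_sketch('too long')
    def test_notebook_piecewise(self):
        self.assertTrue(True)  # placeholder for the notebook execution
```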
4 changes: 3 additions & 1 deletion _unittests/ut_documentation/test_nb_poly.py
@@ -5,7 +5,8 @@
import os
import unittest
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import add_missing_development_version
from pyquickhelper.pycode import (
add_missing_development_version, skipif_appveyor)
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
import mlinsights

@@ -15,6 +16,7 @@ class TestNotebookPolynomialFeatures(unittest.TestCase):
def setUp(self):
add_missing_development_version(["jyquickhelper"], __file__, hide=True)

@skipif_appveyor('too long')
def test_notebook_poly(self):
fLOG(
__file__,
4 changes: 3 additions & 1 deletion _unittests/ut_documentation/test_nb_tsne.py
@@ -5,7 +5,8 @@
import os
import unittest
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import add_missing_development_version
from pyquickhelper.pycode import (
add_missing_development_version, skipif_appveyor)
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
import mlinsights

@@ -15,6 +16,7 @@ class TestNotebookTSNE(unittest.TestCase):
def setUp(self):
add_missing_development_version(["jyquickhelper"], __file__, hide=True)

@skipif_appveyor('too long')
def test_notebook_tnse(self):
fLOG(
__file__,
6 changes: 3 additions & 3 deletions _unittests/ut_mlmodel/test_kmeans_l1.py
@@ -32,7 +32,7 @@ def test_kmeans_l2_random(self):
def test_kmeans_l2_parallel(self):
iris = datasets.load_iris()
X = iris.data
clr = KMeansL1L2(4, n_jobs=2)
clr = KMeansL1L2(4)
clr.fit(X)
cls = set(clr.predict(X))
self.assertEqual({0, 1, 2, 3}, cls)
@@ -41,7 +41,7 @@ def test_kmeans_l2_small(self):
iris = datasets.load_iris()
X = iris.data
X = X[:6]
clr = KMeansL1L2(4, n_jobs=1)
clr = KMeansL1L2(4)
clr.fit(X)
cls = set(clr.predict(X))
self.assertEqual({0, 1, 2, 3}, cls)
@@ -50,7 +50,7 @@ def test_kmeans_l1_small(self):
iris = datasets.load_iris()
X = iris.data
X = X[:6]
clr = KMeansL1L2(4, norm='L1', n_jobs=1)
clr = KMeansL1L2(4, norm='L1')
clr.fit(X)
cls = set(clr.predict(X))
self.assertEqual({0, 1, 2, 3}, cls)
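Note: the `n_jobs` argument disappears from these constructors because recent scikit-learn releases dropped it from `KMeans` (deprecated in 0.23, removed by 1.0), so `KMeansL1L2` no longer accepts it either. A minimal sketch of the updated usage, following the calls in the tests above:

```python
from sklearn import datasets
from mlinsights.mlmodel import KMeansL1L2

X = datasets.load_iris().data

# No n_jobs anymore: only the number of clusters (and optionally the norm).
clr_l2 = KMeansL1L2(4)             # default L2 norm
clr_l1 = KMeansL1L2(4, norm='L1')  # L1 variant exercised above
clr_l2.fit(X)
print(set(clr_l2.predict(X)))      # expected: {0, 1, 2, 3}
```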
10 changes: 7 additions & 3 deletions _unittests/ut_mlmodel/test_kmeans_sklearn.py
@@ -11,7 +11,7 @@
assert_almost_equal, assert_raise_message)
from sklearn.metrics.cluster import v_measure_score
from sklearn.datasets import make_blobs
from pyquickhelper.pycode import ExtTestCase
from pyquickhelper.pycode import ExtTestCase, ignore_warnings
from pyquickhelper.texthelper.version_helper import compare_module_version
from mlinsights.mlmodel import KMeansL1L2

@@ -89,6 +89,7 @@ def do_test_kmeans_results(self, representation, algo, dtype, norm, sw):
assert_array_almost_equal(kmeans.cluster_centers_, expected_centers)
self.assertEqualArray(kmeans.n_iter_, expected_n_iter)

@ignore_warnings(UserWarning)
def test_kmeans_results(self):
for representation, algo in [('dense', 'full'),
('dense', 'elkan'),
@@ -121,6 +122,7 @@ def _check_fitted_model(self, km):
assert_raise_message(ValueError, "n_samples=1 should be >= n_clusters=%d"
% km.n_clusters, km.fit, [[0., 1.]])

@ignore_warnings(UserWarning)
def test_k_means_new_centers(self):
# Explore the part of the code where a new center is reassigned
X = np.array([[0, 0, 1, 1],
@@ -145,17 +147,19 @@ def test_k_means_new_centers(self):
1][this_labels]
np.testing.assert_array_equal(this_labels, labels)

@ignore_warnings(UserWarning)
def test_k_means_plus_plus_init_not_precomputed(self):
km = KMeansL1L2(
init="k-means++", n_clusters=TestKMeansL1L2Sklearn.n_clusters,
random_state=42, precompute_distances=False).fit(
random_state=42).fit(
TestKMeansL1L2Sklearn.X)
self._check_fitted_model(km)

@ignore_warnings(UserWarning)
def test_k_means_random_init_not_precomputed(self):
km = KMeansL1L2(
init="random", n_clusters=TestKMeansL1L2Sklearn.n_clusters,
random_state=42, precompute_distances=False).fit(
random_state=42).fit(
TestKMeansL1L2Sklearn.X)
self._check_fitted_model(km)

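Note: `ignore_warnings(UserWarning)` silences the warnings these older k-means code paths now emit. A hedged sketch of an equivalent decorator — an assumption about the behaviour, not pyquickhelper's actual code:

```python
import functools
import warnings


def ignore_warnings_sketch(category=Warning):
    """Hypothetical equivalent of pyquickhelper's ignore_warnings:
    run the wrapped test with the given warning category suppressed."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category)
                return func(*args, **kwargs)
        return wrapper
    return decorator
```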
4 changes: 2 additions & 2 deletions _unittests/ut_module/test_SKIP_code_style.py
@@ -16,7 +16,7 @@ def test_style_src(self):
check_pep8(src_, fLOG=fLOG,
pylint_ignore=('C0103', 'C1801', 'R0201', 'R1705', 'W0108', 'W0613',
'W0201', 'W0221', 'E0632', 'R1702', 'W0212', 'W0223',
'W0107', "R1720", 'R1732'),
'W0107', "R1720", 'R1732', 'C0209'),
skip=["categories_to_integers.py:174: W0640",
"E0401: Unable to import 'mlinsights.mlmodel.piecewise_tree_regression_criterion",
"setup.py:",
@@ -30,7 +30,7 @@ def test_style_test(self):
check_pep8(test, fLOG=fLOG, neg_pattern="temp_.*",
pylint_ignore=('C0103', 'C1801', 'R0201', 'R1705', 'W0108', 'W0613',
'C0111', 'W0107', 'C0111', 'R1702', 'C0415', "R1720",
'R1732'),
'R1732', 'C0209'),
skip=["Instance of 'tuple' has no",
"[E402] module level import",
"E0611: No name '_test_criterion_",
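Note: `C0209` is pylint's `consider-using-f-string` check (introduced in pylint 2.11); it is added to the ignore list because the package keeps %-style formatting, as in the `mlinsights/helpers/pipeline.py` change below. For illustration, the two forms the check distinguishes:

```python
shape, dtype = (150, 4), "float64"

# %-style formatting: flagged by C0209 (consider-using-f-string)
msg_percent = "shape=%r type=%r" % (shape, dtype)

# f-string: the form C0209 recommends instead
msg_fstring = f"shape={shape!r} type={dtype!r}"

assert msg_percent == msg_fstring
```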
10 changes: 10 additions & 0 deletions appveyor.yml
@@ -5,6 +5,15 @@ environment:
- PYTHON: "C:\\Python39-x64"
PYTHON_VERSION: "3.9.x"
PYTHON_ARCH: "64"
SKL: '>=1.0'
- PYTHON: "C:\\Python39-x64"
PYTHON_VERSION: "3.9.x"
PYTHON_ARCH: "64"
SKL: '==0.24.2'
- PYTHON: "C:\\Python38-x64"
PYTHON_VERSION: "3.8.x"
PYTHON_ARCH: "64"
SKL: '==0.23.2'
init:
- "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%"

@@ -17,6 +26,7 @@ install:
- "%PYTHON%\\Scripts\\pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio===0.7.2 -f https://download.pytorch.org/whl/torch_stable.html"
# other dependencies
- "%PYTHON%\\Scripts\\pip install -r requirements.txt --no-deps"
- "%PYTHON%\\Scripts\\pip install scikit-learn%SKL%"
build: off

before_test:
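Note: the new matrix entries pin scikit-learn per job through the `SKL` variable (`pip install scikit-learn%SKL%`), so the suite runs against `>=1.0`, `==0.24.2` and `==0.23.2`. Tests that depend on the installed release can branch on it the same way the notebook tests above do — a small sketch reusing `compare_module_version`:

```python
import sklearn
from pyquickhelper.texthelper import compare_module_version

# compare_module_version(a, b) < 0 means the installed version is older
# than the reference one (same idiom as in test_nb_piecewise_c.py above).
if compare_module_version(sklearn.__version__, "1.0") < 0:
    print("running against one of the pinned 0.2x releases")
else:
    print("running against scikit-learn >= 1.0")
```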
10 changes: 3 additions & 7 deletions azure-pipelines.yml
@@ -50,15 +50,13 @@ jobs:
- script: |
python -m pip install cibuildwheel
export CIBW_MANYLINUX_X86_64_IMAGE="manylinux_2_24"
export CIBW_BEFORE_BUILD="pip install ."
export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming"
export CIBW_BUILD="cp39-manylinux_x86_64"
python -m cibuildwheel --output-dir dist/wheelhouse39 --platform linux
displayName: 'Build Package manylinux_x_y'
- script: |
python -m pip install cibuildwheel
export CIBW_MANYLINUX_X86_64_IMAGE="manylinux2014"
export CIBW_BEFORE_BUILD="pip install ."
export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming"
export CIBW_BUILD="cp37-manylinux_x86_64 cp38-manylinux_x86_64 cp39-manylinux_x86_64"
python -m cibuildwheel --output-dir dist/wheelhouse --platform linux
@@ -93,9 +91,8 @@
displayName: 'Runs Unit Tests'
- script: |
python -m pip install cibuildwheel
set CIBW_BEFORE_BUILD="pip install ."
set CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming"
set CIBW_BUILD="cp37-win_amd64 cp38-win_amd64 cp39-win_amd64"
set CIBW_BEFORE_BUILD=pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming
set CIBW_BUILD=cp37-win_amd64 cp38-win_amd64 cp39-win_amd64
python -m cibuildwheel --output-dir dist/wheelhouse
displayName: 'Build Package many'
- task: PublishPipelineArtifact@0
@@ -157,11 +154,10 @@ jobs:
# export MACOSX_DEPLOYMENT_TARGET=10.13
python setup.py build_ext --inplace
displayName: 'Build package inplace'
- script: python -u setup.py unittests
- script: python -u setup.py unittests -d 15
displayName: 'Runs Unit Tests'
- script: |
python -m pip install cibuildwheel
export CIBW_BEFORE_BUILD="pip install ."
export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming"
export CIBW_BUILD="cp37-macosx_x86_64 cp38-macosx_x86_64 cp39-macosx_x86_64"
python -m cibuildwheel --output-dir dist/wheelhouse
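Note: `CIBW_BEFORE_BUILD` switches from `pip install .` to installing the build dependencies explicitly. cibuildwheel builds each wheel in a fresh environment, so Cython, pybind11 and numpy must be importable before `setup.py build_ext` runs; installing the package itself beforehand is not needed. A minimal sketch of the kind of build-time imports that motivate this (hypothetical module and file names, not the project's actual setup.py):

```python
# Sketch only: extensions compiled from .pyx sources need numpy and
# Cython available at build time, which CIBW_BEFORE_BUILD guarantees.
import numpy
from setuptools import Extension, setup
from Cython.Build import cythonize

ext = Extension(
    "pkg._criterion_sketch",               # hypothetical extension name
    ["pkg/_criterion_sketch.pyx"],         # hypothetical source file
    include_dirs=[numpy.get_include()],
)
setup(name="pkg", version="0.1", ext_modules=cythonize([ext]))
```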
9 changes: 5 additions & 4 deletions mlinsights/helpers/pipeline.py
@@ -36,8 +36,9 @@ class PassThrough:
"Unable to handle this specific case.")
elif hasattr(pipe, 'mapper') and pipe.mapper:
# azureml DataTransformer
for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
yield couple
for couple in enumerate_pipeline_models( # pragma: no cover
pipe.mapper, coor + (0,)): # pragma: no cover
yield couple # pragma: no cover
elif hasattr(pipe, 'built_features'): # pragma: no cover
# sklearn_pandas.dataframe_mapper.DataFrameMapper
for i, (columns, transformers, _) in enumerate(pipe.built_features):
@@ -138,9 +139,9 @@ def display(self, data, nrows):
rows = rows[:nrows]
rows.append('...')
if hasattr(data, 'shape'):
rows.insert(0, "shape={} type={}".format(data.shape, type(data)))
rows.insert(0, "shape=%r type=%r" % (data.shape, type(data)))
else:
rows.insert(0, "type={}".format(type(data)))
rows.insert(0, "type=%r" % type(data)) # pragma: no cover
return "\n".join(rows)


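Note: the azureml branch keeps an explicit loop so that every line can carry its own `# pragma: no cover` marker; functionally, the same branch could delegate with `yield from`. A sketch of the equivalent formulation (hypothetical helper name):

```python
from mlinsights.helpers.pipeline import enumerate_pipeline_models


def _enumerate_mapper_sketch(pipe, coor):
    """Hypothetical rewrite of the uncovered branch above."""
    if hasattr(pipe, 'mapper') and pipe.mapper:
        # Same behaviour as looping and re-yielding each couple.
        yield from enumerate_pipeline_models(pipe.mapper, coor + (0,))
```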
8 changes: 4 additions & 4 deletions mlinsights/mlbatch/cache_model.py
@@ -119,7 +119,7 @@ def create_cache(name):
@param name name
@return created cache
"""
global _caches # pylint: disable=W0603
global _caches # pylint: disable=W0603,W0602
if name in _caches:
raise RuntimeError( # pragma: no cover
"cache '{0}' already exists.".format(name))
@@ -135,7 +135,7 @@ def remove_cache(name):

@param name name
"""
global _caches # pylint: disable=W0603
global _caches # pylint: disable=W0603,W0602
del _caches[name]

@staticmethod
@@ -146,7 +146,7 @@ def get_cache(name):
@param name name
@return created cache
"""
global _caches # pylint: disable=W0603
global _caches # pylint: disable=W0603,W0602
return _caches[name]

@staticmethod
@@ -157,5 +157,5 @@ def has_cache(name):
@param name name
@return boolean
"""
global _caches # pylint: disable=W0603
global _caches # pylint: disable=W0603,W0602
return name in _caches
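Note: `W0602` (global-variable-not-assigned) fires because these methods only read or mutate the module-level `_caches` dictionary without ever rebinding it, so the `global` statements are technically redundant; the change silences the warning instead of removing them. A small illustration of the distinction:

```python
_caches = {}  # module-level registry, as in cache_model.py


def has_cache_sketch(name):
    return name in _caches      # read access: no global needed


def remove_cache_sketch(name):
    del _caches[name]           # in-place mutation: no global needed


def reset_caches_sketch():
    global _caches              # rebinding the name: global required
    _caches = {}
```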
8 changes: 1 addition & 7 deletions mlinsights/mlmodel/_kmeans_022.py
@@ -146,7 +146,7 @@ def _assign_labels_array(X, sample_weight, x_squared_norms, centers,


def _labels_inertia_skl(X, sample_weight, x_squared_norms, centers,
precompute_distances=True, distances=None):
distances=None):
"""E step of the K-means EM algorithm.
Compute the labels and the inertia of the given samples and centers.
This will compute the distances in-place.
@@ -160,8 +160,6 @@ def _labels_inertia_skl(X, sample_weight, x_squared_norms, centers,
computations.
:param centers: float array, shape (k, n_features)
The cluster centers.
:param precompute_distances: boolean, default: True
Precompute distances (faster but takes more memory).
:param distances: float array, shape (n_samples,)
Pre-allocated array to be filled in with each sample's distance
to the closest center.
@@ -183,10 +181,6 @@ def _labels_inertia_skl(X, sample_weight, x_squared_norms, centers,
X, sample_weight, x_squared_norms, centers, labels,
distances=distances)
else:
if precompute_distances:
return _labels_inertia_precompute_dense(
norm='l2', X=X, sample_weight=sample_weight,
centers=centers, distances=distances)
inertia = _assign_labels_array(
X, sample_weight, x_squared_norms, centers, labels,
distances=distances)
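Note: with `precompute_distances` removed, the dense branch of `_labels_inertia_skl` now goes straight to `_assign_labels_array` instead of optionally precomputing distances. A hedged sketch of a call with the trimmed signature, assuming the helper mirrors scikit-learn's internal `_labels_inertia` (it is a private function, so this is illustration only):

```python
import numpy as np
from mlinsights.mlmodel._kmeans_022 import _labels_inertia_skl

rng = np.random.RandomState(0)
X = rng.rand(20, 4)
sample_weight = np.ones(X.shape[0])
x_squared_norms = (X ** 2).sum(axis=1)
centers = X[:3].copy()
distances = np.zeros(X.shape[0])

# precompute_distances is no longer part of the signature.
result = _labels_inertia_skl(
    X, sample_weight, x_squared_norms, centers, distances=distances)
```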
16 changes: 5 additions & 11 deletions mlinsights/mlmodel/kmeans_constraint.py
@@ -52,8 +52,8 @@ class ConstraintKMeans(KMeans):
_strategy_value = {'distance', 'gain', 'weights'}

def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=500,
tol=0.0001, precompute_distances='deprecated', verbose=0,
random_state=None, copy_x=True, n_jobs=1, algorithm='auto',
tol=0.0001, verbose=0,
random_state=None, copy_x=True, algorithm='auto',
balanced_predictions=False, strategy='gain', kmeans0=True,
learning_rate=1., history=False):
"""
@@ -62,11 +62,9 @@ def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=500,
@param n_init used by :epkg:`k-means`
@param max_iter used by :epkg:`k-means`
@param tol used by :epkg:`k-means`
@param precompute_distances used by :epkg:`k-means`
@param verbose used by :epkg:`k-means`
@param random_state used by :epkg:`k-means`
@param copy_x used by :epkg:`k-means`
@param n_jobs used by :epkg:`k-means`
@param algorithm used by :epkg:`k-means`
@param balanced_predictions produced balanced prediction
or the regular ones
@@ -76,23 +74,19 @@ def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=500,
@param history keeps centers accress iterations
@param learning_rate learning rate, used by strategy `'weights'`
"""
self._n_threads = 1
KMeans.__init__(self, n_clusters=n_clusters, init=init, n_init=n_init,
max_iter=max_iter, tol=tol, precompute_distances=precompute_distances,
max_iter=max_iter, tol=tol,
verbose=verbose, random_state=random_state, copy_x=copy_x,
n_jobs=n_jobs, algorithm=algorithm)
algorithm=algorithm)
self.balanced_predictions = balanced_predictions
self.strategy = strategy
self.kmeans0 = kmeans0
self.history = history
self._n_threads = None
self.learning_rate = learning_rate
if strategy not in ConstraintKMeans._strategy_value:
raise ValueError('strategy must be in {0}'.format(
ConstraintKMeans._strategy_value))
if precompute_distances == 'deprecated':
km = KMeans()
if km.precompute_distances != precompute_distances:
self.precompute_distances = km.precompute_distances

def fit(self, X, y=None, sample_weight=None, fLOG=None):
"""
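Note: `ConstraintKMeans` likewise drops the deprecated `precompute_distances` and `n_jobs` constructor arguments before delegating to `KMeans.__init__`. A minimal construction sketch restricted to parameters listed in the docstring above (random data used only for illustration):

```python
import numpy as np
from mlinsights.mlmodel.kmeans_constraint import ConstraintKMeans

# Only arguments shown in the updated signature; the removed
# precompute_distances / n_jobs are simply not passed anymore.
km = ConstraintKMeans(
    n_clusters=4,
    strategy='gain',            # one of {'distance', 'gain', 'weights'}
    balanced_predictions=True,
    kmeans0=True,
    learning_rate=1.0,
)
X = np.random.RandomState(0).rand(100, 3)
km.fit(X)
print(km.predict(X).shape)      # one cluster label per sample
```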