Skip to content
Permalink
Browse files

Fixes #28, piecewise linear regression

  • Loading branch information...
sdpython committed Feb 10, 2019
1 parent a7178f2 commit 366efb9148d0b17ebae359c854f9ff4318cf9963

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -24,6 +24,8 @@ Transforms

.. autosignature:: mlinsights.mlmodel.categories_to_integers.CategoriesToIntegers

.. autosignature:: mlinsights.mlmodel.piecewise_linear_regression.PiecewiseLinearRegression

.. autosignature:: mlinsights.mlmodel.predictable_tsne.PredictableTSNE

.. autosignature:: mlinsights.sklapi.sklearn_base_transform_learner.SkBaseTransformLearner
@@ -0,0 +1,13 @@

.. blogpost::
:title: Piecewise Linear Regression
:keywords: scikit-learn, linear regression, piecewise
:date: 2019-02-10
:categories: machine learning

I decided to turn one of the notebooks I wrote about
`Piecewise Linear Regression <http://www.xavierdupre.fr/app/mlstatpy/helpsphinx/notebooks/regression_lineaire.html#regression-lineaire-par-morceaux>`_
into code that is usable and follows
the *scikit-learn* API. The result is the class
:class:`PiecewiseLinearRegression <mlinsights.mlmodel.piecewise_linear_regression.PiecewiseLinearRegression>`
and another notebook, :ref:`piecewiselinearregressionrst`.
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
"""
@brief test log(time=14s)
"""

import sys
import os
import unittest
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import add_missing_development_version
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage


try:
    import src
except ImportError:
    # ``src`` is not importable as-is: add the directory two levels above
    # this file to ``sys.path`` (once) and retry the import.
    path = os.path.normpath(os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..")))
    if path not in sys.path:
        sys.path.append(path)
    import src

import src.mlinsights


class TestNotebookPiecewise(unittest.TestCase):
    """
    Runs pyquickhelper's ``test_notebook_execution_coverage`` on the
    ``_doc/notebooks`` folder with the ``"piecewise"`` pattern.
    """

    def setUp(self):
        # Delegates to pyquickhelper; presumably makes a development
        # version of jyquickhelper importable -- confirm against the helper.
        required = ["jyquickhelper"]
        add_missing_development_version(required, __file__, hide=True)

    def test_notebook_piecewise(self):
        # Logging is only printed when this file is run as a script.
        run_as_script = __name__ == "__main__"
        fLOG(__file__, self._testMethodName, OutputPrint=run_as_script)

        self.assertTrue(src.mlinsights is not None)

        # Notebook folder, relative to this test file.
        here = os.path.dirname(__file__)
        folder = os.path.join(here, "..", "..", "_doc", "notebooks")
        test_notebook_execution_coverage(
            __file__, "piecewise", folder, 'mlinsights', fLOG=fLOG)


# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
"""
@brief test log(time=2s)
"""

import sys
import os
import unittest
import numpy
import pandas
from sklearn.linear_model import LinearRegression
from pyquickhelper.pycode import ExtTestCase

try:
    import src
except ImportError:
    # ``src`` is not importable as-is: add the directory two levels above
    # this file to ``sys.path`` (once) and retry the import.
    path = os.path.normpath(os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..")))
    if path not in sys.path:
        sys.path.append(path)
    import src

from src.mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv
from src.mlinsights.mlmodel.piecewise_linear_regression import PiecewiseLinearRegression


class TestPiecewiseRegression(ExtTestCase):
    """
    Unit tests for ``PiecewiseLinearRegression``: prediction shapes,
    comparison with ``LinearRegression``, accepted input types,
    pickling, cloning and grid search.
    """

    @staticmethod
    def _four_points():
        # Small fixture shared by the two *no_intercept* tests.
        features = numpy.array(
            [[0.1, 0.2], [0.2, 0.3], [0.2, 0.35], [0.2, 0.36]])
        target = numpy.array([1., 1.1, 1.15, 1.2])
        return features, target

    @staticmethod
    def _noisy_line():
        # 100 random points on y = 3.4 x + 5.6; the first 90 receive a
        # small centred noise, the last 10 a larger positive one.
        features = numpy.random.random(100)
        small_noise = (numpy.random.random(90) - 0.5) * 0.1
        large_noise = numpy.random.random(10) * 2
        noise = numpy.hstack([small_noise, large_noise])
        features = features.reshape((100, 1))
        target = features.ravel() * 3.4 + 5.6 + noise
        return features, target

    def _check_same_as_linear(self, features, target, weights):
        # Fits both models with sample weights and expects the same
        # (non constant) predictions on the training data.
        linear = LinearRegression(fit_intercept=True)
        linear.fit(features, target, weights)
        piecewise = PiecewiseLinearRegression(verbose=False)
        piecewise.fit(features, target, weights)
        pred_linear = linear.predict(features)
        pred_piecewise = piecewise.predict(features)
        self.assertNotEqual(pred_piecewise.min(), pred_piecewise.max())
        self.assertEqual(pred_linear, pred_piecewise)

    def test_piecewise_regression_no_intercept(self):
        features, target = self._four_points()
        linear = LinearRegression(fit_intercept=False)
        linear.fit(features, target)
        piecewise = PiecewiseLinearRegression()
        piecewise.fit(features, target)

        pred_linear = linear.predict(features)
        pred_piecewise = piecewise.predict(features)
        for pred in (pred_linear, pred_piecewise):
            self.assertEqual(pred.shape, (4,))

        scores = [
            linear.score(features, target),
            piecewise.score(features, target),
            piecewise.binner_.score(features, target),
        ]
        for score in scores:
            self.assertIsInstance(score, float)

        # The fitted binner exposes ``decision_path``; its entries sum to 8
        # for these four observations.
        total = piecewise.binner_.decision_path(features).sum()
        self.assertEqual(total, 8)
        self.assertNotEqual(pred_piecewise.min(), pred_piecewise.max())

    def test_piecewise_regression_no_intercept_bins(self):
        features, target = self._four_points()
        linear = LinearRegression(fit_intercept=False)
        linear.fit(features, target)
        piecewise = PiecewiseLinearRegression(binner="bins")
        piecewise.fit(features, target)

        pred_linear = linear.predict(features)
        pred_piecewise = piecewise.predict(features)
        for pred in (pred_linear, pred_piecewise):
            self.assertEqual(pred.shape, (4,))

        scores = [
            linear.score(features, target),
            piecewise.score(features, target),
        ]
        for score in scores:
            self.assertIsInstance(score, float)

        # With ``binner="bins"`` the binner is used through ``transform``.
        binned = piecewise.binner_.transform(features)
        self.assertEqual(binned.shape, (4, 10))
        self.assertNotEqual(pred_piecewise.min(), pred_piecewise.max())

    def test_piecewise_regression_intercept_weights3(self):
        features = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
        target = numpy.array([1., 1.1, 1.2])
        weights = numpy.array([1., 1., 1.])
        self._check_same_as_linear(features, target, weights)

    def test_piecewise_regression_intercept_weights6(self):
        features = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3],
                                [0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
        target = numpy.array([1., 1.1, 1.2, 1., 1.1, 1.2])
        weights = numpy.array([1., 1., 1., 1., 1., 1.])
        self._check_same_as_linear(features, target, weights)

    def test_piecewise_regression_diff(self):
        features = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
        target = numpy.array([1., 1.1, 1.2, 10, 1.4])
        linear = LinearRegression()
        linear.fit(features, target)
        piecewise = PiecewiseLinearRegression(verbose=False)
        piecewise.fit(features, target)

        pred_linear = linear.predict(features)
        self.assertNotEmpty(pred_linear)
        pred_piecewise = piecewise.predict(features)

        # Two local estimators are expected: one is checked on the first
        # three rows, the other on the last two.
        self.assertEqual(len(piecewise.estimators_), 2)
        first, second = piecewise.estimators_[0], piecewise.estimators_[1]
        head = first.predict(features[:3, :])
        tail = second.predict(features[3:, :])
        self.assertEqual(pred_piecewise[:3], head)
        self.assertEqual(pred_piecewise[-2:], tail)

        score = piecewise.score(features, target)
        self.assertEqual(score, 1)

    def test_piecewise_regression_pandas(self):
        features = pandas.DataFrame(numpy.array([[0.1, 0.2], [0.2, 0.3]]))
        target = numpy.array([1., 1.1])
        linear = LinearRegression(fit_intercept=False)
        linear.fit(features, target)
        piecewise = PiecewiseLinearRegression()
        piecewise.fit(features, target)
        pred_linear = linear.predict(features)
        pred_piecewise = piecewise.predict(features)
        self.assertEqual(pred_linear, pred_piecewise)

    def test_piecewise_regression_list(self):
        # Fitting on a plain list of lists is expected to raise TypeError.
        features = [[0.1, 0.2], [0.2, 0.3]]
        target = numpy.array([1., 1.1])
        piecewise = PiecewiseLinearRegression()
        self.assertRaise(lambda: piecewise.fit(features, target), TypeError)

    def test_piecewise_regression_pickle(self):
        features, target = self._noisy_line()
        test_sklearn_pickle(lambda: LinearRegression(), features, target)
        test_sklearn_pickle(
            lambda: PiecewiseLinearRegression(), features, target)

    def test_piecewise_regression_clone(self):
        test_sklearn_clone(lambda: PiecewiseLinearRegression(verbose=True))

    def test_piecewise_regression_grid_search(self):
        features, target = self._noisy_line()

        # Calling the helper without any parameter grid is expected to
        # raise ValueError.
        self.assertRaise(
            lambda: test_sklearn_grid_search_cv(
                lambda: PiecewiseLinearRegression(), features, target),
            ValueError)

        result = test_sklearn_grid_search_cv(
            lambda: PiecewiseLinearRegression(), features, target,
            binner__max_depth=[2, 3])
        for key in ('model', 'score'):
            self.assertIn(key, result)
        self.assertGreater(result['score'], 0)
        self.assertLesser(result['score'], 1)


# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
@@ -5,6 +5,7 @@
from .categories_to_integers import CategoriesToIntegers
from .classification_kmeans import ClassifierAfterKMeans
from .ml_featurizer import model_featurizer
from .piecewise_linear_regression import PiecewiseLinearRegression
from .predictable_tsne import PredictableTSNE
from .quantile_mlpregressor import QuantileMLPRegressor
from .quantile_regression import QuantileLinearRegression
Oops, something went wrong.

0 comments on commit 366efb9

Please sign in to comment.
You can’t perform that action at this time.