-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add quantile regression, change description and documentation #7
- Loading branch information
Showing
13 changed files
with
691 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
_doc/sphinxdoc/source/blog/2018/2018-05-07_quantile_regression.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
|
||
.. blogpost:: | ||
:title: Quantile regression with scikit-learn. | ||
:keywords: scikit-learn, quantile regression | ||
:date: 2018-05-07 | ||
:categories: machine learning | ||
|
||
:epkg:`scikit-learn` does not have any quantile regression. | ||
:epkg:`statsmodels` does have one | ||
`QuantReg <http://www.statsmodels.org/dev/generated/statsmodels.regression.quantile_regression.QuantReg.html>`_ | ||
but I wanted to try something I did for my teaching | ||
`Régression Quantile <http://www.xavierdupre.fr/app/ensae_teaching_cs/helpsphinx3/notebooks/td_note_2017_2.html?highlight=mediane>`_ | ||
based on `Iteratively reweighted least squares <https://en.wikipedia.org/wiki/Iteratively_reweighted_least_squares>`_. | ||
I thought it was a good case study to turn a simple algorithm into | ||
a learner :epkg:`scikit-learn` can reuse in a pipeline. | ||
The notebook :ref:`quantileregressionrst` demonstrates it | ||
and it is implemented in | ||
:class:`QuantileLinearRegression <mlinsights.mlmodel.quantile_regression.QuantileLinearRegression>`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
@brief test log(time=13s) | ||
""" | ||
|
||
import sys | ||
import os | ||
import unittest | ||
|
||
|
||
try: | ||
import pyquickhelper as skip_ | ||
except ImportError: | ||
path = os.path.normpath( | ||
os.path.abspath( | ||
os.path.join( | ||
os.path.split(__file__)[0], | ||
"..", | ||
"..", | ||
"..", | ||
"pyquickhelper", | ||
"src"))) | ||
if path not in sys.path: | ||
sys.path.append(path) | ||
import pyquickhelper as skip_ | ||
|
||
try: | ||
import src | ||
except ImportError: | ||
path = os.path.normpath( | ||
os.path.abspath( | ||
os.path.join( | ||
os.path.split(__file__)[0], | ||
"..", | ||
".."))) | ||
if path not in sys.path: | ||
sys.path.append(path) | ||
import src | ||
|
||
from pyquickhelper.loghelper import fLOG | ||
from pyquickhelper.pycode import add_missing_development_version | ||
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage | ||
import src.mlinsights | ||
|
||
|
||
class TestNotebookQuantile(unittest.TestCase):
    """Unit test which runs the notebooks selected by the ``quantile`` filter."""

    def setUp(self):
        # NOTE(review): presumably makes a development checkout of
        # jyquickhelper importable when it is not installed -- confirm
        # against the pyquickhelper documentation.
        add_missing_development_version(["jyquickhelper"], __file__, hide=True)

    def test_notebook_quantile(self):
        """
        Calls ``test_notebook_execution_coverage`` on the ``_doc/notebooks``
        folder (located two levels above this file) with the ``quantile``
        pattern and the ``mlinsights`` module name.
        """
        # Output is only printed when the file is run as a script.
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        # Sanity check: the package under test was imported.
        self.assertTrue(src.mlinsights is not None)

        this_dir = os.path.dirname(__file__)
        notebooks = os.path.join(this_dir, "..", "..", "_doc", "notebooks")
        test_notebook_execution_coverage(
            __file__, "quantile", notebooks, 'mlinsights', fLOG=fLOG)
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
@brief test log(time=2s) | ||
""" | ||
|
||
import sys | ||
import os | ||
import unittest | ||
import numpy | ||
import pandas | ||
from sklearn.linear_model import LinearRegression | ||
|
||
|
||
try: | ||
import pyquickhelper as skip_ | ||
except ImportError: | ||
path = os.path.normpath( | ||
os.path.abspath( | ||
os.path.join( | ||
os.path.split(__file__)[0], | ||
"..", | ||
"..", | ||
"..", | ||
"pyquickhelper", | ||
"src"))) | ||
if path not in sys.path: | ||
sys.path.append(path) | ||
import pyquickhelper as skip_ | ||
|
||
|
||
try: | ||
import src | ||
except ImportError: | ||
path = os.path.normpath( | ||
os.path.abspath( | ||
os.path.join( | ||
os.path.split(__file__)[0], | ||
"..", | ||
".."))) | ||
if path not in sys.path: | ||
sys.path.append(path) | ||
import src | ||
|
||
from pyquickhelper.pycode import ExtTestCase, get_temp_folder | ||
from src.mlinsights.mlmodel import QuantileLinearRegression | ||
|
||
|
||
class TestQuantileRegression(ExtTestCase):
    """
    Compares ``QuantileLinearRegression`` with scikit-learn's
    ``LinearRegression`` on small hand-written datasets and on random data.
    """

    def test_quantile_regression_no_intercept(self):
        """Two points, two features, no intercept: both models must agree."""
        X = numpy.array([[0.1, 0.2], [0.2, 0.3]])
        Y = numpy.array([1., 1.1])
        clr = LinearRegression(fit_intercept=False)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(fit_intercept=False)
        clq.fit(X, Y)
        self.assertEqual(clr.intercept_, 0)
        self.assertEqualArray(clr.coef_, clq.coef_)
        self.assertEqual(clq.intercept_, 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)

    def test_quantile_regression_intercept(self):
        """Three points with an intercept: both models must agree."""
        X = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
        Y = numpy.array([1., 1.1, 1.2])
        clr = LinearRegression(fit_intercept=True)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        clq.fit(X, Y)
        self.assertNotEqual(clr.intercept_, 0)
        self.assertNotEqual(clq.intercept_, 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)
        self.assertEqualArray(clr.coef_, clq.coef_)

    def test_quantile_regression_diff(self):
        """The last target (10) is an outlier: the two models must differ."""
        X = numpy.array([[0.1], [0.2], [0.3], [0.4]])
        Y = numpy.array([1., 1.1, 1.2, 10])
        clr = LinearRegression(fit_intercept=True)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        clq.fit(X, Y)
        self.assertNotEqual(clr.intercept_, 0)
        self.assertNotEqual(clq.intercept_, 0)
        self.assertNotEqualArray(clr.coef_, clq.coef_)
        self.assertNotEqualArray(clr.intercept_, clq.intercept_)
        # The number of iterations is compared to the bound 10.
        self.assertLesser(clq.n_iter_, 10)

    def test_quantile_regression_pandas(self):
        """Same data as the no-intercept test, features given as a DataFrame."""
        X = pandas.DataFrame(numpy.array([[0.1, 0.2], [0.2, 0.3]]))
        Y = numpy.array([1., 1.1])
        clr = LinearRegression(fit_intercept=False)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(fit_intercept=False)
        clq.fit(X, Y)
        self.assertEqual(clr.intercept_, 0)
        self.assertEqualArray(clr.coef_, clq.coef_)
        self.assertEqual(clq.intercept_, 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)

    def test_quantile_regression_list(self):
        """Features given as a plain list: ``fit`` is expected to raise TypeError."""
        X = [[0.1, 0.2], [0.2, 0.3]]
        Y = numpy.array([1., 1.1])
        clq = QuantileLinearRegression(fit_intercept=False)
        self.assertRaise(lambda: clq.fit(X, Y), TypeError)

    # Renamed from ``test_quantile_regression_list``: the duplicated name
    # replaced the method above in the class namespace, so the list-input
    # test was never collected nor run.
    def test_quantile_regression_list2(self):
        """
        Random data: 900 targets get a small centered noise, 100 targets
        get a larger positive noise. Checks the wrong target shape is
        rejected, then compares both models once the shape is fixed.
        """
        X = numpy.random.random(1000)
        eps1 = (numpy.random.random(900) - 0.5) * 0.1
        eps2 = numpy.random.random(100) * 2
        eps = numpy.hstack([eps1, eps2])
        X = X.reshape((1000, 1))
        # X is (1000, 1) and eps is (1000,): the sum broadcasts to a
        # (1000, 1000) array, which fit is expected to reject.
        Y = X * 3.4 + 5.6 + eps

        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        self.assertRaise(lambda: clq.fit(X, Y), ValueError)

        # Flattening X gives a one-dimensional target of 1000 values.
        Y = X.ravel() * 3.4 + 5.6 + eps

        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        clq.fit(X, Y)

        clr = LinearRegression(fit_intercept=True)
        clr.fit(X, Y)

        self.assertNotEqual(clr.intercept_, 0)
        self.assertNotEqual(clq.intercept_, 0)
        self.assertNotEqualArray(clr.coef_, clq.coef_)
        self.assertNotEqualArray(clr.intercept_, clq.intercept_)
        self.assertLesser(clq.n_iter_, 10)

        # Both models must return predictions of the same shape.
        pr = clr.predict(X)
        pq = clq.predict(X)
        self.assertEqual(pr.shape, pq.shape)
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
""" | ||
@file | ||
@brief Shortcuts to *mlmodel*. | ||
""" | ||
from .quantile_regression import QuantileLinearRegression |
Oops, something went wrong.