
fix bug #12 (weight in quantile regression), #11 specify quantiles

sdpython committed Jun 16, 2018
1 parent bc5b883 commit ada44bae9075b8e6c89dedb4032b182459a358fc
@@ -5,14 +5,18 @@
History
=======

current - 2018-05-08 - 0.00Mb
current - 2018-06-16 - 0.00Mb
=============================

* `9`: move items from ensae_teaching_cs (2018-05-08)
* `12`: fix bug in quantile regression: wrong weight for linear regression (2018-06-16)
* `11`: allow specifying the quantile (2018-06-16)
* `4`: add a function to compute non-linear correlations (2018-06-16)
* `10`: implement a combination of logistic regression and k-means (2018-05-27)

0.1.61 - 2018-05-08 - 0.02Mb
============================

* `9`: move items from ensae_teaching_cs (2018-05-08)
* `7`: add quantile regression (2018-05-07)
* `5`: replace flake8 by code style (2018-04-14)
* `1`: change background for cells in notebooks converted into rst then in html, highlight-ipython3 (2018-01-05)

@@ -68,8 +68,8 @@ def test_quantile_regression_intercept_weights(self):
self.assertEqualArray(clr.coef_, clq.coef_)

def test_quantile_regression_diff(self):
X = numpy.array([[0.1], [0.2], [0.3], [0.4]])
Y = numpy.array([1., 1.1, 1.2, 10])
X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
Y = numpy.array([1., 1.1, 1.2, 10, 1.4])
clr = LinearRegression(fit_intercept=True)
clr.fit(X, Y)
clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
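
The fifth observation added above gives the regular points a clear trend while 10 stays an outlier, so the least-squares and median fits now disagree. A minimal sketch of that contrast (not part of the test suite; assumes mlinsights is installed and exposes QuantileLinearRegression under mlinsights.mlmodel):

# Sketch only, not part of the commit.
import numpy
from sklearn.linear_model import LinearRegression
from mlinsights.mlmodel import QuantileLinearRegression

X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
Y = numpy.array([1., 1.1, 1.2, 10, 1.4])  # 10 is the outlier

clr = LinearRegression(fit_intercept=True).fit(X, Y)
clq = QuantileLinearRegression(fit_intercept=True).fit(X, Y)

# The L2 coefficients are pulled toward the outlier, the L1 (median)
# coefficients much less so, which is presumably what the test checks.
print(clr.coef_, clr.intercept_)
print(clq.coef_, clq.intercept_)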
@@ -156,6 +156,52 @@ def test_quantile_regression_grid_search(self):
self.assertGreater(res['score'], 0)
self.assertLesser(res['score'], 1)

def test_quantile_regression_diff_quantile(self):
X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5], [0.6]])
Y = numpy.array([1., 1.11, 1.21, 10, 1.29, 1.39])
clqs = []
scores = []
for q in [0.25, 0.4999, 0.5, 0.5001, 0.75]:
clq = QuantileLinearRegression(
verbose=False, fit_intercept=True, quantile=q)
clq.fit(X, Y)
clqs.append(clq)
sc = clq.score(X, Y)
scores.append(sc)
self.assertGreater(sc, 0)

self.assertLesser(abs(clqs[1].intercept_ - clqs[2].intercept_), 0.01)
self.assertLesser(abs(clqs[2].intercept_ - clqs[3].intercept_), 0.01)
self.assertLesser(abs(clqs[1].coef_[0] - clqs[2].coef_[0]), 0.01)
self.assertLesser(abs(clqs[2].coef_[0] - clqs[3].coef_[0]), 0.01)

self.assertGreater(abs(clqs[0].intercept_ - clqs[1].intercept_), 0.01)
# self.assertGreater(abs(clqs[3].intercept_ - clqs[4].intercept_), 0.01)
self.assertGreater(abs(clqs[0].coef_[0] - clqs[1].coef_[0]), 0.05)
# self.assertGreater(abs(clqs[3].coef_[0] - clqs[4].coef_[0]), 0.05)

self.assertLesser(abs(scores[1] - scores[2]), 0.01)
self.assertLesser(abs(scores[2] - scores[3]), 0.01)

def test_quantile_regression_quantile_check(self):
n = 100
X = (numpy.arange(n) / n)
Y = X + X * X / n
X = X.reshape((n, 1))
for q in [0.1, 0.5, 0.9]:
clq = QuantileLinearRegression(
verbose=False, fit_intercept=True, quantile=q, max_iter=10)
clq.fit(X, Y)
y = clq.predict(X)
diff = y - Y
sign = numpy.sign(diff)
pos = (sign > 0).sum()
neg = (sign < 0).sum()
if q < 0.5:
self.assertGreater(neg, pos * 4)
if q > 0.5:
self.assertLesser(neg * 7, pos)


if __name__ == "__main__":
unittest.main()
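
As an independent sanity check of the new quantile option, statsmodels minimizes the same pinball objective in QuantReg. A hedged sketch on the same data as test_quantile_regression_quantile_check (illustration only, assumes statsmodels is installed):

# Independent cross-check, not part of the commit.
import numpy
import statsmodels.api as sm

n = 100
X = numpy.arange(n) / n
Y = X + X * X / n
Xc = sm.add_constant(X)  # QuantReg has no fit_intercept option

for q in [0.1, 0.5, 0.9]:
    res = sm.QuantReg(Y, Xc).fit(q=q)
    # res.params[0] plays the role of intercept_, res.params[1] of coef_[0].
    print(q, res.params)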
@@ -1,4 +1,4 @@
#-*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""
@file
@brief Implements a quantile linear regression.
@@ -14,10 +14,24 @@ class QuantileLinearRegression(LinearRegression):
trained with norm *L1*. This class inherits from
:epkg:`sklearn:linear_model:LinearRegression`.
See notebook :ref:`quantileregressionrst`.
Norm *L1* is chosen if ``quantile=0.5``; otherwise,
for *quantile=*:math:`\\rho`,
the following error is optimized:

.. math::

    \\sum_i \\rho |f(X_i) - Y_i|^- + (1-\\rho) |f(X_i) - Y_i|^+

where :math:`|f(X_i) - Y_i|^- = \\max(Y_i - f(X_i), 0)` and
:math:`|f(X_i) - Y_i|^+ = \\max(f(X_i) - Y_i, 0)`.
:math:`f(X_i)` is the prediction, :math:`Y_i` the expected
value.
"""

def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
n_jobs=1, delta=0.0001, max_iter=10, verbose=False):
n_jobs=1, delta=0.0001, max_iter=10, quantile=0.5,
verbose=False):
"""
Parameters
----------
@@ -50,6 +64,10 @@ def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
Used to ensure matrices have an inverse
(*M + delta * I*).
quantile: float, default 0.5,
quantile the regression estimates;
0.5 corresponds to the median (norm *L1*).
verbose: bool, optional, default False
Prints error at each iteration of the optimisation.
"""
@@ -58,6 +76,7 @@ def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
self.max_iter = max_iter
self.verbose = verbose
self.delta = delta
self.quantile = quantile

def fit(self, X, y, sample_weight=None):
"""
@@ -109,15 +128,18 @@ def fit(self, X, y, sample_weight=None):

def compute_z(Xm, beta, Y, W, delta=0.0001):
"compute z"
epsilon = numpy.abs(Y - Xm @ beta)
r = numpy.reciprocal(numpy.maximum(
epsilon, numpy.ones(epsilon.shape) * delta))
r = r * r
deltas = numpy.ones(X.shape[0]) * delta
epsilon, mult = QuantileLinearRegression._epsilon(
Y, Xm @ beta, self.quantile)
r = numpy.reciprocal(numpy.maximum(epsilon, deltas))
if mult is not None:
epsilon *= 1 - mult
r *= 1 - mult
return r, epsilon

if not isinstance(X, numpy.ndarray):
if hasattr(X, 'as_matrix'):
X = X.as_matrix()
if hasattr(X, 'values'):
X = X.values
else:
raise TypeError("X must be an array or a dataframe.")
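
For context, compute_z performs one step of iteratively reweighted least squares: an L1-style fit is approximated by repeated weighted L2 fits whose weights are reciprocal residuals, and the committed code additionally rescales them by 1 - mult to tilt the fit toward the requested quantile. A self-contained sketch of the idea for the median case (a simplification, not the library's exact loop):

import numpy
from sklearn.linear_model import LinearRegression

def irls_median_fit(X, y, delta=1e-4, max_iter=10):
    W = numpy.ones(y.shape[0])
    clr = LinearRegression(fit_intercept=True)
    for _ in range(max_iter):
        clr.fit(X, y, sample_weight=W)
        residual = numpy.abs(y - clr.predict(X))
        # Large residuals get small weights, so outliers lose influence
        # in the next L2 fit; delta guards against division by zero.
        W = numpy.reciprocal(numpy.maximum(residual, delta))
    return clr

X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
y = numpy.array([1., 1.1, 1.2, 10, 1.4])
clr = irls_median_fit(X, y)
print(clr.coef_, clr.intercept_)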

@@ -136,9 +158,10 @@ def compute_z(Xm, beta, Y, W, delta=0.0001):
clr.fit(Xm, y, W)
beta = clr.coef_
W, epsilon = compute_z(Xm, beta, y, W, delta=self.delta)
E = epsilon.sum()
if sample_weight is not None:
W *= sample_weight
epsilon *= sample_weight
E = epsilon.sum()
self.n_iter_ = i
if self.verbose:
print(
@@ -156,6 +179,21 @@ def compute_z(Xm, beta, Y, W, delta=0.0001):

return self
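
The point of folding sample_weight into both W and epsilon in the loop above (bug #12) is that a weight of 2 on a row should act like duplicating that row. A hedged check of that semantics, in the spirit of test_quantile_regression_intercept_weights:

# Illustration only, not part of the commit.
import numpy
from mlinsights.mlmodel import QuantileLinearRegression

X = numpy.array([[0.1], [0.2], [0.3], [0.4]])
Y = numpy.array([1., 1.1, 1.2, 10])
w = numpy.array([1., 1., 1., 2.])

Xd = numpy.vstack([X, X[-1:]])  # duplicate the doubly weighted row
Yd = numpy.hstack([Y, Y[-1:]])

clw = QuantileLinearRegression(fit_intercept=True).fit(X, Y, sample_weight=w)
cld = QuantileLinearRegression(fit_intercept=True).fit(Xd, Yd)
print(clw.coef_, cld.coef_)  # expected to (nearly) coincide after the fix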

@staticmethod
def _epsilon(y_true, y_pred, quantile, sample_weight=None):
diff = y_pred - y_true
epsilon = numpy.abs(diff)
if quantile != 0.5:
sign = numpy.sign(diff)
mult = numpy.ones(y_true.shape[0])
mult[sign > 0] *= quantile
mult[sign < 0] *= (1 - quantile)
else:
mult = None
if sample_weight is not None:
epsilon *= sample_weight
return epsilon, mult

def score(self, X, y, sample_weight=None):
"""
Returns the mean absolute error regression loss when ``quantile=0.5``, otherwise the corresponding quantile loss.
@@ -178,4 +216,12 @@ def score(self, X, y, sample_weight=None):
mean absolute error regression loss
"""
pred = self.predict(X)
return mean_absolute_error(y, pred, sample_weight=sample_weight)

if self.quantile != 0.5:
epsilon, mult = QuantileLinearRegression._epsilon(
y, pred, self.quantile, sample_weight)
if mult is not None:
epsilon *= mult * 2
return epsilon.sum() / X.shape[0]
else:
return mean_absolute_error(y, pred, sample_weight=sample_weight)
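
One consistency note on the new score: at quantile=0.5 the code keeps the mean_absolute_error branch, and the factor 2 in the other branch is what puts both branches on the same scale, since 2 * 0.5 * |f(X_i) - Y_i| = |f(X_i) - Y_i|. A tiny sketch of that equality (illustration only):

import numpy
y = numpy.array([1., 2., 3.])
pred = numpy.array([1.5, 1.5, 3.5])
eps = numpy.abs(pred - y)
mult = numpy.full(y.shape, 0.5)  # both residual signs weighted 0.5 at the median
print((eps * mult * 2).sum() / y.shape[0])  # 0.5
print(eps.mean())                           # 0.5, the same value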
