Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
3c3c059
ENH add common link function submodule
lorentzenchr Jan 2, 2021
700f5bb
ENH add common loss function submodule
lorentzenchr Jan 2, 2021
b26a967
CLN replace deprecated np.int by int
lorentzenchr Apr 2, 2021
062ff68
DOC document default=1 for n_threads
lorentzenchr Apr 2, 2021
d11fd5c
CLN comments and line wrapping
lorentzenchr Apr 14, 2021
5f55187
CLN comments and doc
lorentzenchr Apr 14, 2021
da00b6f
BUG remove useless line of code
lorentzenchr Apr 14, 2021
9488d37
CLN remove line that was commented out
lorentzenchr Apr 14, 2021
18ff604
CLN nitpicks in comments and docstrings
lorentzenchr Apr 14, 2021
8cdee57
ENH set NPY_NO_DEPRECATED_API
lorentzenchr Apr 14, 2021
f573a4f
MNT change NPY_1_13_API_VERSION to NPY_1_7_API_VERSION
lorentzenchr Apr 15, 2021
bf0bbe2
MNT comment out NPY_NO_DEPRECATED_API
lorentzenchr Apr 15, 2021
b6fbd76
TST restructure domain test cases
lorentzenchr May 8, 2021
c2397dc
DOC add losses to API reference
lorentzenchr May 9, 2021
234cd0a
MNT add classes to __init__
lorentzenchr May 9, 2021
a0e3c7a
CLN fix import
lorentzenchr May 9, 2021
d42a524
DOC minor docstring changes
lorentzenchr May 9, 2021
c066bbe
TST prefer docstring over comment
lorentzenchr May 10, 2021
009eee0
ENH define loss.is_multiclass
lorentzenchr May 10, 2021
df4bbb8
DOC fix typos
lorentzenchr May 10, 2021
68c4468
CLN address review comments
lorentzenchr May 18, 2021
09c28fa
DOC small docstring improvements
lorentzenchr Jun 15, 2021
d74418a
TST test more losses in test_specific_fit_intercept_only
lorentzenchr Jun 15, 2021
a5018f4
FIX test_loss_boundary
lorentzenchr Jul 18, 2021
2871669
MNT apply black
lorentzenchr Jul 19, 2021
fc9e665
TST replace np.quantile by np.percentile
lorentzenchr Jul 20, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions doc/modules/classes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1647,3 +1647,27 @@ Recently deprecated

To be removed in 1.0 (renaming of 0.25)
---------------------------------------

.. _loss_function_ref:

:mod:`sklearn._loss`: Private Loss Function Classes
===========================================================

.. automodule:: sklearn._loss
:no-members:
:no-inherited-members:

.. currentmodule:: sklearn

.. autosummary::
:toctree: generated/
:template: class.rst

_loss.HalfSquaredError
_loss.AbsoluteError
_loss.PinballLoss
_loss.HalfPoissonLoss
_loss.HalfGammaLoss
_loss.HalfTweedieLoss
_loss.BinaryCrossEntropy
_loss.CategoricalCrossEntropy
27 changes: 27 additions & 0 deletions sklearn/_loss/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
The :mod:`sklearn._loss` module includes loss function classes suitable for
fitting classification and regression tasks.
"""

# Re-export the concrete loss classes implemented in the .loss submodule so
# that they can be imported directly from sklearn._loss.
from .loss import (
    HalfSquaredError,
    AbsoluteError,
    PinballLoss,
    HalfPoissonLoss,
    HalfGammaLoss,
    HalfTweedieLoss,
    BinaryCrossEntropy,
    CategoricalCrossEntropy,
)


# Public names of this (otherwise private) submodule; kept in sync with the
# import list above.
__all__ = [
    "HalfSquaredError",
    "AbsoluteError",
    "PinballLoss",
    "HalfPoissonLoss",
    "HalfGammaLoss",
    "HalfTweedieLoss",
    "BinaryCrossEntropy",
    "CategoricalCrossEntropy",
]
75 changes: 75 additions & 0 deletions sklearn/_loss/_loss.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# cython: language_level=3

import numpy as np
cimport numpy as np

np.import_array()


# Fused types for y_true, y_pred, raw_prediction
# NOTE: Two separate fused types are defined on purpose instead of a single
# one: this allows, e.g., y_true to be float64 while gradients/hessians are
# float32 (a use case of histogram gradient boosting). Using one fused type
# would force all of them to share the same dtype.
ctypedef fused Y_DTYPE_C:
    np.npy_float64
    np.npy_float32


# Fused types for gradient and hessian (may differ in dtype from Y_DTYPE_C,
# see note above).
ctypedef fused G_DTYPE_C:
    np.npy_float64
    np.npy_float32
Comment on lines +9 to +18
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fused types have been defined in the code base for KDTree, BallTree and DistMetrics (here are their definitions).

To ease maintenance in the long run, we might want to define as few new fused types as possible (I would favor using existing ones or Cython's built-in fused types).

Are there particular reasons for using numpy's types?
Would using Cython's floating be possible here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No particular reason for numpy types other than copying from the hist_gradient_boosting/common.pxd.

I do not use cython.floating as I want to have 2 different fused types:

  • Y_DTYPE_C for y_true, raw_prediction, sample_weight
  • G_DTYPE_C for loss, gradient, hessian

Using only one fused type, all of them would always be forced to the same dtype. Use case is again the HGBT, where gradients and hessians are float32, whereas y_true is float64.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sense. Maybe it is worth documenting the motivation here, explaining that defining two separate fused types allows more flexibility in input dtypes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reading a bit in https://numpy.org/devdocs/reference/c-api/dtype.html, np.npy_float64 is really always a 64-bit floating-point number, even on 32-bit systems, in contrast to a plain C double, which may be implementation specific. But I wonder whether any C compiler used for scikit-learn does not comply with IEEE 754-1989.



# Struct to return 2 doubles
# Used by cgrad_hess to return both values from a single (nogil) call;
# presumably val1 = gradient and val2 = hessian — confirm against the .pyx.
ctypedef struct double_pair:
    double val1
    double val2


# C base class for loss functions
# All methods operate on a single (y_true, raw_prediction) pair and are
# declared nogil so they can be called from prange loops without the GIL.
cdef class cLossFunction:
    # Pointwise loss value (implementation in the .pyx file).
    cdef double closs(self, double y_true, double raw_prediction) nogil
    # Derivative of the loss w.r.t. raw_prediction.
    cdef double cgradient(self, double y_true, double raw_prediction) nogil
    # Gradient and hessian together in one call (returned as a double_pair),
    # allowing implementations to share common subexpressions.
    cdef double_pair cgrad_hess(self, double y_true, double raw_prediction) nogil


# Half squared error loss; "Half" in the name suggests the loss is scaled by
# 1/2 — confirm in the .pyx implementation.
cdef class cHalfSquaredError(cLossFunction):
    cdef double closs(self, double y_true, double raw_prediction) nogil
    cdef double cgradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cgrad_hess(self, double y_true, double raw_prediction) nogil


# Absolute error (L1) loss.
cdef class cAbsoluteError(cLossFunction):
    cdef double closs(self, double y_true, double raw_prediction) nogil
    cdef double cgradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cgrad_hess(self, double y_true, double raw_prediction) nogil


# Pinball (quantile) loss, parametrized by the target quantile.
cdef class cPinballLoss(cLossFunction):
    cdef readonly double quantile  # readonly makes it inherited by children
    cdef double closs(self, double y_true, double raw_prediction) nogil
    cdef double cgradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cgrad_hess(self, double y_true, double raw_prediction) nogil


# Half Poisson deviance loss.
cdef class cHalfPoissonLoss(cLossFunction):
    cdef double closs(self, double y_true, double raw_prediction) nogil
    cdef double cgradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cgrad_hess(self, double y_true, double raw_prediction) nogil


# Half Gamma deviance loss.
cdef class cHalfGammaLoss(cLossFunction):
    cdef double closs(self, double y_true, double raw_prediction) nogil
    cdef double cgradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cgrad_hess(self, double y_true, double raw_prediction) nogil


# Half Tweedie deviance loss, parametrized by the Tweedie power.
cdef class cHalfTweedieLoss(cLossFunction):
    cdef readonly double power  # readonly makes it inherited by children
    cdef double closs(self, double y_true, double raw_prediction) nogil
    cdef double cgradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cgrad_hess(self, double y_true, double raw_prediction) nogil


# Binary cross-entropy (log loss) on the raw (link-space) prediction.
cdef class cBinaryCrossEntropy(cLossFunction):
    cdef double closs(self, double y_true, double raw_prediction) nogil
    cdef double cgradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cgrad_hess(self, double y_true, double raw_prediction) nogil
Loading