Skip to content

Commit

Permalink
[ENH] Adapter for Scipy Distributions (#287)
Browse files Browse the repository at this point in the history
Fixes #227 

#### What does this implement/fix? Explain your changes.
<!--
A clear and concise description of what you have implemented.
-->

- Adapter for Scipy distributions
- Fisk Distribution using Scipy Adapter
- Poisson Distribution using Scipy Adapter
  • Loading branch information
malikrafsan committed May 3, 2024
1 parent 4441e55 commit 79dccf2
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 172 deletions.
3 changes: 2 additions & 1 deletion skpro/distributions/adapters/scipy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Adapters for probability distribution objects, scipy facing."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

from skpro.distributions.adapters.scipy._distribution import _ScipyAdapter
from skpro.distributions.adapters.scipy._empirical import empirical_from_discrete

__all__ = ["empirical_from_discrete"]
__all__ = ["empirical_from_discrete", "_ScipyAdapter"]
109 changes: 109 additions & 0 deletions skpro/distributions/adapters/scipy/_distribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
"""Adapter for Scipy Distributions."""

__author__ = ["malikrafsan"]

from typing import Union

import pandas as pd
from scipy.stats import rv_continuous, rv_discrete

from skpro.distributions.base import BaseDistribution

__all__ = ["_ScipyAdapter"]


class _ScipyAdapter(BaseDistribution):
    """Adapter exposing scipy distributions through the distribution interface.

    Concrete subclasses provide the scipy distribution object through
    ``_get_scipy_object`` and its parameters through ``_get_scipy_param``.
    The private distribution methods defined here forward to the scipy
    method of the corresponding name, using those parameters.

    The class is abstract and should not be instantiated directly.
    """

    # name of the instance attribute under which the scipy object is stored
    _distribution_attr = "_dist"
    _tags = {
        "object_type": ["distribution", "scipy_distribution_adapter"],
    }

    def __init__(self, index=None, columns=None):
        # the scipy object is attached before the base class constructor runs
        setattr(self, self._distribution_attr, self._get_scipy_object())
        super().__init__(index, columns)

    def _get_scipy_object(self) -> Union[rv_continuous, rv_discrete]:
        """Return the scipy distribution object; abstract.

        Subclasses should import the scipy distribution object and return it.

        Raises
        ------
        NotImplementedError
            always, unless overridden by a subclass
        """
        raise NotImplementedError("abstract method")

    def _get_scipy_param(self):
        """Return the scipy distribution parameters; abstract.

        Subclasses should return a tuple with two elements: a list of
        positional arguments (args) and a dictionary of keyword
        arguments (kwds).

        Raises
        ------
        NotImplementedError
            always, unless overridden by a subclass
        """
        raise NotImplementedError("abstract method")

    def _call_scipy_method(self, scipy_method, *leading):
        """Call ``scipy_method`` of the stored scipy object.

        Parameters
        ----------
        scipy_method : str
            name of the method to look up on the scipy object
        *leading
            positional arguments passed ahead of the distribution parameters

        Returns
        -------
        whatever the scipy method returns
        """
        # stored object is read first, parameters second, method looked up last
        dist: Union[rv_continuous, rv_discrete] = getattr(
            self, self._distribution_attr
        )
        args, kwds = self._get_scipy_param()
        return getattr(dist, scipy_method)(*leading, *args, **kwds)

    def _is_scipy_instance(self, scipy_type):
        """Return whether the stored scipy object is a ``scipy_type`` instance."""
        return isinstance(getattr(self, self._distribution_attr), scipy_type)

    def _mean(self):
        """Return the scipy ``mean`` for the distribution parameters."""
        return self._call_scipy_method("mean")

    def _var(self):
        """Return the scipy ``var`` for the distribution parameters."""
        return self._call_scipy_method("var")

    def _pdf(self, x: pd.DataFrame):
        """Return the scipy ``pdf`` at x, or 0 for an ``rv_discrete`` object."""
        if self._is_scipy_instance(rv_discrete):
            return 0
        return self._call_scipy_method("pdf", x)

    def _log_pdf(self, x: pd.DataFrame):
        """Return the scipy ``logpdf`` at x, or 0 for an ``rv_discrete`` object."""
        # NOTE(review): the log of a zero density is -inf, yet 0 is returned
        # here - confirm this is intended before relying on the value
        if self._is_scipy_instance(rv_discrete):
            return 0
        return self._call_scipy_method("logpdf", x)

    def _cdf(self, x: pd.DataFrame):
        """Return the scipy ``cdf`` evaluated at x."""
        return self._call_scipy_method("cdf", x)

    def _ppf(self, p: pd.DataFrame):
        """Return the scipy ``ppf`` evaluated at p."""
        return self._call_scipy_method("ppf", p)

    def _pmf(self, x: pd.DataFrame):
        """Return the scipy ``pmf`` at x, or 0 for an ``rv_continuous`` object."""
        if self._is_scipy_instance(rv_continuous):
            return 0
        return self._call_scipy_method("pmf", x)

    def pmf(self, x: pd.DataFrame):
        """Return the probability mass function evaluated at x.

        Delegates to ``_boilerplate`` with the private method name ``"_pmf"``;
        ``_boilerplate`` is not defined in this class.
        """
        return self._boilerplate("_pmf", x=x)

    def _log_pmf(self, x: pd.DataFrame):
        """Return the scipy ``logpmf`` at x, or 0 for an ``rv_continuous`` object."""
        # NOTE(review): the log of a zero mass is -inf, yet 0 is returned
        # here - confirm this is intended before relying on the value
        if self._is_scipy_instance(rv_continuous):
            return 0
        return self._call_scipy_method("logpmf", x)

    def log_pmf(self, x: pd.DataFrame):
        """Return the log of the probability mass function evaluated at x.

        Delegates to ``_boilerplate`` with the private method name
        ``"_log_pmf"``; ``_boilerplate`` is not defined in this class.
        """
        return self._boilerplate("_log_pmf", x=x)
85 changes: 85 additions & 0 deletions skpro/distributions/adapters/scipy/tests/test_scipy_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

import numpy as np
import pandas as pd
import pytest
from skbase.testing import QuickTester

from skpro.tests.test_all_estimators import BaseFixtureGenerator, PackageConfig

__author__ = ["fkiraly", "malikrafsan"]


def test_empirical_from_discrete():
Expand Down Expand Up @@ -40,3 +46,82 @@ def test_empirical_from_discrete():
)
assert np.all(emp2.spl.index == expected_idx)
assert np.all(emp2.spl.columns == ["abc"])


class ScipyDistributionFixtureGenerator(BaseFixtureGenerator):
    """Fixture generator for tests of the scipy distributions adapter.

    Restricts the generated fixtures to objects whose object type is
    ``"scipy_distribution_adapter"``.

    Fixtures parameterized
    ----------------------
    object_class: object inheriting from BaseObject
        ranges over object classes not excluded by EXCLUDE_OBJECTS, EXCLUDED_TESTS
    object_instance: instance of object inheriting from BaseObject
        ranges over object classes not excluded by EXCLUDE_OBJECTS, EXCLUDED_TESTS
        instances are generated by create_test_instance class method
    """

    # object type used to select the classes under test
    object_type_filter = "scipy_distribution_adapter"


class TestScipyAdapter(PackageConfig, ScipyDistributionFixtureGenerator, QuickTester):
    """Compare adapter methods against direct calls to the scipy object.

    Every test calls the adapter method first, then computes the reference
    value, and asserts that the two agree under ``np.allclose``.
    """

    # pairs of (adapter method name, scipy method name), grouped by call shape
    METHOD_TESTS = {
        "NO_PARAMS": [("mean", "mean"), ("var", "var")],
        "X_PARAMS": [("cdf", "cdf"), ("ppf", "ppf")],
        "CONTINUOUS": [("pdf", "pdf"), ("log_pdf", "logpdf")],
        "DISCRETE": [("pmf", "pmf"), ("log_pmf", "logpmf")],
    }

    # evaluation points passed as the x argument
    X_VALUES = [0.1, 0.5, 0.99]

    @staticmethod
    def _scipy_reference(object_instance, scipy_method, *leading):
        """Return the scipy result for ``scipy_method`` with the instance params.

        ``leading`` holds the positional arguments placed ahead of the
        distribution parameters, e.g., the evaluation point.
        """
        params = object_instance._get_scipy_param()
        scipy_obj = object_instance._get_scipy_object()
        return getattr(scipy_obj, scipy_method)(*leading, *params[0], **params[1])

    @pytest.mark.parametrize("method,scipy_method", METHOD_TESTS["NO_PARAMS"])
    def test_method_no_params(self, object_instance, method, scipy_method):
        """Test method that doesn't need additional parameters."""
        res = getattr(object_instance, method)()
        scipy_res = self._scipy_reference(object_instance, scipy_method)

        assert np.allclose(res, scipy_res)

    @pytest.mark.parametrize("method,scipy_method", METHOD_TESTS["X_PARAMS"])
    @pytest.mark.parametrize("x", X_VALUES)
    def test_method_with_x_params(self, object_instance, method, scipy_method, x):
        """Test method that needs x as parameter."""
        res = getattr(object_instance, method)(x)
        scipy_res = self._scipy_reference(object_instance, scipy_method, x)

        assert np.allclose(res, scipy_res)

    @pytest.mark.parametrize("method,scipy_method", METHOD_TESTS["CONTINUOUS"])
    @pytest.mark.parametrize("x", X_VALUES)
    def test_method_continuous_dist(self, object_instance, method, scipy_method, x):
        """Test continuous distribution method.

        The expected value is 0 when the measure type is not continuous.
        """
        res = getattr(object_instance, method)(x)
        # NOTE(review): reads the ``_tags`` dict directly; confirm whether the
        # tag accessor should be used here instead
        if object_instance._tags["distr:measuretype"] == "continuous":
            scipy_res = self._scipy_reference(object_instance, scipy_method, x)
        else:
            scipy_res = 0

        assert np.allclose(res, scipy_res)

    @pytest.mark.parametrize("method,scipy_method", METHOD_TESTS["DISCRETE"])
    @pytest.mark.parametrize("x", X_VALUES)
    def test_method_discrete_dist(self, object_instance, method, scipy_method, x):
        """Test discrete distribution method.

        The expected value is 0 when the measure type is not discrete.
        """
        res = getattr(object_instance, method)(x)
        # NOTE(review): reads the ``_tags`` dict directly; confirm whether the
        # tag accessor should be used here instead
        if object_instance._tags["distr:measuretype"] == "discrete":
            scipy_res = self._scipy_reference(object_instance, scipy_method, x)
        else:
            scipy_res = 0

        assert np.allclose(res, scipy_res)
111 changes: 8 additions & 103 deletions skpro/distributions/fisk.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
"""Log-logistic aka Fisk probability distribution."""

__author__ = ["fkiraly"]
__author__ = ["fkiraly", "malikrafsan"]

import pandas as pd
from scipy.stats import fisk
from scipy.stats import fisk, rv_continuous

from skpro.distributions.base import BaseDistribution
from skpro.distributions.adapters.scipy import _ScipyAdapter


class Fisk(BaseDistribution):
class Fisk(_ScipyAdapter):
r"""Fisk distribution, aka log-logistic distribution.
The Fisk distribution is parametrized by a scale parameter :math:`\alpha`
Expand Down Expand Up @@ -47,109 +47,14 @@ def __init__(self, alpha=1, beta=1, index=None, columns=None):

super().__init__(index=index, columns=columns)

def _mean(self):
"""Return expected value of the distribution.
def _get_scipy_object(self) -> rv_continuous:
return fisk

Returns
-------
2D np.ndarray, same shape as ``self``
expected value of distribution (entry-wise)
"""
def _get_scipy_param(self):
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

mean_arr = fisk.mean(scale=alpha, c=beta)
return mean_arr

def _var(self):
r"""Return element/entry-wise variance of the distribution.
Returns
-------
2D np.ndarray, same shape as ``self``
variance of the distribution (entry-wise)
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

var_arr = fisk.var(scale=alpha, c=beta)
return var_arr

def _pdf(self, x):
"""Probability density function.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the pdf at
Returns
-------
2D np.ndarray, same shape as ``self``
pdf values at the given points
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

pdf_arr = fisk.pdf(x, scale=alpha, c=beta)
return pdf_arr

def _log_pdf(self, x):
"""Logarithmic probability density function.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the pdf at
Returns
-------
2D np.ndarray, same shape as ``self``
log pdf values at the given points
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

lpdf_arr = fisk.logpdf(x, scale=alpha, c=beta)
return lpdf_arr

def _cdf(self, x):
"""Cumulative distribution function.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the cdf at
Returns
-------
2D np.ndarray, same shape as ``self``
cdf values at the given points
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

cdf_arr = fisk.cdf(x, scale=alpha, c=beta)
return cdf_arr

def _ppf(self, p):
"""Quantile function = percent point function = inverse cdf.
Parameters
----------
p : 2D np.ndarray, same shape as ``self``
values to evaluate the ppf at
Returns
-------
2D np.ndarray, same shape as ``self``
ppf values at the given points
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

icdf_arr = fisk.ppf(p, scale=alpha, c=beta)
return icdf_arr
return [], {"c": beta, "scale": alpha}

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down
Loading

0 comments on commit 79dccf2

Please sign in to comment.