-
-
Notifications
You must be signed in to change notification settings - Fork 72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add pandas uncertainty array #184
Open
andrewgsavage
wants to merge
7
commits into
lmfit:master
Choose a base branch
from
andrewgsavage:pandas
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 6 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
d78c98b
add uncertainty array
andrewgsavage 260a4d1
get some more tests working
andrewgsavage 5217c85
arithmetic ops
andrewgsavage 94cd976
isna
andrewgsavage c074f96
isnumeric
andrewgsavage c3cbbaa
tests running
andrewgsavage e829224
Merge branch 'master' into pandas
andrewgsavage File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,276 @@ | ||
""" | ||
This file contains the tests required by pandas for an ExtensionArray and ExtensionType. | ||
""" | ||
import warnings | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import pandas._testing as tm | ||
import pytest | ||
from pandas.core import ops | ||
from pandas.tests.extension import base | ||
from pandas.tests.extension.conftest import ( | ||
as_frame, # noqa: F401 | ||
as_array, # noqa: F401 | ||
as_series, # noqa: F401 | ||
fillna_method, # noqa: F401 | ||
groupby_apply_op, # noqa: F401 | ||
use_numpy, # noqa: F401 | ||
) | ||
|
||
from .uncertainty_array import UncertaintyArray, UncertaintyDtype, ufloat | ||
from uncertainties import umath | ||
|
||
# from .core import ufloat | ||
|
||
@pytest.fixture(params=[True, False])
def box_in_series(request):
    """Parametrized flag: should the data be boxed in a Series?"""
    should_box = request.param
    return should_box
|
||
|
||
@pytest.fixture
def dtype():
    """Provide a newly constructed ``UncertaintyDtype``."""
    uncertainty_dtype = UncertaintyDtype()
    return uncertainty_dtype
|
||
|
||
@pytest.fixture
def data(request):
    """Build a 100-element ``UncertaintyArray``.

    Values run over ``np.arange(1.0, 101.0)``; each is passed to ``ufloat``
    together with ``abs(value) / 100`` as its second argument.
    """
    nominal_values = np.arange(start=1.0, stop=101.0)
    elements = [ufloat(nominal, abs(nominal) / 100) for nominal in nominal_values]
    return UncertaintyArray(elements)
|
||
|
||
@pytest.fixture
def data_missing():
    """Build a two-element ``UncertaintyArray`` from ``[np.nan, 1]``."""
    nominal_values = [np.nan, 1]
    elements = [ufloat(nominal, abs(nominal) / 100) for nominal in nominal_values]
    return UncertaintyArray(elements)
|
||
|
||
@pytest.fixture
def data_for_twos():
    """Build a 100-element ``UncertaintyArray`` whose every value is 2.0."""
    twos = [2.0] * 100
    # One separate ufloat is created per element, as in a plain loop.
    elements = [ufloat(two, abs(two) / 100) for two in twos]
    return UncertaintyArray(elements)
|
||
|
||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture returning ``data`` and then ``data_missing``."""
    fixtures_by_name = {"data": data, "data_missing": data_missing}
    # ``.get`` yields None for an unrecognised param name.
    return fixtures_by_name.get(request.param)
|
||
|
||
@pytest.fixture
def data_repeated(data):
    """Yield a generator factory that produces ``data`` ``count`` times.

    Every item produced is the same ``data`` object; no copies are made.
    """

    def gen(count):
        yield from (data for _ in range(count))

    yield gen
|
||
|
||
@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
    """Key argument for sorting tests: ``None`` (no key) or the identity function."""
    key = request.param
    return key
|
||
|
||
@pytest.fixture
def data_for_sorting():
    """Build a three-element ``UncertaintyArray`` from ``[0.3, 10.0, -50.0]``."""
    nominal_values = [0.3, 10.0, -50.0]
    elements = [ufloat(nominal, abs(nominal) / 100) for nominal in nominal_values]
    return UncertaintyArray(elements)
|
||
|
||
@pytest.fixture
def data_missing_for_sorting():
    """Build a three-element ``UncertaintyArray`` from ``[4.0, np.nan, -5.0]``."""
    nominal_values = [4.0, np.nan, -5.0]
    elements = [ufloat(nominal, abs(nominal) / 100) for nominal in nominal_values]
    return UncertaintyArray(elements)
|
||
|
||
@pytest.fixture
def na_cmp():
    """Return a binary predicate for comparing NA values.

    The predicate evaluates ``pd.isna(x) and pd.isna(y)``.
    """

    def both_na(x, y):
        return pd.isna(x) and pd.isna(y)

    return both_na
|
||
|
||
@pytest.fixture
def na_value():
    """Return the missing-value marker used by these tests (``pd.NA``)."""
    missing = pd.NA
    return missing
|
||
|
||
@pytest.fixture
def data_for_grouping():
    """Build an eight-element ``UncertaintyArray`` laid out as [a, a, NaN, NaN, b, b, a, c].

    ``a``, ``b`` and ``c`` are ``ufloat(1.0, 0.1)``, ``ufloat(2.0, 0.1)`` and
    ``ufloat(3.0, 0.1)``; the same object is reused wherever a letter repeats.
    """
    a, b, c = (ufloat(nominal, 0.1) for nominal in (1.0, 2.0, 3.0))
    return UncertaintyArray([a, a, np.nan, np.nan, b, b, a, c])
|
||
# === missing from pandas extension docs about what has to be included in tests === | ||
# copied from pandas/pandas/conftest.py | ||
_all_arithmetic_operators = [ | ||
"__add__", | ||
"__radd__", | ||
"__sub__", | ||
"__rsub__", | ||
"__mul__", | ||
"__rmul__", | ||
"__floordiv__", | ||
"__rfloordiv__", | ||
"__truediv__", | ||
"__rtruediv__", | ||
"__pow__", | ||
"__rpow__", | ||
"__mod__", | ||
"__rmod__", | ||
] | ||
|
||
|
||
@pytest.fixture(params=_all_arithmetic_operators)
def all_arithmetic_operators(request):
    """Parametrized fixture yielding each arithmetic-operator dunder name."""
    dunder_name = request.param
    return dunder_name
|
||
|
||
@pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"])
def all_compare_operators(request):
    """Parametrized fixture yielding each comparison-operator dunder name.

    Covers ``==``, ``!=``, ``<=``, ``<``, ``>=`` and ``>``.
    """
    dunder_name = request.param
    return dunder_name
|
||
|
||
# commented functions aren't implemented in numpy/pandas | ||
_all_numeric_reductions = [ | ||
"sum", | ||
"max", | ||
"min", | ||
"mean", | ||
# "prod", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No reason we cannot reduce with prod. |
||
"std", | ||
"var", | ||
"median", | ||
"sem", | ||
"kurt", | ||
"skew", | ||
] | ||
|
||
|
||
@pytest.fixture(params=_all_numeric_reductions)
def all_numeric_reductions(request):
    """Parametrized fixture yielding each numeric reduction name."""
    reduction_name = request.param
    return reduction_name
|
||
|
||
# Names of the boolean reductions exercised by the fixture below.
_all_boolean_reductions = ["all", "any"]


@pytest.fixture(params=_all_boolean_reductions)
def all_boolean_reductions(request):
    """Parametrized fixture yielding each boolean reduction name."""
    reduction_name = request.param
    return reduction_name
|
||
|
||
# Names of the cumulative operations exercised by the fixture below.
_all_numeric_accumulations = ["cumsum", "cumprod", "cummin", "cummax"]


@pytest.fixture(params=_all_numeric_accumulations)
def all_numeric_accumulations(request):
    """Parametrized fixture yielding each numeric accumulation name."""
    accumulation_name = request.param
    return accumulation_name
|
||
class InvalidScalar:
    """Placeholder object returned by the ``invalid_scalar`` fixture."""

    def strip(self):
        """Return the fixed marker string ``"invalid_scalar"``."""
        marker = "invalid_scalar"
        return marker
|
||
@pytest.fixture
def invalid_scalar(data):
    """Return a scalar that *cannot* be held by this ExtensionArray.

    The default should work for most subclasses, but is not guaranteed.
    If the array can hold any item (i.e. object dtype), use ``pytest.skip``.
    """
    scalar = InvalidScalar()
    return scalar
|
||
|
||
import operator | ||
|
||
@pytest.fixture(
    params=[getattr(operator, name) for name in ("eq", "ne", "gt", "ge", "lt", "le")]
)
def comparison_op(request):
    """Parametrized fixture yielding each ``operator`` module comparison function."""
    compare = request.param
    return compare
|
||
from pandas.tests.extension import base | ||
|
||
class TestUncertaintyArray(base.ExtensionTests):
    """Run the pandas ``base.ExtensionTests`` suite against ``UncertaintyArray``.

    Tests that do not pass yet are overridden below and marked ``xfail``;
    the reason is recorded on each marker.
    """

    # Exception settings read by the inherited arithmetic tests.
    # NOTE(review): presumably ``None`` means "the operation should succeed";
    # confirm against pandas.tests.extension.base.
    divmod_exc = TypeError  # TODO: fix this
    series_scalar_exc = None
    frame_scalar_exc = None
    series_array_exc = None

    # This test round trips to file. Set the uncertainty to zero so the
    # recreated data compares equal to the original data.
    @pytest.mark.parametrize(
        "data",
        [
            UncertaintyArray(
                [ufloat(i, 0) for i in np.arange(start=1.0, stop=101.0)]
            )
        ],
    )
    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data, request):
        """Delegate to the base test using zero-uncertainty data."""
        super().test_EA_types(engine, data)

    @pytest.mark.xfail(
        run=True,
        reason="test returns Float64Dtype rather than float64 but is otherwise correct",
    )
    def test_value_counts_with_normalize(self, data, index, obj):
        """Delegate to the base test; currently expected to fail."""
        super().test_value_counts_with_normalize(data, index, obj)

    @pytest.mark.xfail(reason="Can't invert uncertainties")
    def test_invert(self, data):
        """Delegate to the base test; currently expected to fail."""
        super().test_invert(data)

    @pytest.mark.xfail(reason="_reduce is not implemented")
    def test_in_numeric_groupby(self, data_for_grouping):
        """Delegate to the base test; currently expected to fail."""
        super().test_in_numeric_groupby(data_for_grouping)

    @pytest.mark.xfail(reason="Couldn't work out why this fails")
    def test_groupby_extension_agg(self, data_for_grouping):
        """Delegate to the base test; currently expected to fail."""
        super().test_groupby_extension_agg(data_for_grouping)

    @pytest.mark.xfail(reason="Couldn't work out why this fails")
    def test_from_dtype(self, data):
        """Delegate to the base test; currently expected to fail."""
        super().test_from_dtype(data)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`mean` is a non-trivial calculation in the world of uncertainties. I don't think `uncertainties` does it. I've created code that does it:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the sort of thing that `__array_function__` would help with, so I could do `np.mean(UArray)` without needing to understand the uncertainty logic.