Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add anchor option to StdMean #176

Merged
merged 19 commits into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ mapclassify/tests/.ropeproject/
.DS_Store
.vscode/settings.json
__pycache__
/notebooks/.ipynb_checkpoints/
6 changes: 5 additions & 1 deletion mapclassify/_classify_API.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def classify(
initial=100,
bins=None,
lowest=None,
anchor=False,
):
"""

Expand Down Expand Up @@ -94,6 +95,9 @@ def classify(
Scalar minimum value of lowest class. Default is to set the minimum
to ``-inf`` if ``y.min()`` > first upper bound (which will override
the default), otherwise minimum is set to ``y.min()``.
anchor : bool (default False)
Anchor upper bound of one class to the sample mean.



Returns
Expand Down Expand Up @@ -182,7 +186,7 @@ def classify(
classifier = _classifiers[scheme](y, pct)

elif scheme == "stdmean":
classifier = _classifiers[scheme](y, multiples)
classifier = _classifiers[scheme](y, multiples, anchor)

elif scheme == "jenkscaspallsampled":
classifier = _classifiers[scheme](y, k, pct_sampled)
Expand Down
36 changes: 31 additions & 5 deletions mapclassify/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1520,12 +1520,14 @@ class StdMean(MapClassifier):

Parameters
----------

y : numpy.array
:math:`(n,1)`, values to classify.
:math:`(n,1)`, values to classify
multiples : numpy.array (default [-2, -1, 1, 2])
The multiples of the standard deviation to add/subtract from
the sample mean to define the bins
the sample mean to define the bins.
anchor : bool (default False)
Anchor upper bound of one class to the sample mean.


Attributes
----------
Expand All @@ -1539,6 +1541,17 @@ class StdMean(MapClassifier):
counts : numpy.array
:math:`(k,1)`, the number of observations falling in each class.

Notes
-----

If anchor is True, one of the intervals will have its closed upper bound
equal to the mean of y, and any value passed for ``multiples`` is replaced
by consecutive integer multiples derived from y. Intermediate intervals will
have widths equal to the sample standard deviation of y (``ddof=1``). The
first interval will be closed on the minimum value of y, and the last
interval will be closed on the maximum of y. The first and last intervals
may have widths different from the intermediate intervals.


Examples
--------

Expand All @@ -1562,18 +1575,31 @@ class StdMean(MapClassifier):

>>> list(st3.counts)
[0, 0, 57, 0, 1]

>>> stda = mapclassify.StdMean(cal, anchor=True)
>>> stda.k
9
>>> stda.bins
array([ 125.92810345, 672.57333208, 1219.21856072, 1765.86378936,
2312.50901799, 2859.15424663, 3405.79947527, 3952.4447039 ,
4111.45 ])
>>> cal.mean(), cal.std(), cal.min(), cal.max()
(125.92810344827588, 546.6452286365233, 0.13, 4111.45)
"""

def __init__(self, y, multiples=[-2, -1, 1, 2]):
def __init__(self, y, multiples=[-2, -1, 1, 2], anchor=False):
self.multiples = multiples
self.anchor = anchor
MapClassifier.__init__(self, y)
self.name = "StdMean"

def _set_bins(self):
y = self.y
s = y.std(ddof=1)
m = y.mean()
if self.anchor:
min_z = int((y.min() - m) / s)
max_z = int((y.max() - m) / s) + 1
self.multiples = list(range(min_z, max_z))
cuts = [m + s * w for w in self.multiples]
y_max = y.max()
if cuts[-1] < y_max:
Expand Down
24 changes: 24 additions & 0 deletions mapclassify/tests/test_mapclassify.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,30 @@ def test_UserDefined_lowest(self):
assert ud.get_legend_classes() == classes


class TestStdMeanAnchor:
    """Check ``StdMean(anchor=True)`` bins and counts on the example data."""

    def setup_method(self):
        # Load the example values used by the test method below.
        self.V = load_example()

    def test_StdMeanAnchor(self):
        classifier = StdMean(self.V, anchor=True)

        # Expected upper bounds of the nine classes, compared approximately.
        expected_bins = numpy.array(
            [
                125.92810345,
                672.57333208,
                1219.21856072,
                1765.86378936,
                2312.50901799,
                2859.15424663,
                3405.79947527,
                3952.4447039,
                4111.45,
            ]
        )
        # Expected number of observations in each of those classes.
        expected_counts = numpy.array([50, 6, 1, 0, 0, 0, 0, 0, 1])

        numpy.testing.assert_array_almost_equal(classifier.bins, expected_bins)
        numpy.testing.assert_array_almost_equal(classifier.counts, expected_counts)


class TestMaxP:
def setup_method(self):
self.V = load_example()
Expand Down