New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Groupby-like API for resampling #1272
Changes from 52 commits
0767397
fce727f
afa31fc
829b4c1
9b742c4
0ec8b71
09a6989
afa14e4
1fec1f9
4f70131
3a05c50
464a067
c213de9
db500d2
4f29932
5f4d6a5
3e2cc45
a98bb2e
2664b8e
ee4b2ef
07e6fb1
304e250
949291a
c898b23
37c8e8b
6949c06
d85fa81
4177d79
a7bd1fd
0f071ee
985600e
2e985c6
bc58b05
529406f
b6cf938
406f4e2
ce97f3a
2a7efee
829d292
38f6d86
b2307d0
85ed5ba
5082040
ed8d5c9
5d23a99
8c7d6cf
31e5510
9b43d00
2839107
9a92211
6df6dde
af1ab3d
d03b25f
dd11565
5cfba57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
from __future__ import print_function | ||
import numpy as np | ||
import pandas as pd | ||
import warnings | ||
|
||
from .pycompat import basestring, suppress, dask_array_type, OrderedDict | ||
from . import dtypes | ||
|
@@ -479,55 +480,33 @@ def rolling(self, min_periods=None, center=False, **windows): | |
return self._rolling_cls(self, min_periods=min_periods, | ||
center=center, **windows) | ||
|
||
def resample(self, freq, dim, how='mean', skipna=None, closed=None, | ||
label=None, base=0, keep_attrs=False): | ||
"""Resample this object to a new temporal resolution. | ||
def resample(self, freq=None, dim=None, how=None, skipna=None, | ||
closed=None, label=None, base=0, keep_attrs=False, **indexer): | ||
"""Returns a Resample object for performing resampling operations. | ||
|
||
Handles both downsampling and upsampling. Upsampling with filling is | ||
not yet supported; if any intervals contain no values in the original | ||
not supported; if any intervals contain no values from the original | ||
object, they will be given the value ``NaN``. | ||
|
||
Parameters | ||
---------- | ||
freq : str | ||
String in the '#offset' to specify the step-size along the | ||
resampled dimension, where '#' is an (optional) integer multipler | ||
(default 1) and 'offset' is any pandas date offset alias. Examples | ||
of valid offsets include: | ||
|
||
* 'AS': year start | ||
* 'QS-DEC': quarterly, starting on December 1 | ||
* 'MS': month start | ||
* 'D': day | ||
* 'H': hour | ||
* 'Min': minute | ||
|
||
The full list of these offset aliases is documented in pandas [1]_. | ||
dim : str | ||
Name of the dimension to resample along (e.g., 'time'). | ||
how : str or func, optional | ||
Used for downsampling. If a string, ``how`` must be a valid | ||
aggregation operation supported by xarray. Otherwise, ``how`` must be | ||
a function that can be called like ``how(values, axis)`` to reduce | ||
ndarray values along the given axis. Valid choices that can be | ||
provided as a string include all the usual Dataset/DataArray | ||
aggregations (``all``, ``any``, ``argmax``, ``argmin``, ``max``, | ||
``mean``, ``median``, ``min``, ``prod``, ``sum``, ``std`` and | ||
``var``), as well as ``first`` and ``last``. | ||
skipna : bool, optional | ||
Whether to skip missing values when aggregating in downsampling. | ||
closed : 'left' or 'right', optional | ||
Side of each interval to treat as closed. | ||
label : 'left or 'right', optional | ||
Side of each interval to use for labeling. | ||
base : int, optionalt | ||
base : int, optional | ||
For frequencies that evenly subdivide 1 day, the "origin" of the | ||
aggregated intervals. For example, for '24H' frequency, base could | ||
range from 0 through 23. | ||
keep_attrs : bool, optional | ||
If True, the object's attributes (`attrs`) will be copied from | ||
the original object to the new one. If False (default), the new | ||
object will be returned without attributes. | ||
**indexer : {dim: freq} | ||
Dictionary with a key indicating the dimension name to resample | ||
over and a value corresponding to the resampling frequency. | ||
|
||
Returns | ||
------- | ||
|
@@ -540,18 +519,67 @@ def resample(self, freq, dim, how='mean', skipna=None, closed=None, | |
.. [1] http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases | ||
""" | ||
from .dataarray import DataArray | ||
from .resample import RESAMPLE_DIM | ||
|
||
if dim is not None: | ||
if how is None: | ||
how = 'mean' | ||
return self._resample_immediately(freq, dim, how, skipna, closed, | ||
label, base, keep_attrs) | ||
|
||
if (how is not None) and indexer: | ||
raise TypeError("If passing an 'indexer' then 'dim' " | ||
"and 'how' should not be used") | ||
|
||
# More than one indexer is ambiguous, but we do in fact need one if | ||
# "dim" was not provided, until the old API is fully deprecated | ||
if len(indexer) != 1: | ||
raise ValueError( | ||
"Resampling only supported along single dimensions." | ||
) | ||
dim, freq = indexer.popitem() | ||
|
||
if isinstance(dim, basestring): | ||
dim_name = dim | ||
dim = self[dim] | ||
else: | ||
raise TypeError("Dimension name should be a string; " | ||
"was passed %r" % dim) | ||
group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM) | ||
time_grouper = pd.TimeGrouper(freq=freq, closed=closed, | ||
label=label, base=base) | ||
resampler = self._resample_cls(self, group=group, dim=dim_name, | ||
grouper=time_grouper, | ||
resample_dim=RESAMPLE_DIM) | ||
|
||
return resampler | ||
|
||
def _resample_immediately(self, freq, dim, how, skipna, | ||
closed, label, base, keep_attrs): | ||
"""Implement the original version of .resample() which immediately | ||
executes the desired resampling operation. """ | ||
from .dataarray import DataArray | ||
RESAMPLE_DIM = '__resample_dim__' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There might be a better way to handle this, but currently using |
||
|
||
warnings.warn("\n.resample() has been modified to defer " | ||
"calculations. Instead of passing 'dim' and " | ||
"'how=\"{how}\", instead consider using " | ||
".resample({dim}=\"{freq}\").{how}() ".format( | ||
dim=dim, freq=freq, how=how | ||
), DeprecationWarning, stacklevel=3) | ||
|
||
if isinstance(dim, basestring): | ||
dim = self[dim] | ||
group = DataArray(dim, [(RESAMPLE_DIM, dim)], name=RESAMPLE_DIM) | ||
group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM) | ||
time_grouper = pd.TimeGrouper(freq=freq, how=how, closed=closed, | ||
label=label, base=base) | ||
gb = self._groupby_cls(self, group, grouper=time_grouper) | ||
if isinstance(how, basestring): | ||
f = getattr(gb, how) | ||
if how in ['first', 'last']: | ||
result = f(skipna=skipna, keep_attrs=keep_attrs) | ||
elif how == 'count': | ||
result = f(dim=dim.name, keep_attrs=keep_attrs) | ||
else: | ||
result = f(dim=dim.name, skipna=skipna, keep_attrs=keep_attrs) | ||
else: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we explicitly disallow the specification of `how` when an `indexer` is provided? I don't think we need to support that functionality since we know it will be deprecated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 that should be a TypeError