
Tweak auxiliary groupby apply and reduce methods
darothen committed Jul 19, 2017
1 parent 4f70131 commit 3a05c50
Showing 2 changed files with 115 additions and 17 deletions.
6 changes: 3 additions & 3 deletions xarray/core/common.py
@@ -542,7 +542,6 @@ def resample(self, freq=None, dim=None, how='mean', skipna=None,
.. [1] http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
"""
from .dataarray import DataArray
RESAMPLE_DIM = '__resample_dim__'

if dim is not None:
return self._resample_immediately(freq, dim, how, skipna, closed,
@@ -559,15 +558,16 @@ def resample(self, freq=None, dim=None, how='mean', skipna=None,
if isinstance(dim, basestring):
dim_name = dim
dim = self[dim]
resample_dim = "resampled_" + dim_name
else:
raise ValueError("Dimension name should be a string; "
"was passed %r" % dim)
group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM)
group = DataArray(dim, [(dim.dims, dim)], name=resample_dim)
time_grouper = pd.TimeGrouper(freq=freq, closed=closed,
label=label, base=base)
resampler = self.resample_cls(self, group=group, dim=dim_name,
grouper=time_grouper,
resample_dim=RESAMPLE_DIM)
resample_dim=resample_dim)

return resampler
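
For reference, a minimal sketch of what the per-call naming above produces. The 'time' dimension and the toy DataArray are assumptions for illustration; only the "resampled_" prefix and the group construction mirror the hunk.

import numpy as np
import pandas as pd
import xarray as xr

# Toy data: a daily series along a 'time' dimension (illustrative only).
times = pd.date_range("2000-01-01", periods=10, freq="D")
da = xr.DataArray(np.arange(10), coords=[("time", times)], name="foo")

dim_name = "time"
dim = da[dim_name]
resample_dim = "resampled_" + dim_name  # -> "resampled_time"

# The grouping coordinate now carries a name derived from the resampled
# dimension instead of a module-level constant.
group = xr.DataArray(dim, [(dim.dims, dim)], name=resample_dim)
print(group.name)  # resampled_time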

126 changes: 112 additions & 14 deletions xarray/core/groupby.py
@@ -532,7 +532,9 @@ def _combine(self, applied, shortcut=False):
combined = self._concat_shortcut(applied, dim, positions)
else:
combined = concat(applied, dim)
combined = _maybe_reorder(combined, dim, positions)
print(combined[dim])
#combined = _maybe_reorder(combined, dim, positions)
print(combined[dim])

if isinstance(combined, type(self._obj)):
# only restore dimension order for arrays
@@ -585,18 +587,79 @@ def reduce_array(ar):

RESAMPLE_DIM = '__resample_dim__'
class DataArrayResample(DataArrayGroupBy):
"""DataArrayGroupBy object specialized to resampling a specified dimension
"""DataArrayGroupBy object specialized to time resampling operations over a
specified dimension
"""

def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
self._dim = dim
self._resample_dim = resample_dim
if dim == resample_dim:
def __init__(self, *args, **kwargs):

self._dim = kwargs.pop('dim', None)
self._resample_dim = kwargs.pop('resample_dim', None)

if self._dim == self._resample_dim:
raise ValueError("Proxy resampling dimension ('{_resample_dim}') "
"cannot have the same name as actual dimension "
"('{_dim}')! ".format(self))
super(DataArrayResample, self).__init__(*args, **kwargs)
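
The move from keyword-only arguments to kwargs.pop keeps the constructor valid on Python 2, which does not allow named parameters after *args. A standalone sketch of the pattern (the class name here is made up, not part of the commit):

class _PopExample(object):
    def __init__(self, *args, **kwargs):
        # Pull our own options out of **kwargs before delegating, so the
        # parent class never sees keys it does not understand.
        self._dim = kwargs.pop('dim', None)
        self._resample_dim = kwargs.pop('resample_dim', None)
        if self._dim == self._resample_dim:
            raise ValueError("'dim' and 'resample_dim' must differ")
        super(_PopExample, self).__init__(*args, **kwargs)

ex = _PopExample(dim='time', resample_dim='__resample_dim__')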


def apply(self, func, shortcut=False, **kwargs):
"""Apply a function over each array in the group and concatenate them
together into a new array.
`func` is called like `func(ar, *args, **kwargs)` for each array `ar`
in this group.
Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
to stack together the array. The rule is:
1. If the dimension along which the group coordinate is defined is
still in the first grouped array after applying `func`, then stack
over this dimension.
2. Otherwise, stack over the new dimension given by name of this
grouping (the argument to the `groupby` function).
Parameters
----------
func : function
Callable to apply to each array.
shortcut : bool, optional
Whether or not to shortcut evaluation under the assumptions that:
(1) The action of `func` does not depend on any of the array
metadata (attributes or coordinates) but only on the data and
dimensions.
(2) The action of `func` creates arrays with homogeneous metadata,
that is, with the same dimensions and attributes.
If these conditions are satisfied `shortcut` provides significant
speedup. This should be the case for many common groupby operations
(e.g., applying numpy ufuncs).
**kwargs
Used to call `func(ar, **kwargs)` for each array `ar`.
Returns
-------
applied : DataArray or Dataset
The result of splitting, applying and combining this array.
"""
if shortcut:
grouped = self._iter_grouped_shortcut()
else:
grouped = self._iter_grouped()
applied = (maybe_wrap_array(arr, func(arr, **kwargs))
for arr in grouped)
combined = self._combine(applied, shortcut=shortcut)

# If the aggregation function didn't drop the original resampling
# dimension, then we need to do so before we can rename the proxy
# dimension we used.
if self._dim in combined:
combined = combined.drop(self._dim)

if self._resample_dim in combined.dims:
combined = combined.rename({self._resample_dim: self._dim})

return combined
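
A standalone sketch of the post-processing step above: if the aggregation kept the original dimension's coordinate, drop it, then rename the proxy dimension back to the user-facing name. The coordinate names mirror the code, but the DataArray and the 'time' dimension are made up for illustration.

import numpy as np
import pandas as pd
import xarray as xr

proxy = "__resample_dim__"
times = pd.date_range("2000-01-01", periods=4, freq="D")

# Pretend this is the combined result, with a leftover 'time' coordinate.
combined = xr.DataArray(np.arange(4),
                        coords={proxy: times, "time": (proxy, times)},
                        dims=proxy)

if "time" in combined.coords:      # the original coordinate survived `func`
    combined = combined.drop("time")
if proxy in combined.dims:         # rename the proxy back to 'time'
    combined = combined.rename({proxy: "time"})

print(combined.dims)  # ('time',)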


def reduce(self, func, dim=None, axis=None, shortcut=True,
keep_attrs=False, **kwargs):
"""Reduce the items in this group by applying `func` along the
@@ -629,9 +692,9 @@ def reduce(self, func, dim=None, axis=None, shortcut=True,
def reduce_array(ar):
return ar.reduce(func, self._dim, axis=None, keep_attrs=keep_attrs,
**kwargs)
result = self.apply(reduce_array, shortcut=shortcut)
return self.apply(reduce_array, shortcut=shortcut)

return result.rename({self._resample_dim: self._dim})
# return result.rename({self._resample_dim: self._dim})

ops.inject_reduce_methods(DataArrayResample)
ops.inject_binary_ops(DataArrayResample)
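
Roughly, the injected aggregation methods (.mean(), .sum(), ...) funnel through reduce(), which after this change defers the proxy-dimension rename to apply(). A hypothetical helper showing the equivalence; `resampler` is assumed to be a DataArrayResample instance, and nothing below is part of the commit.

import numpy as np

def mean_over_resample(resampler, **kwargs):
    # What `resampler.mean()` amounts to: reduce each grouped array over
    # the original dimension, and let apply() drop the leftover coordinate
    # and rename the proxy dimension.
    def reduce_array(ar):
        return ar.reduce(np.mean, resampler._dim, axis=None,
                         keep_attrs=False, **kwargs)
    return resampler.apply(reduce_array, shortcut=True)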
@@ -731,15 +794,50 @@ class DatasetResample(DatasetGroupBy):
"""DatasetGroupBy object specialized to resampling a specified dimension
"""

def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
self._dim = dim
self._resample_dim = resample_dim
if dim == resample_dim:
def __init__(self, *args, **kwargs):

self._dim = kwargs.pop('dim', None)
self._resample_dim = kwargs.pop('resample_dim', None)

if self._dim == self._resample_dim:
raise ValueError("Proxy resampling dimension ('{_resample_dim}') "
"cannot have the same name as actual dimension "
"('{_dim}')! ".format(self))
super(DatasetResample, self).__init__(*args, **kwargs)

def apply(self, func, **kwargs):
"""Apply a function over each Dataset in the groups generated for
resampling and concatenate them together into a new Dataset.
`func` is called like `func(ds, *args, **kwargs)` for each dataset `ds`
in this group.
Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
to stack together the datasets. The rule is:
1. If the dimension along which the group coordinate is defined is
still in the first grouped item after applying `func`, then stack
over this dimension.
2. Otherwise, stack over the new dimension given by name of this
grouping (the argument to the `groupby` function).
Parameters
----------
func : function
Callable to apply to each sub-dataset.
**kwargs
Used to call `func(ds, **kwargs)` for each sub-dataset `ds`.
Returns
-------
applied : Dataset or DataArray
The result of splitting, applying and combining this dataset.
"""
kwargs.pop('shortcut', None) # ignore shortcut if set (for now)
applied = (func(ds, **kwargs) for ds in self._iter_grouped())
combined = self._combine(applied)

return combined.rename({self._resample_dim: self._dim})
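
A toy end-to-end of the combine-then-rename step for the Dataset case. The variable name, dimension name, and values are made up; only the rename of the proxy dimension mirrors the code above.

import numpy as np
import pandas as pd
import xarray as xr

proxy = "__resample_dim__"
months = pd.date_range("2000-01-01", periods=3, freq="MS")

# Pretend this is the result of concatenating the per-bin aggregates.
combined = xr.Dataset({"t2m": ((proxy,), np.array([271.3, 272.1, 274.8]))},
                      coords={proxy: months})

# Rename the proxy dimension back to the user-facing dimension name.
combined = combined.rename({proxy: "time"})
print(list(combined.dims))  # ['time']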

def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
"""Reduce the items in this group by applying `func` along the
pre-defined resampling dimension.
@@ -769,9 +867,9 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):

def reduce_dataset(ds):
return ds.reduce(func, self._dim, keep_attrs=keep_attrs, **kwargs)
result = self.apply(reduce_dataset)
return self.apply(reduce_dataset)

return result.rename({self._resample_dim: self._dim})
# return result.rename({self._resample_dim: self._dim})

ops.inject_reduce_methods(DatasetResample)
ops.inject_binary_ops(DatasetResample)
