diff --git a/pandas/core/base.py b/pandas/core/base.py index 6996bb06065af..855d89411b8a7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -2,6 +2,7 @@ Base and utility classes for pandas objects. """ from pandas import compat +from pandas.compat import builtins import numpy as np from pandas.core import common as com import pandas.core.nanops as nanops @@ -218,6 +219,266 @@ def __delete__(self, instance): raise AttributeError("can't delete attribute") +class GroupByError(Exception): + pass + + +class DataError(GroupByError): + pass + + +class SpecificationError(GroupByError): + pass + + +class SelectionMixin(object): + """ + mixin implementing the selection & aggregation interface on a group-like object + sub-classes need to define: obj, exclusions + """ + _selection = None + _internal_names = ['_cache'] + _internal_names_set = set(_internal_names) + _builtin_table = { + builtins.sum: np.sum, + builtins.max: np.max, + builtins.min: np.min, + } + _cython_table = { + builtins.sum: 'sum', + builtins.max: 'max', + builtins.min: 'min', + np.sum: 'sum', + np.mean: 'mean', + np.prod: 'prod', + np.std: 'std', + np.var: 'var', + np.median: 'median', + np.max: 'max', + np.min: 'min', + np.cumprod: 'cumprod', + np.cumsum: 'cumsum' + } + + @property + def name(self): + if self._selection is None: + return None # 'result' + else: + return self._selection + + @property + def _selection_list(self): + if not isinstance(self._selection, (list, tuple, com.ABCSeries, com.ABCIndex, np.ndarray)): + return [self._selection] + return self._selection + + @cache_readonly + def _selected_obj(self): + + if self._selection is None or isinstance(self.obj, com.ABCSeries): + return self.obj + else: + return self.obj[self._selection] + + @cache_readonly + def _obj_with_exclusions(self): + if self._selection is not None and isinstance(self.obj, com.ABCDataFrame): + return self.obj.reindex(columns=self._selection_list) + + if len(self.exclusions) > 0: + return self.obj.drop(self.exclusions, axis=1) + else: + return self.obj + + def __getitem__(self, key): + if self._selection is not None: + raise Exception('Column(s) %s already selected' % self._selection) + + if isinstance(key, (list, tuple, com.ABCSeries, com.ABCIndex, np.ndarray)): + if len(self.obj.columns.intersection(key)) != len(key): + bad_keys = list(set(key).difference(self.obj.columns)) + raise KeyError("Columns not found: %s" + % str(bad_keys)[1:-1]) + return self._gotitem(list(key), ndim=2) + + elif not getattr(self,'as_index',False): + if key not in self.obj.columns: + raise KeyError("Column not found: %s" % key) + return self._gotitem(key, ndim=2) + + else: + if key not in self.obj: + raise KeyError("Column not found: %s" % key) + return self._gotitem(key, ndim=1) + + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + + """ + raise AbstractMethodError(self) + + _agg_doc = """Aggregate using input function or dict of {column -> function} + +Parameters +---------- +arg : function or dict + Function to use for aggregating groups. If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. If + passed a dict, the keys must be DataFrame column names. + + Accepted Combinations are: + - string cythonized function name + - function + - list of functions + - dict of columns -> functions + - nested dict of names -> dicts of functions + +Notes +----- +Numpy functions mean/median/prod/sum/std/var are special cased so the +default behavior is applying the function along axis=0 +(e.g., np.mean(arr_2d, axis=0)) as opposed to +mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). + +Returns +------- +aggregated : DataFrame +""" + + @Appender(_agg_doc) + def agg(self, func, *args, **kwargs): + return self.aggregate(func, *args, **kwargs) + + @Appender(_agg_doc) + def aggregate(self, func, *args, **kwargs): + raise AbstractMethodError(self) + + def _aggregate(self, arg, *args, **kwargs): + """ + provide an implementation for the aggregators + + Returns + ------- + tuple of result, how + + Notes + ----- + how can be a string describe the required post-processing, or + None if not required + """ + + if isinstance(arg, compat.string_types): + return getattr(self, arg)(*args, **kwargs), None + + result = compat.OrderedDict() + if isinstance(arg, dict): + if self.axis != 0: # pragma: no cover + raise ValueError('Can only pass dict with axis=0') + + obj = self._selected_obj + + if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): + new_arg = compat.OrderedDict() + for k, v in compat.iteritems(arg): + if not isinstance(v, (tuple, list, dict)): + new_arg[k] = [v] + else: + new_arg[k] = v + arg = new_arg + + keys = [] + if self._selection is not None: + subset = obj + + for fname, agg_how in compat.iteritems(arg): + colg = self._gotitem(self._selection, ndim=1, subset=subset) + result[fname] = colg.aggregate(agg_how) + keys.append(fname) + else: + for col, agg_how in compat.iteritems(arg): + colg = self._gotitem(col, ndim=1) + result[col] = colg.aggregate(agg_how) + keys.append(col) + + if isinstance(list(result.values())[0], com.ABCDataFrame): + from pandas.tools.merge import concat + result = concat([result[k] for k in keys], keys=keys, axis=1) + else: + from pandas import DataFrame + result = DataFrame(result) + + return result, True + elif hasattr(arg, '__iter__'): + return self._aggregate_multiple_funcs(arg), None + else: + result = None + + cy_func = self._is_cython_func(arg) + if cy_func and not args and not kwargs: + return getattr(self, cy_func)(), None + + # caller can react + return result, True + + def _aggregate_multiple_funcs(self, arg): + from pandas.tools.merge import concat + + if self.axis != 0: + raise NotImplementedError("axis other than 0 is not supported") + + obj = self._obj_with_exclusions + results = [] + keys = [] + + # degenerate case + if obj.ndim == 1: + for a in arg: + try: + colg = self._gotitem(obj.name, ndim=1, subset=obj) + results.append(colg.aggregate(a)) + keys.append(getattr(a,'name',a)) + except (TypeError, DataError): + pass + except SpecificationError: + raise + + # multiples + else: + for col in obj: + try: + colg = self._gotitem(col, ndim=1, subset=obj[col]) + results.append(colg.aggregate(arg)) + keys.append(col) + except (TypeError, DataError): + pass + except SpecificationError: + raise + result = concat(results, keys=keys, axis=1) + + return result + + def _is_cython_func(self, arg): + """ if we define an internal function for this argument, return it """ + return self._cython_table.get(arg) + + def _is_builtin_func(self, arg): + """ + if we define an builtin function for this argument, return it, + otherwise return the arg + """ + return self._builtin_table.get(arg, arg) + class FrozenList(PandasObject, list): """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ff110880d34ba..2fc0786aa1e09 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5149,6 +5149,7 @@ def combineMult(self, other): DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True, aliases={'rows': 0}) DataFrame._add_numeric_operations() +DataFrame._add_series_or_dataframe_operations() _EMPTY_SERIES = Series([]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b75573edc7157..e8abc96aab858 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -29,7 +29,6 @@ from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.core import config - # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = dict() @@ -4734,6 +4733,36 @@ def nanptp(values, axis=0, skipna=True): method ``ptp``.""", nanptp) + @classmethod + def _add_series_or_dataframe_operations(cls): + """ add the series or dataframe only operations to the cls; evaluate the doc strings again """ + + from pandas.core import window as rwindow + + @Appender(rwindow.rolling.__doc__) + def rolling(self, window, min_periods=None, freq=None, center=False, + how=None, win_type=None, axis=0): + axis = self._get_axis_number(axis) + return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, + how=how, win_type=win_type, axis=axis) + cls.rolling = rolling + + @Appender(rwindow.expanding.__doc__) + def expanding(self, min_periods=None, freq=None, center=False, + how=None, axis=0): + axis = self._get_axis_number(axis) + return rwindow.expanding(self, min_periods=min_periods, freq=freq, center=center, + how=how, axis=axis) + cls.expanding = expanding + + @Appender(rwindow.ewm.__doc__) + def ewm(self, com=None, span=None, halflife=None, min_periods=0, freq=None, + adjust=True, how=None, ignore_na=False, axis=0): + axis = self._get_axis_number(axis) + return rwindow.ewm(self, com=com, span=span, halflife=halflife, min_periods=min_periods, + freq=freq, adjust=adjust, how=how, ignore_na=ignore_na, axis=axis) + cls.ewm = ewm + def _doc_parms(cls): """ return a tuple of the doc parms """ axis_descr = "{%s}" % ', '.join([ @@ -4916,6 +4945,6 @@ def logical_func(self, axis=None, bool_only=None, skipna=None, logical_func.__name__ = name return logical_func -# install the indexerse +# install the indexes for _name, _indexer in indexing.get_indexers_list(): NDFrame._create_indexer(_name, _indexer) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 28d95c40c7294..b156f4afa2711 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -12,7 +12,7 @@ ) from pandas import compat -from pandas.core.base import PandasObject +from pandas.core.base import PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame @@ -37,28 +37,6 @@ import pandas.algos as _algos import pandas.hashtable as _hash -_agg_doc = """Aggregate using input function or dict of {column -> function} - -Parameters ----------- -arg : function or dict - Function to use for aggregating groups. If a function, must either - work when passed a DataFrame or when passed to DataFrame.apply. If - passed a dict, the keys must be DataFrame column names. - -Notes ------ -Numpy functions mean/median/prod/sum/std/var are special cased so the -default behavior is applying the function along axis=0 -(e.g., np.mean(arr_2d, axis=0)) as opposed to -mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). - -Returns -------- -aggregated : DataFrame -""" - - # special case to prevent duplicate plots when catching exceptions when # forwarding methods from NDFrames _plotting_methods = frozenset(['plot', 'boxplot', 'hist']) @@ -91,18 +69,6 @@ _cython_transforms = frozenset(['cumprod', 'cumsum', 'shift']) -class GroupByError(Exception): - pass - - -class DataError(GroupByError): - pass - - -class SpecificationError(GroupByError): - pass - - def _groupby_function(name, alias, npfunc, numeric_only=True, _convert=False): def f(self): @@ -319,7 +285,7 @@ def f(self): return attr -class GroupBy(PandasObject): +class GroupBy(PandasObject, SelectionMixin): """ Class for grouping and aggregating relational data. See aggregate, @@ -387,8 +353,6 @@ class GroupBy(PandasObject): Number of groups """ _apply_whitelist = _common_apply_whitelist - _internal_names = ['_cache'] - _internal_names_set = set(_internal_names) _group_selection = None def __init__(self, obj, keys=None, axis=0, level=None, @@ -493,19 +457,6 @@ def _get_index(self, name): """ safe get index, translate keys for datelike to underlying repr """ return self._get_indices([name])[0] - @property - def name(self): - if self._selection is None: - return None # 'result' - else: - return self._selection - - @property - def _selection_list(self): - if not isinstance(self._selection, (list, tuple, Series, Index, np.ndarray)): - return [self._selection] - return self._selection - @cache_readonly def _selected_obj(self): @@ -558,9 +509,6 @@ def __getattr__(self, attr): raise AttributeError("%r object has no attribute %r" % (type(self).__name__, attr)) - def __getitem__(self, key): - raise NotImplementedError('Not implemented: %s' % key) - plot = property(GroupByPlot) def _make_wrapper(self, name): @@ -704,7 +652,7 @@ def apply(self, func, *args, **kwargs): ------- applied : type depending on grouped object and function """ - func = _intercept_function(func) + func = self._is_builtin_func(func) @wraps(func) def f(g): @@ -721,13 +669,6 @@ def _python_apply_general(self, f): return self._wrap_applied_output(keys, values, not_indexed_same=mutated) - def aggregate(self, func, *args, **kwargs): - raise AbstractMethodError(self) - - @Appender(_agg_doc) - def agg(self, func, *args, **kwargs): - return self.aggregate(func, *args, **kwargs) - def _iterate_slices(self): yield self.name, self._selected_obj @@ -1217,7 +1158,7 @@ def _cython_agg_general(self, how, numeric_only=True): return self._wrap_aggregated_output(output, names) def _python_agg_general(self, func, *args, **kwargs): - func = _intercept_function(func) + func = self._is_builtin_func(func) f = lambda x: func(x, *args, **kwargs) # iterate through "columns" ex exclusions to populate output dict @@ -1733,7 +1674,7 @@ def agg_series(self, obj, func): return self._aggregate_series_pure_python(obj, func) def _aggregate_series_fast(self, obj, func): - func = _intercept_function(func) + func = self._is_builtin_func(func) if obj.index._has_complex_internals: raise TypeError('Incompatible index for Cython grouper') @@ -2427,7 +2368,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): if hasattr(func_or_funcs, '__iter__'): ret = self._aggregate_multiple_funcs(func_or_funcs) else: - cyfunc = _intercept_cython(func_or_funcs) + cyfunc = self._is_cython_func(func_or_funcs) if cyfunc and not args and not kwargs: return getattr(self, cyfunc)() @@ -2559,7 +2500,7 @@ def transform(self, func, *args, **kwargs): transformed : Series """ - func = _intercept_cython(func) or func + func = self._is_cython_func(func) or func # if string function if isinstance(func, compat.string_types): @@ -2912,68 +2853,16 @@ def _post_process_cython_aggregate(self, obj): obj = obj.swapaxes(0, 1) return obj - @cache_readonly - def _obj_with_exclusions(self): - if self._selection is not None: - return self.obj.reindex(columns=self._selection_list) - - if len(self.exclusions) > 0: - return self.obj.drop(self.exclusions, axis=1) - else: - return self.obj - - @Appender(_agg_doc) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): - if isinstance(arg, compat.string_types): - return getattr(self, arg)(*args, **kwargs) - - result = OrderedDict() - if isinstance(arg, dict): - if self.axis != 0: # pragma: no cover - raise ValueError('Can only pass dict with axis=0') - obj = self._selected_obj + result, how = self._aggregate(arg, *args, **kwargs) + if how is None: + return result - if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): - new_arg = OrderedDict() - for k, v in compat.iteritems(arg): - if not isinstance(v, (tuple, list, dict)): - new_arg[k] = [v] - else: - new_arg[k] = v - arg = new_arg - - keys = [] - if self._selection is not None: - subset = obj - if isinstance(subset, DataFrame): - raise NotImplementedError("Aggregating on a DataFrame is " - "not supported") - - for fname, agg_how in compat.iteritems(arg): - colg = SeriesGroupBy(subset, selection=self._selection, - grouper=self.grouper) - result[fname] = colg.aggregate(agg_how) - keys.append(fname) - else: - for col, agg_how in compat.iteritems(arg): - colg = SeriesGroupBy(obj[col], selection=col, - grouper=self.grouper) - result[col] = colg.aggregate(agg_how) - keys.append(col) - - if isinstance(list(result.values())[0], DataFrame): - from pandas.tools.merge import concat - result = concat([result[k] for k in keys], keys=keys, axis=1) - else: - result = DataFrame(result) - elif isinstance(arg, list): - return self._aggregate_multiple_funcs(arg) - else: - cyfunc = _intercept_cython(arg) - if cyfunc and not args and not kwargs: - return getattr(self, cyfunc)() + if result is None: + # grouper specific aggregations if self.grouper.nkeys > 1: return self._python_agg_general(arg, *args, **kwargs) else: @@ -2993,30 +2882,6 @@ def aggregate(self, arg, *args, **kwargs): return result._convert(datetime=True) - def _aggregate_multiple_funcs(self, arg): - from pandas.tools.merge import concat - - if self.axis != 0: - raise NotImplementedError("axis other than 0 is not supported") - - obj = self._obj_with_exclusions - - results = [] - keys = [] - for col in obj: - try: - colg = SeriesGroupBy(obj[col], selection=col, - grouper=self.grouper) - results.append(colg.aggregate(arg)) - keys.append(col) - except (TypeError, DataError): - pass - except SpecificationError: - raise - result = concat(results, keys=keys, axis=1) - - return result - def _aggregate_generic(self, func, *args, **kwargs): if self.grouper.nkeys != 1: raise AssertionError('Number of keys must be 1') @@ -3318,7 +3183,7 @@ def transform(self, func, *args, **kwargs): """ # optimized transforms - func = _intercept_cython(func) or func + func = self._is_cython_func(func) or func if isinstance(func, compat.string_types): if func in _cython_transforms: # cythonized transform @@ -3463,35 +3328,34 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 - def __getitem__(self, key): - if self._selection is not None: - raise Exception('Column(s) %s already selected' % self._selection) + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object - if isinstance(key, (list, tuple, Series, Index, np.ndarray)): - if len(self.obj.columns.intersection(key)) != len(key): - bad_keys = list(set(key).difference(self.obj.columns)) - raise KeyError("Columns not found: %s" - % str(bad_keys)[1:-1]) - return DataFrameGroupBy(self.obj, self.grouper, selection=key, - grouper=self.grouper, - exclusions=self.exclusions, - as_index=self.as_index) + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ - elif not self.as_index: - if key not in self.obj.columns: - raise KeyError("Column not found: %s" % key) - return DataFrameGroupBy(self.obj, self.grouper, selection=key, + if ndim == 2: + if subset is None: + subset = self.obj + return DataFrameGroupBy(subset, self.grouper, selection=key, grouper=self.grouper, exclusions=self.exclusions, as_index=self.as_index) + elif ndim == 1: + if subset is None: + subset = self.obj[key] + return SeriesGroupBy(subset, selection=key, + grouper=self.grouper) - else: - if key not in self.obj: - raise KeyError("Column not found: %s" % key) - # kind of a kludge - return SeriesGroupBy(self.obj[key], selection=key, - grouper=self.grouper, - exclusions=self.exclusions) + raise AssertionError("invalid ndim for _gotitem") def _wrap_generic_output(self, result, obj): result_index = self.grouper.levels[0] @@ -4162,38 +4026,6 @@ def _reorder_by_uniques(uniques, labels): return uniques, labels -_func_table = { - builtins.sum: np.sum, - builtins.max: np.max, - builtins.min: np.min -} - - -_cython_table = { - builtins.sum: 'sum', - builtins.max: 'max', - builtins.min: 'min', - np.sum: 'sum', - np.mean: 'mean', - np.prod: 'prod', - np.std: 'std', - np.var: 'var', - np.median: 'median', - np.max: 'max', - np.min: 'min', - np.cumprod: 'cumprod', - np.cumsum: 'cumsum' -} - - -def _intercept_function(func): - return _func_table.get(func, func) - - -def _intercept_cython(func): - return _cython_table.get(func) - - def _groupby_indices(values): return _algos.groupby_indices(_values_from_object(com._ensure_object(values))) diff --git a/pandas/core/series.py b/pandas/core/series.py index ca55a834a33d2..d6eb18396e14c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2765,6 +2765,7 @@ def _dir_additions(self): aliases={'rows': 0}) Series._add_numeric_operations() Series._add_series_only_operations() +Series._add_series_or_dataframe_operations() _INDEX_TYPES = ndarray, Index, list, tuple #------------------------------------------------------------------------------ diff --git a/pandas/core/window.py b/pandas/core/window.py new file mode 100644 index 0000000000000..5467f7f41fb96 --- /dev/null +++ b/pandas/core/window.py @@ -0,0 +1,1077 @@ +""" + +provide a generic structure to support window functions, +similar to how we have a Groupby object + + +""" +from __future__ import division + +import numpy as np +from functools import wraps +from collections import defaultdict + +import pandas as pd +from pandas.core.base import PandasObject, SelectionMixin, AbstractMethodError +import pandas.core.common as com +import pandas.algos as algos +from pandas import compat +from pandas.util.decorators import Substitution, Appender + +class _Window(PandasObject, SelectionMixin): + _attributes = ['window','min_periods','freq','center','how','win_type','axis'] + exclusions = set() + + def __init__(self, obj, window=None, min_periods=None, freq=None, center=False, + how=None, win_type=None, axis=0): + self.blocks = [] + self.obj = obj + self.window = window + self.min_periods = min_periods + self.freq = freq + self.center = center + self.how = how + self.win_type = win_type + self.axis = axis + self._convert_freq() + self._setup() + + @property + def _constructor(self): + return Window + + def _setup(self): + pass + + def _create_blocks(self): + """ split data into blocks """ + return self._selected_obj.as_blocks(copy=False).values() + + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + + # create a new object to prevent aliasing + if subset is None: + subset = self.obj + new_self = self._shallow_copy(subset) + if ndim==2 and key in subset: + new_self._selection = key + new_self._reset_cache() + return new_self + + def __getattr__(self, attr): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError("%r object has no attribute %r" % + (type(self).__name__, attr)) + + def _dir_additions(self): + return self.obj._dir_additions() + + def _get_window(self, other=None): + return self.window + + def __unicode__(self): + """ provide a nice str repr of our rolling object """ + + attrs = [ "{k}->{v}".format(k=k,v=getattr(self,k)) \ + for k in self._attributes if getattr(self,k,None) is not None ] + return "{klass} [{attrs}]".format(klass=self.__class__.__name__, + attrs=','.join(attrs)) + + def _shallow_copy(self, obj=None, **kwargs): + """ return a new object with the replacement attributes """ + if obj is None: + obj = self._selected_obj.copy() + if isinstance(obj, self.__class__): + obj = obj.obj + for attr in self._attributes: + if attr not in kwargs: + kwargs[attr] = getattr(self,attr) + return self._constructor(obj, **kwargs) + + def _prep_values(self, values=None, kill_inf=True): + + if values is None: + values = getattr(self._selected_obj,'values',self._selected_obj) + + # coerce dtypes as appropriate + if com.is_float_dtype(values.dtype): + pass + elif com.is_integer_dtype(values.dtype): + values = values.astype(float) + elif com.is_timedelta64_dtype(values.dtype): + values = values.view('i8').astype(float) + else: + try: + values = values.astype(float) + except (ValueError, TypeError): + raise TypeError("cannot handle this type -> {0}".format(values.dtype)) + + if kill_inf: + values = values.copy() + values[np.isinf(values)] = np.NaN + + return values + + def _wrap_result(self, result, block=None): + """ wrap a single result """ + + obj = self._selected_obj + if isinstance(result, np.ndarray): + + # coerce if necessary + if block is not None: + if com.is_timedelta64_dtype(block.values.dtype): + result = pd.to_timedelta(result.ravel(),unit='ns').values.reshape(result.shape) + + if result.ndim == 1: + from pandas import Series + return Series(result, obj.index, name=obj.name) + + return type(obj)(result, + index=obj.index, + columns=block.columns) + return result + + def _wrap_results(self, results, blocks): + """ wrap lists of results, blocks """ + + obj = self._selected_obj + final = [] + for result, block in zip(results, blocks): + + result = self._wrap_result(result, block) + if result.ndim == 1: + return result + final.append(result) + + if not len(final): + return obj.astype('float64') + return pd.concat(final,axis=1).reindex(columns=obj.columns) + + def _center_window(self, result, window): + """ center the result in the window """ + if self.axis > result.ndim-1: + raise ValueError("Requested axis is larger then no. of argument " + "dimensions") + + from pandas import Series, DataFrame + offset = _offset(window, True) + if offset > 0: + if isinstance(result, (Series, DataFrame)): + result = result.slice_shift(-offset, axis=self.axis) + else: + lead_indexer = [slice(None)] * result.ndim + lead_indexer[self.axis] = slice(offset, None) + result = np.copy(result[tuple(lead_indexer)]) + return result + + def _convert_freq(self): + """ conform to our freq """ + + from pandas import Series, DataFrame + if self.freq is not None and isinstance(self.obj, (Series, DataFrame)): + self.obj = self.obj.resample(self.freq, how=self.how) + + @Appender(SelectionMixin._agg_doc) + def aggregate(self, arg, *args, **kwargs): + result, how = self._aggregate(arg, *args, **kwargs) + if result is None: + import pdb; pdb.set_trace() + return result + +class Window(_Window): + + def _prep_window(self, **kwargs): + """ provide validation for our window type, return the window """ + window = self._get_window() + + if isinstance(window, (list, tuple, np.ndarray)): + return com._asarray_tuplesafe(window).astype(float) + elif com.is_integer(window): + try: + import scipy.signal as sig + except ImportError: + raise ImportError('Please install scipy to generate window weight') + win_type = _validate_win_type(self.win_type, kwargs) # may pop from kwargs + return sig.get_window(win_type, window).astype(float) + + raise ValueError('Invalid window %s' % str(window)) + + def _apply_window(self, mean=True, **kwargs): + """ + Applies a moving window of type ``window_type`` on the data. + + Parameters + ---------- + mean : boolean, default True + If True computes weighted mean, else weighted sum + + Returns + ------- + y : type of input argument + + """ + window = self._prep_window(**kwargs) + center = self.center + + results, blocks = [], self._create_blocks() + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + offset = _offset(window, center) + additional_nans = np.array([np.NaN] * offset) + def f(arg, *args, **kwargs): + minp = _use_window(self.min_periods, len(window)) + return algos.roll_window(np.concatenate((arg, additional_nans)) if center else arg, + window, minp, avg=mean) + + result = np.apply_along_axis(f, self.axis, values) + + if center: + result = self._center_window(result, window) + results.append(result) + + return self._wrap_results(results, blocks) + + def sum(self, **kwargs): + return self._apply_window(mean=False, **kwargs) + + def mean(self, **kwargs): + return self._apply_window(mean=True, **kwargs) + +class _Rolling(_Window): + + @property + def _constructor(self): + return Rolling + + def _apply(self, func, window=None, center=None, check_minp=None, how=None, **kwargs): + """ + Rolling statistical measure using supplied function. Designed to be + used with passed-in Cython array-based functions. + + Parameters + ---------- + func : string/callable to apply + window : int/array, default to _get_window() + center : boolean, default to self.center + check_minp : function, default to _use_window + how : string, default to None + + Returns + ------- + y : type of input + """ + + if center is None: + center = self.center + if window is None: + window = self._get_window() + + if check_minp is None: + check_minp = _use_window + + results, blocks = [], self._create_blocks() + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + # if we have a string function name, wrap it + if isinstance(func, compat.string_types): + if not hasattr(algos, func): + raise ValueError("we do not support this function algos.{0}".format(func)) + + cfunc = getattr(algos, func) + def func(arg, window, min_periods=None): + minp = check_minp(min_periods, window) + return cfunc(arg, window, minp, **kwargs) + + # calculation function + if center: + offset = _offset(window, center) + additional_nans = np.array([np.NaN] * offset) + def calc(x): + return func(np.concatenate((x, additional_nans)), + window, min_periods=self.min_periods) + else: + def calc(x): + return func(x,window, min_periods=self.min_periods) + + if values.ndim > 1: + result = np.apply_along_axis(calc, self.axis, values) + else: + result = calc(values) + + if center: + result = self._center_window(result, window) + + results.append(result) + + return self._wrap_results(results, blocks) + +class Rolling(_Rolling): + + def count(self): + """ + Rolling count of number of non-NaN observations inside provided window. + + Returns + ------- + same type as input + """ + + obj = self._selected_obj + window = self._get_window() + window = min(window, len(obj)) if not self.center else window + try: + converted = np.isfinite(obj).astype(float) + except TypeError: + converted = np.isfinite(obj.astype(float)).astype(float) + result = self._constructor(converted, + window=window, + min_periods=0, + center=self.center).sum() + + result[result.isnull()] = 0 + return result + + def apply(self, func, args=(), kwargs={}): + """ + Moving function apply + + Parameters + ---------- + func : function + Must produce a single value from an ndarray input + *args and **kwargs are passed to the function + """ + window = self._get_window() + offset = _offset(window, self.center) + def f(arg, window, min_periods): + minp = _use_window(min_periods, window) + return algos.roll_generic(arg, window, minp, offset, func, args, kwargs) + + return self._apply(f, center=False) + + def sum(self): + """ + Moving sum + """ + return self._apply('roll_sum') + + def max(self, how='max'): + """ + Moving max + + Parameters + ---------- + how : string, default max + Method for down- or re-sampling + """ + return self._apply('roll_max', how=how) + + def min(self, how='min'): + """ + Moving min + + Parameters + ---------- + how : string, default min + Method for down- or re-sampling + """ + return self._apply('roll_min', how=how) + + def mean(self): + """ + Moving mean + """ + return self._apply('roll_mean') + + def median(self, how='median'): + """ + Moving median + + Parameters + ---------- + how : string, default median + Method for down- or re-sampling + """ + + return self._apply('roll_median_c', how=how) + + def std(self, ddof=1): + """ + Moving standard deviation + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + window = self._get_window() + def f(arg, *args, **kwargs): + minp = _require_min_periods(1)(self.min_periods, window) + return _zsqrt(algos.roll_var(arg, window, minp, ddof)) + + return self._apply(f, check_minp=_require_min_periods(1)) + + def var(self, ddof=1): + """ + Moving variance + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + return self._apply('roll_var', + check_minp=_require_min_periods(1), + ddof=ddof) + + def skew(self): + """ + Unbiased moving skewness + """ + return self._apply('roll_skew', + check_minp=_require_min_periods(3)) + + def kurt(self): + """ + Unbiased moving kurtosis + """ + return self._apply('roll_kurt', + check_minp=_require_min_periods(4)) + + def quantile(self, quantile): + """ + Rolling quantile + + Parameters + ---------- + quantile : float + 0 <= quantile <= 1 + """ + window = self._get_window() + def f(arg, *args, **kwargs): + minp = _use_window(self.min_periods, window) + return algos.roll_quantile(arg, window, minp, quantile) + + return self._apply(f) + + def cov(self, other=None, pairwise=False, ddof=1): + """ + Moving sample covariance + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + if other is None: + other = self._selected_obj + pairwise = True + other = self._shallow_copy(other) + window = self._get_window(other) + + def _get_cov(X, Y): + mean = lambda x: x.rolling(window, self.min_periods, center=self.center).mean() + count = (X+Y).rolling(window=window, center=self.center).count() + bias_adj = count / (count - ddof) + return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)) + + def corr(self, other=None, pairwise=False): + """ + Moving sample correlation + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + """ + + if other is None: + other = self._selected_obj + pairwise = True + other = self._shallow_copy(other) + window = self._get_window(other) + + def _get_corr(a, b): + a = a.rolling(window=window, + min_periods=self.min_periods, + freq=self.freq, + center=self.center) + b = b.rolling(window=window, + min_periods=self.min_periods, + freq=self.freq, + center=self.center) + + return a.cov(b) / (a.std() * b.std()) + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)) + +class Expanding(Rolling): + _attributes = ['min_periods','freq','center','how','axis'] + + @property + def _constructor(self): + return Expanding + + def _get_window(self, other=None): + obj = self._selected_obj + if other is None: + return max(len(obj), self.min_periods) if self.min_periods else len(obj) + return max((len(obj) + len(obj)), self.min_periods) if self.min_periods else (len(obj) + len(obj)) + +class EWM(_Rolling): + _attributes = ['com','min_periods','freq','adjust','how','ignore_na','axis'] + + def __init__(self, obj, com=None, span=None, halflife=None, min_periods=0, freq=None, + adjust=True, how=None, ignore_na=False, axis=0): + self.obj = obj + self.com = _get_center_of_mass(com, span, halflife) + self.min_periods = min_periods + self.freq = freq + self.adjust = adjust + self.how = how + self.ignore_na = ignore_na + self.axis = axis + self._convert_freq() + + @property + def _constructor(self): + return EWM + + def _apply(self, func, **kwargs): + """ + Rolling statistical measure using supplied function. Designed to be + used with passed-in Cython array-based functions. + + Parameters + ---------- + func : string/callable to apply + + Returns + ------- + y : type of input argument + + """ + results, blocks = [], self._create_blocks() + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + # if we have a string function name, wrap it + if isinstance(func, compat.string_types): + if not hasattr(algos, func): + raise ValueError("we do not support this function algos.{0}".format(func)) + + cfunc = getattr(algos, func) + def func(arg): + return cfunc(arg, self.com, int(self.adjust), int(self.ignore_na), int(self.min_periods)) + + results.append(np.apply_along_axis(func, self.axis, values)) + + return self._wrap_results(results, blocks) + + def mean(self): + """ + exponential weighted moving average + """ + return self._apply('ewma') + + def std(self, bias=False): + """ + exponential weighted moving stddev + + Parameters + ---------- + bias : boolean, default False + Use a standard estimation bias correction + """ + return _zsqrt(self.var(bias=bias)) + vol=std + + def var(self, bias=False): + """ + exponential weighted moving average + + Parameters + ---------- + bias : boolean, default False + Use a standard estimation bias correction + """ + def f(arg): + return algos.ewmcov(arg, + arg, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias)) + + return self._apply(f) + + def cov(self, other=None, pairwise=False, bias=False): + """ + exponential weighted sample covariance + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + bias : boolean, default False + Use a standard estimation bias correction + """ + if other is None: + other = self._selected_obj + pairwise = True + other = self._shallow_copy(other) + + def _get_cov(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + cov = algos.ewmcov(X._prep_values(), + Y._prep_values(), + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias)) + return X._wrap_result(cov) + + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)) + + def corr(self, other=None, pairwise=False): + """ + exponential weighted sample correlation + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default False + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + """ + if other is None: + other = self._selected_obj + pairwise = True + other = self._shallow_copy(other) + + def _get_corr(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + def _cov(x, y): + return algos.ewmcov(x, y, self.com, int(self.adjust), int(self.ignore_na), int(self.min_periods), 1) + + x_values = X._prep_values() + y_values = Y._prep_values() + cov = _cov(x_values, y_values) + x_var = _cov(x_values, x_values) + y_var = _cov(y_values, y_values) + corr = cov / _zsqrt(x_var * y_var) + return X._wrap_result(corr) + + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)) + +######################## +##### Helper Funcs ##### +######################## + +def _flex_binary_moment(arg1, arg2, f, pairwise=False): + from pandas import Series, DataFrame, Panel + if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and + isinstance(arg2,(np.ndarray, Series, DataFrame))): + raise TypeError("arguments to moment function must be of type " + "np.ndarray/Series/DataFrame") + + if isinstance(arg1, (np.ndarray, Series)) and \ + isinstance(arg2, (np.ndarray,Series)): + X, Y = _prep_binary(arg1, arg2) + return f(X, Y) + + elif isinstance(arg1, DataFrame): + def dataframe_from_int_dict(data, frame_template): + result = DataFrame(data, index=frame_template.index) + if len(result.columns) > 0: + result.columns = frame_template.columns[result.columns] + return result + + results = {} + if isinstance(arg2, DataFrame): + if pairwise is False: + if arg1 is arg2: + # special case in order to handle duplicate column names + for i, col in enumerate(arg1.columns): + results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) + return dataframe_from_int_dict(results, arg1) + else: + if not arg1.columns.is_unique: + raise ValueError("'arg1' columns are not unique") + if not arg2.columns.is_unique: + raise ValueError("'arg2' columns are not unique") + X, Y = arg1.align(arg2, join='outer') + X = X + 0 * Y + Y = Y + 0 * X + res_columns = arg1.columns.union(arg2.columns) + for col in res_columns: + if col in X and col in Y: + results[col] = f(X[col], Y[col]) + return DataFrame(results, index=X.index, columns=res_columns) + elif pairwise is True: + results = defaultdict(dict) + for i, k1 in enumerate(arg1.columns): + for j, k2 in enumerate(arg2.columns): + if j 0: + p.major_axis = arg1.columns[p.major_axis] + if len(p.minor_axis) > 0: + p.minor_axis = arg2.columns[p.minor_axis] + return p + else: + raise ValueError("'pairwise' is not True/False") + else: + results = {} + for i, col in enumerate(arg1.columns): + results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2)) + return dataframe_from_int_dict(results, arg1) + + else: + return _flex_binary_moment(arg2, arg1, f) + +def _get_center_of_mass(com, span, halflife): + valid_count = len([x for x in [com, span, halflife] if x is not None]) + if valid_count > 1: + raise Exception("com, span, and halflife are mutually exclusive") + + if span is not None: + # convert span to center of mass + com = (span - 1) / 2. + elif halflife is not None: + # convert halflife to center of mass + decay = 1 - np.exp(np.log(0.5) / halflife) + com = 1 / decay - 1 + elif com is None: + raise Exception("Must pass one of com, span, or halflife") + + return float(com) + +def _offset(window, center): + if not com.is_integer(window): + window = len(window) + offset = (window - 1) / 2. if center else 0 + try: + return int(offset) + except: + return offset.astype(int) + +def _require_min_periods(p): + def _check_func(minp, window): + if minp is None: + return window + else: + return max(p, minp) + return _check_func + +def _use_window(minp, window): + if minp is None: + return window + else: + return minp + +def _zsqrt(x): + result = np.sqrt(x) + mask = x < 0 + + from pandas import DataFrame + if isinstance(x, DataFrame): + if mask.values.any(): + result[mask] = 0 + else: + if mask.any(): + result[mask] = 0 + + return result + +def _prep_binary(arg1, arg2): + if not isinstance(arg2, type(arg1)): + raise Exception('Input arrays must be of the same type!') + + # mask out values, this also makes a common index... + X = arg1 + 0 * arg2 + Y = arg2 + 0 * arg1 + + return X, Y + +def _validate_win_type(win_type, kwargs): + # may pop from kwargs + arg_map = {'kaiser': ['beta'], + 'gaussian': ['std'], + 'general_gaussian': ['power', 'width'], + 'slepian': ['width']} + if win_type in arg_map: + return tuple([win_type] + + _pop_args(win_type, arg_map[win_type], kwargs)) + return win_type + + +def _pop_args(win_type, arg_names, kwargs): + msg = '%s window requires %%s' % win_type + all_args = [] + for n in arg_names: + if n not in kwargs: + raise ValueError(msg % n) + all_args.append(kwargs.pop(n)) + return all_args + +############################# +##### top-level exports ##### +############################# + +def rolling(obj, win_type=None, **kwds): + """ + Provides rolling transformations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + window : int + Size of the moving window. This is the number of observations used for + calculating the statistic. + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + how : string, default None + Method for down- or re-sampling + win_type : string, default None + prove a window type, see the notes below + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + + The recognized window types are: + + * ``boxcar`` + * ``triang`` + * ``blackman`` + * ``hamming`` + * ``bartlett`` + * ``parzen`` + * ``bohman`` + * ``blackmanharris`` + * ``nuttall`` + * ``barthann`` + * ``kaiser`` (needs beta) + * ``gaussian`` (needs std) + * ``general_gaussian`` (needs power, width) + * ``slepian`` (needs width). + """ + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + if win_type is not None: + return Window(obj, win_type=win_type, **kwds) + + return Rolling(obj, **kwds) + +def expanding(obj, **kwds): + """ + Provides expanding transformations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + how : string, default None + Method for down- or re-sampling + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + """ + + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + return Expanding(obj, **kwds) + +def ewm(obj, **kwds): + """ + .. versionadded:: 0.18.0 + + Provides exponential weighted functions + + Parameters + ---------- + com : float. optional + Center of mass: :math:`\alpha = 1 / (1 + com)`, + span : float, optional + Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` + halflife : float, optional + Specify decay in terms of halflife, :math:`\alpha = 1 - exp(log(0.5) / halflife)` + min_periods : int, default 0 + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : None or string alias / date offset object, default=None + Frequency to conform to before computing statistic + adjust : boolean, default True + Divide by decaying adjustment factor in beginning periods to account for + imbalance in relative weightings (viewing EWMA as a moving average) + how : string, default 'mean' + Method for down- or re-sampling + ignore_na : boolean, default False + Ignore missing values when calculating weights; + specify True to reproduce pre-0.15.0 behavior + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + Either center of mass, span or halflife must be specified + + EWMA is sometimes specified using a "span" parameter `s`, we have that the + decay parameter :math:`\alpha` is related to the span as + :math:`\alpha = 2 / (s + 1) = 1 / (1 + c)` + + where `c` is the center of mass. Given a span, the associated center of mass is + :math:`c = (s - 1) / 2` + + So a "20-day EWMA" would have center 9.5. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + + When adjust is True (default), weighted averages are calculated using weights + (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. + + When adjust is False, weighted averages are calculated recursively as: + weighted_average[0] = arg[0]; + weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. + + When ignore_na is False (default), weights are based on absolute positions. + For example, the weights of x and y used in calculating the final weighted + average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and + (1-alpha)**2 and alpha (if adjust is False). + + When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based on + relative positions. For example, the weights of x and y used in calculating + the final weighted average of [x, None, y] are 1-alpha and 1 (if adjust is + True), and 1-alpha and alpha (if adjust is False). + + More details can be found at + http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions + """ + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + return EWM(obj, **kwds) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 3cddae45e7516..c6cff614de9b1 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -4,16 +4,8 @@ """ from __future__ import division -from functools import wraps -from collections import defaultdict - -from numpy import NaN import numpy as np - -from pandas.core.api import DataFrame, Series, Panel, notnull -import pandas.algos as algos -import pandas.core.common as pdcom - +from pandas.core.api import DataFrame, Series from pandas.util.decorators import Substitution, Appender __all__ = ['rolling_count', 'rolling_max', 'rolling_min', @@ -179,8 +171,38 @@ Use a standard estimation bias correction """ +def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): + """ + wrapper function to dispatch to the appropriate window functions + wraps/unwraps ndarrays for compat + + can be removed when ndarray support is removed + """ + is_ndarray = isinstance(arg, np.ndarray) + if is_ndarray: + if arg.ndim == 1: + arg = Series(arg) + elif arg.ndim == 2: + arg = DataFrame(arg) + else: + raise AssertionError("cannot support ndim > 2 for ndarray compat") + + # get the functional keywords here + if func_kw is None: + func_kw = [] + kwds = {} + for k in func_kw: + value = kwargs.pop(k,None) + if value is not None: + kwds[k] = value + r = getattr(arg,dispatch)(**kwargs) + result = getattr(r,name)(*args, **kwds) + + if is_ndarray: + result = result.values + return result -def rolling_count(arg, window, freq=None, center=False, how=None): +def rolling_count(arg, window, **kwargs): """ Rolling count of number of non-NaN observations inside provided window. @@ -208,26 +230,12 @@ def rolling_count(arg, window, freq=None, center=False, how=None): frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - arg = _conv_timerule(arg, freq, how) - if not center: - window = min(window, len(arg)) - - return_hook, values = _process_data_structure(arg, kill_inf=False) - - converted = np.isfinite(values).astype(float) - result = rolling_sum(converted, window, min_periods=0, - center=center) # already converted - - # putmask here? - result[np.isnan(result)] = 0 - return return_hook(result) - + return ensure_compat('rolling', 'count', arg, window=window, **kwargs) @Substitution("Unbiased moving covariance.", _binary_arg_flex, _roll_kw%'None'+_pairwise_kw+_ddof_kw, _flex_retval, _roll_notes) @Appender(_doc_template) -def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, - center=False, pairwise=None, how=None, ddof=1): +def rolling_cov(arg1, arg2=None, window=None, pairwise=None, **kwargs): if window is None and isinstance(arg2, (int, float)): window = arg2 arg2 = arg1 @@ -235,23 +243,19 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, elif arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise # only default unset - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - - def _get_cov(X, Y): - mean = lambda x: rolling_mean(x, window, min_periods, center=center) - count = rolling_count(X + Y, window, center=center) - bias_adj = count / (count - ddof) - return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj - rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise)) - return rs - + return ensure_compat('rolling', + 'cov', + arg1, + other=arg2, + window=window, + pairwise=pairwise, + func_kw=['other','pairwise','ddof'], + **kwargs) @Substitution("Moving sample correlation.", _binary_arg_flex, _roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes) @Appender(_doc_template) -def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, - center=False, pairwise=None, how=None): +def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): if window is None and isinstance(arg2, (int, float)): window = arg2 arg2 = arg1 @@ -259,86 +263,14 @@ def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, elif arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise # only default unset - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - - def _get_corr(a, b): - num = rolling_cov(a, b, window, min_periods, freq=freq, - center=center) - den = (rolling_std(a, window, min_periods, freq=freq, - center=center) * - rolling_std(b, window, min_periods, freq=freq, - center=center)) - return num / den - - return _flex_binary_moment(arg1, arg2, _get_corr, pairwise=bool(pairwise)) - - -def _flex_binary_moment(arg1, arg2, f, pairwise=False): - if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and - isinstance(arg2,(np.ndarray, Series, DataFrame))): - raise TypeError("arguments to moment function must be of type " - "np.ndarray/Series/DataFrame") - - if isinstance(arg1, (np.ndarray, Series)) and \ - isinstance(arg2, (np.ndarray,Series)): - X, Y = _prep_binary(arg1, arg2) - return f(X, Y) - - elif isinstance(arg1, DataFrame): - def dataframe_from_int_dict(data, frame_template): - result = DataFrame(data, index=frame_template.index) - if len(result.columns) > 0: - result.columns = frame_template.columns[result.columns] - return result - - results = {} - if isinstance(arg2, DataFrame): - if pairwise is False: - if arg1 is arg2: - # special case in order to handle duplicate column names - for i, col in enumerate(arg1.columns): - results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) - return dataframe_from_int_dict(results, arg1) - else: - if not arg1.columns.is_unique: - raise ValueError("'arg1' columns are not unique") - if not arg2.columns.is_unique: - raise ValueError("'arg2' columns are not unique") - X, Y = arg1.align(arg2, join='outer') - X = X + 0 * Y - Y = Y + 0 * X - res_columns = arg1.columns.union(arg2.columns) - for col in res_columns: - if col in X and col in Y: - results[col] = f(X[col], Y[col]) - return DataFrame(results, index=X.index, columns=res_columns) - elif pairwise is True: - results = defaultdict(dict) - for i, k1 in enumerate(arg1.columns): - for j, k2 in enumerate(arg2.columns): - if j 0: - p.major_axis = arg1.columns[p.major_axis] - if len(p.minor_axis) > 0: - p.minor_axis = arg2.columns[p.minor_axis] - return p - else: - raise ValueError("'pairwise' is not True/False") - else: - results = {} - for i, col in enumerate(arg1.columns): - results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2)) - return dataframe_from_int_dict(results, arg1) - - else: - return _flex_binary_moment(arg2, arg1, f) - + return ensure_compat('rolling', + 'corr', + arg1, + other=arg2, + window=window, + pairwise=pairwise, + func_kw=['other','pairwise'], + **kwargs) @Substitution("Deprecated. Use rolling_corr(..., pairwise=True) instead.\n\n" "Pairwise moving sample correlation", _pairwise_arg, @@ -354,164 +286,65 @@ def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None, pairwise=True) -def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False, - how=None, args=(), kwargs={}, **kwds): - """ - Rolling statistical measure using supplied function. Designed to be - used with passed-in Cython array-based functions. - - Parameters - ---------- - arg : DataFrame or numpy ndarray-like - window : Number of observations used for calculating statistic - func : Cython function to compute rolling statistic on raw series - minp : int - Minimum number of observations required to have a value - axis : int, default 0 - freq : None or string alias / date offset object, default=None - Frequency to conform to before computing statistic - center : boolean, default False - Whether the label should correspond with center of window - how : string, default 'mean' - Method for down- or re-sampling - args : tuple - Passed on to func - kwargs : dict - Passed on to func - - Returns - ------- - y : type of input - """ - arg = _conv_timerule(arg, freq, how) - - return_hook, values = _process_data_structure(arg) - - if values.size == 0: - result = values.copy() - else: - # actually calculate the moment. Faster way to do this? - offset = int((window - 1) / 2.) if center else 0 - additional_nans = np.array([np.NaN] * offset) - calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x, - window, minp=minp, args=args, kwargs=kwargs, - **kwds) - if values.ndim > 1: - result = np.apply_along_axis(calc, axis, values) - else: - result = calc(values) - - if center: - result = _center_window(result, window, axis) - - return return_hook(result) - - -def _center_window(rs, window, axis): - if axis > rs.ndim-1: - raise ValueError("Requested axis is larger then no. of argument " - "dimensions") - - offset = int((window - 1) / 2.) - if offset > 0: - if isinstance(rs, (Series, DataFrame, Panel)): - rs = rs.slice_shift(-offset, axis=axis) - else: - lead_indexer = [slice(None)] * rs.ndim - lead_indexer[axis] = slice(offset, None) - rs = np.copy(rs[tuple(lead_indexer)]) - return rs - - -def _process_data_structure(arg, kill_inf=True): - if isinstance(arg, DataFrame): - return_hook = lambda v: type(arg)(v, index=arg.index, - columns=arg.columns) - values = arg.values - elif isinstance(arg, Series): - values = arg.values - return_hook = lambda v: Series(v, arg.index, name=arg.name) - else: - return_hook = lambda v: v - values = arg - - if not issubclass(values.dtype.type, float): - values = values.astype(float) - - if kill_inf: - values = values.copy() - values[np.isinf(values)] = np.NaN - - return return_hook, values #------------------------------------------------------------------------------ # Exponential moving moments -def _get_center_of_mass(com, span, halflife): - valid_count = len([x for x in [com, span, halflife] if x is not None]) - if valid_count > 1: - raise Exception("com, span, and halflife are mutually exclusive") - - if span is not None: - # convert span to center of mass - com = (span - 1) / 2. - elif halflife is not None: - # convert halflife to center of mass - decay = 1 - np.exp(np.log(0.5) / halflife) - com = 1 / decay - 1 - elif com is None: - raise Exception("Must pass one of com, span, or halflife") - - return float(com) - - @Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, adjust=True, how=None, ignore_na=False): - arg = _conv_timerule(arg, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _ewma(v): - return algos.ewma(v, com, int(adjust), int(ignore_na), int(min_periods)) - - return_hook, values = _process_data_structure(arg) - if values.size == 0: - output = values.copy() - else: - output = np.apply_along_axis(_ewma, 0, values) - return return_hook(output) - + return ensure_compat('ewm', + 'mean', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na) @Substitution("Exponentially-weighted moving variance", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, freq=None, how=None, ignore_na=False, adjust=True): - arg = _conv_timerule(arg, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _ewmvar(v): - return algos.ewmcov(v, v, com, int(adjust), int(ignore_na), int(min_periods), int(bias)) - - return_hook, values = _process_data_structure(arg) - if values.size == 0: - output = values.copy() - else: - output = np.apply_along_axis(_ewmvar, 0, values) - return return_hook(output) - + return ensure_compat('ewm', + 'var', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na, + bias=bias, + func_kw=['bias']) @Substitution("Exponentially-weighted moving std", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, - ignore_na=False, adjust=True): - result = ewmvar(arg, com=com, span=span, halflife=halflife, - min_periods=min_periods, bias=bias, adjust=adjust, ignore_na=ignore_na) - return _zsqrt(result) + freq=None, how=None, ignore_na=False, adjust=True): + return ensure_compat('ewm', + 'std', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na, + bias=bias, + func_kw=['bias']) ewmvol = ewmstd @@ -528,21 +361,22 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, com = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _get_ewmcov(X, Y): - # X and Y have the same structure (and NaNs) when called from _flex_binary_moment() - return_hook, x_values = _process_data_structure(X) - return_hook, y_values = _process_data_structure(Y) - cov = algos.ewmcov(x_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), int(bias)) - return return_hook(cov) - - result = _flex_binary_moment(arg1, arg2, _get_ewmcov, - pairwise=bool(pairwise)) - return result + return ensure_compat('ewm', + 'cov', + arg1, + other=arg2, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + bias=bias, + freq=freq, + how=how, + ignore_na=ignore_na, + adjust=adjust, + pairwise=pairwise, + func_kw=['other','pairwise','bias']) @Substitution("Exponentially-weighted moving correlation", _binary_arg_flex, _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) @@ -556,80 +390,26 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, com = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _get_ewmcorr(X, Y): - # X and Y have the same structure (and NaNs) when called from _flex_binary_moment() - return_hook, x_values = _process_data_structure(X) - return_hook, y_values = _process_data_structure(Y) - cov = algos.ewmcov(x_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - x_var = algos.ewmcov(x_values, x_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - y_var = algos.ewmcov(y_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - corr = cov / _zsqrt(x_var * y_var) - return return_hook(corr) - - result = _flex_binary_moment(arg1, arg2, _get_ewmcorr, - pairwise=bool(pairwise)) - return result - - -def _zsqrt(x): - result = np.sqrt(x) - mask = x < 0 - - if isinstance(x, DataFrame): - if mask.values.any(): - result[mask] = 0 - else: - if mask.any(): - result[mask] = 0 - - return result - - -def _prep_binary(arg1, arg2): - if not isinstance(arg2, type(arg1)): - raise Exception('Input arrays must be of the same type!') - - # mask out values, this also makes a common index... - X = arg1 + 0 * arg2 - Y = arg2 + 0 * arg1 - - return X, Y + return ensure_compat('ewm', + 'corr', + arg1, + other=arg2, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + how=how, + ignore_na=ignore_na, + adjust=adjust, + pairwise=pairwise, + func_kw=['other','pairwise']) #---------------------------------------------------------------------- # Python interface to Cython functions -def _conv_timerule(arg, freq, how): - - types = (DataFrame, Series) - if freq is not None and isinstance(arg, types): - # Conform to whatever frequency needed. - arg = arg.resample(freq, how=how) - - return arg - - -def _require_min_periods(p): - def _check_func(minp, window): - if minp is None: - return window - else: - return max(p, minp) - return _check_func - - -def _use_window(minp, window): - if minp is None: - return window - else: - return minp - - -def _rolling_func(func, desc, check_minp=_use_window, how=None, additional_kw=''): +def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''): if how is None: how_arg_str = 'None' else: @@ -638,36 +418,33 @@ def _rolling_func(func, desc, check_minp=_use_window, how=None, additional_kw='' @Substitution(desc, _unary_arg, _roll_kw%how_arg_str + additional_kw, _type_of_input_retval, _roll_notes) @Appender(_doc_template) - @wraps(func) def f(arg, window, min_periods=None, freq=None, center=False, how=how, **kwargs): - def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): - minp = check_minp(minp, window) - return func(arg, window, minp, **kwds) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=center, how=how, **kwargs) - + return ensure_compat('rolling', + name, + arg, + window=window, + min_periods=min_periods, + freq=freq, + center=center, + how=how, + func_kw=func_kw, + **kwargs) return f -rolling_max = _rolling_func(algos.roll_max, 'Moving maximum.', how='max') -rolling_min = _rolling_func(algos.roll_min, 'Moving minimum.', how='min') -rolling_sum = _rolling_func(algos.roll_sum, 'Moving sum.') -rolling_mean = _rolling_func(algos.roll_mean, 'Moving mean.') -rolling_median = _rolling_func(algos.roll_median_c, 'Moving median.', - how='median') - -_ts_std = lambda *a, **kw: _zsqrt(algos.roll_var(*a, **kw)) -rolling_std = _rolling_func(_ts_std, 'Moving standard deviation.', - check_minp=_require_min_periods(1), +rolling_max = _rolling_func('max', 'Moving maximum.', how='max') +rolling_min = _rolling_func('min', 'Moving minimum.', how='min') +rolling_sum = _rolling_func('sum', 'Moving sum.') +rolling_mean = _rolling_func('mean', 'Moving mean.') +rolling_median = _rolling_func('median', 'Moving median.', how='median') +rolling_std = _rolling_func('std', 'Moving standard deviation.', + func_kw=['ddof'], additional_kw=_ddof_kw) -rolling_var = _rolling_func(algos.roll_var, 'Moving variance.', - check_minp=_require_min_periods(1), +rolling_var = _rolling_func('var', 'Moving variance.', + func_kw=['ddof'], additional_kw=_ddof_kw) -rolling_skew = _rolling_func(algos.roll_skew, 'Unbiased moving skewness.', - check_minp=_require_min_periods(3)) -rolling_kurt = _rolling_func(algos.roll_kurt, 'Unbiased moving kurtosis.', - check_minp=_require_min_periods(4)) - +rolling_skew = _rolling_func('skew', 'Unbiased moving skewness.') +rolling_kurt = _rolling_func('kurt', 'Unbiased moving kurtosis.') def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, center=False): @@ -703,12 +480,15 @@ def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - - def call_cython(arg, window, minp, args=(), kwargs={}): - minp = _use_window(minp, window) - return algos.roll_quantile(arg, window, minp, quantile) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=center) + return ensure_compat('rolling', + 'quantile', + arg, + window=window, + freq=freq, + center=center, + min_periods=min_periods, + func_kw=['quantile'], + quantile=quantile) def rolling_apply(arg, window, func, min_periods=None, freq=None, @@ -749,12 +529,17 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - offset = int((window - 1) / 2.) if center else 0 - def call_cython(arg, window, minp, args, kwargs): - minp = _use_window(minp, window) - return algos.roll_generic(arg, window, minp, offset, func, args, kwargs) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=False, args=args, kwargs=kwargs) + return ensure_compat('rolling', + 'apply', + arg, + window=window, + freq=freq, + center=center, + min_periods=min_periods, + func_kw=['func','args','kwargs'], + func=func, + args=args, + kwargs=kwargs) def rolling_window(arg, window=None, win_type=None, min_periods=None, @@ -816,97 +601,48 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - if isinstance(window, (list, tuple, np.ndarray)): - if win_type is not None: - raise ValueError(('Do not specify window type if using custom ' - 'weights')) - window = pdcom._asarray_tuplesafe(window).astype(float) - elif pdcom.is_integer(window): # window size - if win_type is None: - raise ValueError('Must specify window type') - try: - import scipy.signal as sig - except ImportError: - raise ImportError('Please install scipy to generate window weight') - win_type = _validate_win_type(win_type, kwargs) # may pop from kwargs - window = sig.get_window(win_type, window).astype(float) - else: - raise ValueError('Invalid window %s' % str(window)) - - minp = _use_window(min_periods, len(window)) - - arg = _conv_timerule(arg, freq, how) - return_hook, values = _process_data_structure(arg) - - if values.size == 0: - result = values.copy() - else: - offset = int((len(window) - 1) / 2.) if center else 0 - additional_nans = np.array([np.NaN] * offset) - f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x, - window, minp, avg=mean) - result = np.apply_along_axis(f, axis, values) - - if center: - result = _center_window(result, len(window), axis) - - return return_hook(result) - - -def _validate_win_type(win_type, kwargs): - # may pop from kwargs - arg_map = {'kaiser': ['beta'], - 'gaussian': ['std'], - 'general_gaussian': ['power', 'width'], - 'slepian': ['width']} - if win_type in arg_map: - return tuple([win_type] + - _pop_args(win_type, arg_map[win_type], kwargs)) - return win_type - - -def _pop_args(win_type, arg_names, kwargs): - msg = '%s window requires %%s' % win_type - all_args = [] - for n in arg_names: - if n not in kwargs: - raise ValueError(msg % n) - all_args.append(kwargs.pop(n)) - return all_args - - -def _expanding_func(func, desc, check_minp=_use_window, additional_kw=''): + func = 'mean' if mean else 'sum' + return ensure_compat('rolling', + func, + arg, + window=window, + win_type=win_type, + freq=freq, + center=center, + min_periods=min_periods, + axis=axis, + how=how, + func_kw=kwargs.keys(), + **kwargs) + +def _expanding_func(name, desc, func_kw=None, additional_kw=''): @Substitution(desc, _unary_arg, _expanding_kw + additional_kw, _type_of_input_retval, "") @Appender(_doc_template) - @wraps(func) def f(arg, min_periods=1, freq=None, **kwargs): - window = max(len(arg), min_periods) if min_periods else len(arg) - - def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): - minp = check_minp(minp, window) - return func(arg, window, minp, **kwds) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - **kwargs) - + return ensure_compat('expanding', + name, + arg, + min_periods=min_periods, + freq=freq, + func_kw=func_kw, + **kwargs) return f -expanding_max = _expanding_func(algos.roll_max, 'Expanding maximum.') -expanding_min = _expanding_func(algos.roll_min, 'Expanding minimum.') -expanding_sum = _expanding_func(algos.roll_sum, 'Expanding sum.') -expanding_mean = _expanding_func(algos.roll_mean, 'Expanding mean.') -expanding_median = _expanding_func(algos.roll_median_c, 'Expanding median.') +expanding_max = _expanding_func('max', 'Expanding maximum.') +expanding_min = _expanding_func('min', 'Expanding minimum.') +expanding_sum = _expanding_func('sum', 'Expanding sum.') +expanding_mean = _expanding_func('mean', 'Expanding mean.') +expanding_median = _expanding_func('median', 'Expanding median.') -expanding_std = _expanding_func(_ts_std, 'Expanding standard deviation.', - check_minp=_require_min_periods(1), +expanding_std = _expanding_func('std', 'Expanding standard deviation.', + func_kw=['ddof'], additional_kw=_ddof_kw) -expanding_var = _expanding_func(algos.roll_var, 'Expanding variance.', - check_minp=_require_min_periods(1), +expanding_var = _expanding_func('var', 'Expanding variance.', + func_kw=['ddof'], additional_kw=_ddof_kw) -expanding_skew = _expanding_func(algos.roll_skew, 'Unbiased expanding skewness.', - check_minp=_require_min_periods(3)) -expanding_kurt = _expanding_func(algos.roll_kurt, 'Unbiased expanding kurtosis.', - check_minp=_require_min_periods(4)) +expanding_skew = _expanding_func('skew', 'Unbiased expanding skewness.') +expanding_kurt = _expanding_func('kurt', 'Unbiased expanding kurtosis.') def expanding_count(arg, freq=None): @@ -930,7 +666,7 @@ def expanding_count(arg, freq=None): frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - return rolling_count(arg, len(arg), freq=freq) + return ensure_compat('expanding', 'count', arg, freq=freq) def expanding_quantile(arg, quantile, min_periods=1, freq=None): @@ -958,9 +694,13 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None): frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - return rolling_quantile(arg, len(arg), quantile, min_periods=min_periods, - freq=freq) - + return ensure_compat('expanding', + 'quantile', + arg, + freq=freq, + min_periods=min_periods, + func_kw=['quantile'], + quantile=quantile) @Substitution("Unbiased expanding covariance.", _binary_arg_flex, _expanding_kw+_pairwise_kw+_ddof_kw, _flex_retval, "") @@ -973,10 +713,15 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None, ddof min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) - return rolling_cov(arg1, arg2, window, - min_periods=min_periods, freq=freq, - pairwise=pairwise, ddof=ddof) + return ensure_compat('expanding', + 'cov', + arg1, + other=arg2, + min_periods=min_periods, + pairwise=pairwise, + freq=freq, + ddof=ddof, + func_kw=['other','pairwise','ddof']) @Substitution("Expanding sample correlation.", _binary_arg_flex, @@ -990,11 +735,14 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) - return rolling_corr(arg1, arg2, window, - min_periods=min_periods, - freq=freq, pairwise=pairwise) - + return ensure_compat('expanding', + 'corr', + arg1, + other=arg2, + min_periods=min_periods, + pairwise=pairwise, + freq=freq, + func_kw=['other','pairwise','ddof']) @Substitution("Deprecated. Use expanding_corr(..., pairwise=True) instead.\n\n" "Pairwise expanding sample correlation", _pairwise_arg, @@ -1038,6 +786,12 @@ def expanding_apply(arg, func, min_periods=1, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - window = max(len(arg), min_periods) if min_periods else len(arg) - return rolling_apply(arg, window, func, min_periods=min_periods, freq=freq, - args=args, kwargs=kwargs) + return ensure_compat('expanding', + 'apply', + arg, + freq=freq, + min_periods=min_periods, + func_kw=['func','args','kwargs'], + func=func, + args=args, + kwargs=kwargs) diff --git a/pandas/stats/tests/test_moments.py b/pandas/tests/test_window.py similarity index 85% rename from pandas/stats/tests/test_moments.py rename to pandas/tests/test_window.py index b9efa875735d2..116236ae7e422 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/tests/test_window.py @@ -9,12 +9,14 @@ import numpy as np from distutils.version import LooseVersion +import pandas as pd from pandas import Series, DataFrame, Panel, bdate_range, isnull, notnull, concat from pandas.util.testing import ( assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_index_equal ) import pandas.core.datetools as datetools import pandas.stats.moments as mom +import pandas.core.window as rwindow import pandas.util.testing as tm from pandas.compat import range, zip, PY3, StringIO @@ -33,12 +35,87 @@ def _create_data(self): self.arr = arr self.rng = bdate_range(datetime(2009, 1, 1), periods=N) - self.series = Series(arr.copy(), index=self.rng) - self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K)) +class TestApi(Base): + + def setUp(self): + self._create_data() + + def test_getitem(self): + + r = self.frame.rolling(window=5) + tm.assert_index_equal(r._selected_obj.columns,self.frame.columns) + + r = self.frame.rolling(window=5)[1] + self.assertEqual(r._selected_obj.name,self.frame.columns[1]) + + r = self.frame.rolling(window=5)[1,3] + tm.assert_index_equal(r._selected_obj.columns,self.frame.columns[[1,3]]) + + def test_select_bad_cols(self): + df = DataFrame([[1, 2]], columns=['A', 'B']) + g = df.rolling(window=5) + self.assertRaises(KeyError, g.__getitem__, ['C']) # g[['C']] + + self.assertRaises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] + with tm.assertRaisesRegexp(KeyError, '^[^A]+$'): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[['A', 'C']] + + def test_attribute_access(self): + + df = DataFrame([[1, 2]], columns=['A', 'B']) + r = df.rolling(window=5) + tm.assert_series_equal(r.A.sum(),r['A'].sum()) + self.assertRaises(AttributeError, lambda : r.F) + + def tests_skip_nuiscance(self): + + df = DataFrame({'A' : range(5), 'B' : range(5,10), 'C' : 'foo'}) + + r = df.rolling(window=3) + result = r[['A','B']].sum() + expected = DataFrame({'A' : [np.nan,np.nan,3,6,9], + 'B' : [np.nan,np.nan,18,21,24]}, + columns=list('AB')) + assert_frame_equal(result, expected) + + expected = pd.concat([r[['A','B']].sum(),df[['C']]],axis=1) + result = r.sum() + assert_frame_equal(result, expected) + + def test_timedeltas(self): + + df = DataFrame({'A' : range(5), 'B' : pd.timedelta_range('1 day',periods=5)}) + r = df.rolling(window=3) + result = r.sum() + expected = DataFrame({'A' : [np.nan,np.nan,3,6,9], + 'B' : pd.to_timedelta([pd.NaT,pd.NaT,'6 days','9 days','12 days'])}, + columns=list('AB')) + assert_frame_equal(result, expected) + + def test_agg(self): + df = DataFrame({'A' : range(5), + 'B' : range(0,10,2)}) + + r = df.rolling(window=3) + + import pdb; pdb.set_trace() + agged = r.aggregate([np.mean, np.std]) + agged = r.aggregate({'A': np.mean, + 'B': np.std}) + agged = r.aggregate({'A': ['mean','sum']}) + agged = r['A'].aggregate(['mean','sum']) + agged = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' } }) + agged = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' }, + 'B': { 'mean2' : 'mean', 'sum2' : 'sum' }}) + agged = r.aggregate({'r1': { 'A' : ['mean','sum'] }, + 'r2' : { 'B' : ['mean','sum'] }}) + class TestMoments(Base): def setUp(self): @@ -57,17 +134,18 @@ def test_centered_axis_validation(self): self.assertRaises(ValueError, mom.rolling_mean,DataFrame(np.ones((10,10))),3,center=True ,axis=2) def test_rolling_sum(self): - self._check_moment_func(mom.rolling_sum, np.sum) + self._check_moment_func(mom.rolling_sum, np.sum, name='sum') def test_rolling_count(self): counter = lambda x: np.isfinite(x).astype(float).sum() self._check_moment_func(mom.rolling_count, counter, + name='count', has_min_periods=False, preserve_nan=False, fill_value=0) def test_rolling_mean(self): - self._check_moment_func(mom.rolling_mean, np.mean) + self._check_moment_func(mom.rolling_mean, np.mean, name='mean') def test_cmov_mean(self): # GH 8238 @@ -101,6 +179,9 @@ def test_cmov_window(self): rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type='boxcar', center=True).mean() + assert_series_equal(xp, rs) + def test_cmov_window_corner(self): # GH 8238 tm._skip_if_no_scipy() @@ -152,6 +233,27 @@ def test_cmov_window_frame(self): rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True) assert_frame_equal(DataFrame(xp), rs) + rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).mean() + assert_frame_equal(DataFrame(xp), rs) + + # invalid method + self.assertRaises(AttributeError, lambda : DataFrame(vals).rolling(5, win_type='boxcar', center=True).std()) + + # sum + xp = np.array([[ np.nan, np.nan], + [ np.nan, np.nan], + [ 46.26, 46.96], + [ 43.22, 49.53], + [ 44.35, 51.04], + [ 34.05, 42.94], + [ 38.96, 43.22], + [ 45.25, 39.12], + [ np.nan, np.nan], + [ np.nan, np.nan]]) + + rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).sum() + assert_frame_equal(DataFrame(xp), rs) + def test_cmov_window_na_min_periods(self): tm._skip_if_no_scipy() @@ -162,7 +264,9 @@ def test_cmov_window_na_min_periods(self): xp = mom.rolling_mean(vals, 5, min_periods=4, center=True) rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True) + assert_series_equal(xp, rs) + rs = vals.rolling(5, win_type='boxcar', min_periods=4, center=True).mean() assert_series_equal(xp, rs) def test_cmov_window_regular(self): @@ -197,6 +301,9 @@ def test_cmov_window_regular(self): rs = mom.rolling_window(Series(vals), 5, wt, center=True) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean() + assert_series_equal(xp, rs) + def test_cmov_window_regular_linear_range(self): # GH 8238 tm._skip_if_no_scipy() @@ -214,6 +321,9 @@ def test_cmov_window_regular_linear_range(self): rs = mom.rolling_window(Series(vals), 5, wt, center=True) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean() + assert_series_equal(xp, rs) + def test_cmov_window_regular_missing_data(self): # GH 8238 tm._skip_if_no_scipy() @@ -248,6 +358,9 @@ def test_cmov_window_regular_missing_data(self): rs = mom.rolling_window(Series(vals), 5, wt, min_periods=3) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, min_periods=3).mean() + assert_series_equal(xp, rs) + def test_cmov_window_special(self): # GH 8238 tm._skip_if_no_scipy() @@ -278,6 +391,9 @@ def test_cmov_window_special(self): **k) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) + assert_series_equal(xp, rs) + def test_cmov_window_special_linear_range(self): # GH 8238 tm._skip_if_no_scipy() @@ -297,11 +413,14 @@ def test_cmov_window_special_linear_range(self): **k) assert_series_equal(xp, rs) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) + assert_series_equal(xp, rs) + def test_rolling_median(self): - self._check_moment_func(mom.rolling_median, np.median) + self._check_moment_func(mom.rolling_median, np.median, name='median') def test_rolling_min(self): - self._check_moment_func(mom.rolling_min, np.min) + self._check_moment_func(mom.rolling_min, np.min, name='min') a = np.array([1, 2, 3, 4, 5]) b = mom.rolling_min(a, window=100, min_periods=1) @@ -311,7 +430,7 @@ def test_rolling_min(self): 2, 3]), window=3, min_periods=5) def test_rolling_max(self): - self._check_moment_func(mom.rolling_max, np.max) + self._check_moment_func(mom.rolling_max, np.max, name='max') a = np.array([1, 2, 3, 4, 5]) b = mom.rolling_max(a, window=100, min_periods=1) @@ -330,8 +449,8 @@ def scoreatpercentile(a, per): return values[int(idx)] for q in qs: - def f(x, window, min_periods=None, freq=None, center=False): - return mom.rolling_quantile(x, window, q, + def f(x, window, quantile, min_periods=None, freq=None, center=False): + return mom.rolling_quantile(x, window, quantile, min_periods=min_periods, freq=freq, center=center) @@ -339,7 +458,7 @@ def f(x, window, min_periods=None, freq=None, center=False): def alt(x): return scoreatpercentile(x, q) - self._check_moment_func(f, alt) + self._check_moment_func(f, alt, name='quantile', quantile=q) def test_rolling_apply(self): # suppress warnings about empty slices, as we are deliberately testing with a 0-length Series @@ -349,13 +468,14 @@ def test_rolling_apply(self): ser = Series([]) assert_series_equal(ser, mom.rolling_apply(ser, 10, lambda x: x.mean())) - def roll_mean(x, window, min_periods=None, freq=None, center=False): + f = lambda x: x[np.isfinite(x)].mean() + def roll_mean(x, window, min_periods=None, freq=None, center=False, **kwargs): return mom.rolling_apply(x, window, - lambda x: x[np.isfinite(x)].mean(), + func=f, min_periods=min_periods, freq=freq, center=center) - self._check_moment_func(roll_mean, np.mean) + self._check_moment_func(roll_mean, np.mean, name='apply', func=f) # GH 8080 s = Series([None, None, None]) @@ -363,6 +483,9 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False): expected = Series([1., 2., 2.]) assert_series_equal(result, expected) + result = s.rolling(2, min_periods=0).apply(len) + assert_series_equal(result, expected) + def test_rolling_apply_out_of_bounds(self): # #1850 arr = np.arange(4) @@ -376,9 +499,12 @@ def test_rolling_apply_out_of_bounds(self): def test_rolling_std(self): self._check_moment_func(mom.rolling_std, - lambda x: np.std(x, ddof=1)) - self._check_moment_func(functools.partial(mom.rolling_std, ddof=0), - lambda x: np.std(x, ddof=0)) + lambda x: np.std(x, ddof=1), + name='std') + self._check_moment_func(mom.rolling_std, + lambda x: np.std(x, ddof=0), + name='std', + ddof=0) def test_rolling_std_1obs(self): result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), @@ -414,9 +540,12 @@ def test_rolling_std_neg_sqrt(self): def test_rolling_var(self): self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=1), - test_stable=True) - self._check_moment_func(functools.partial(mom.rolling_var, ddof=0), - lambda x: np.var(x, ddof=0)) + test_stable=True, + name='var') + self._check_moment_func(mom.rolling_var, + lambda x: np.var(x, ddof=0), + name='var', + ddof=0) def test_rolling_skew(self): try: @@ -424,7 +553,8 @@ def test_rolling_skew(self): except ImportError: raise nose.SkipTest('no scipy') self._check_moment_func(mom.rolling_skew, - lambda x: skew(x, bias=False)) + lambda x: skew(x, bias=False), + name='skew') def test_rolling_kurt(self): try: @@ -432,7 +562,8 @@ def test_rolling_kurt(self): except ImportError: raise nose.SkipTest('no scipy') self._check_moment_func(mom.rolling_kurt, - lambda x: kurtosis(x, bias=False)) + lambda x: kurtosis(x, bias=False), + name='kurt') def test_fperr_robustness(self): # TODO: remove this once python 2.5 out of picture @@ -463,36 +594,55 @@ def test_fperr_robustness(self): result = mom.rolling_mean(-arr, 1) self.assertTrue(result[-1] <= 0) - def _check_moment_func(self, func, static_comp, window=50, + def _check_moment_func(self, f, static_comp, + name=None, + window=50, has_min_periods=True, has_center=True, has_time_rule=True, preserve_nan=True, fill_value=None, - test_stable=False): + test_stable=False, + **kwargs): - self._check_ndarray(func, static_comp, window=window, + self._check_ndarray(f, static_comp, window=window, has_min_periods=has_min_periods, preserve_nan=preserve_nan, has_center=has_center, fill_value=fill_value, - test_stable=test_stable) + test_stable=test_stable, + **kwargs) - self._check_structures(func, static_comp, + self._check_structures(f, static_comp, has_min_periods=has_min_periods, has_time_rule=has_time_rule, fill_value=fill_value, - has_center=has_center) - - def _check_ndarray(self, func, static_comp, window=50, + has_center=has_center, + **kwargs) + + # new API + if name is not None: + self._check_structures(f, static_comp, + name=name, + has_min_periods=has_min_periods, + has_time_rule=has_time_rule, + fill_value=fill_value, + has_center=has_center, + **kwargs) + + def _check_ndarray(self, f, static_comp, window=50, has_min_periods=True, preserve_nan=True, has_center=True, fill_value=None, test_stable=False, - test_window=True): + test_window=True, + **kwargs): + + def get_result(arr, window, min_periods=None, center=False): + return f(arr, window, min_periods=min_periods, center=center, **kwargs) - result = func(self.arr, window) + result = get_result(self.arr, window) assert_almost_equal(result[-1], static_comp(self.arr[-50:])) @@ -505,11 +655,11 @@ def _check_ndarray(self, func, static_comp, window=50, arr[-10:] = np.NaN if has_min_periods: - result = func(arr, 50, min_periods=30) + result = get_result(arr, 50, min_periods=30) assert_almost_equal(result[-1], static_comp(arr[10:-10])) # min_periods is working correctly - result = func(arr, 20, min_periods=15) + result = get_result(arr, 20, min_periods=15) self.assertTrue(np.isnan(result[23])) self.assertFalse(np.isnan(result[24])) @@ -517,31 +667,31 @@ def _check_ndarray(self, func, static_comp, window=50, self.assertTrue(np.isnan(result[-5])) arr2 = randn(20) - result = func(arr2, 10, min_periods=5) + result = get_result(arr2, 10, min_periods=5) self.assertTrue(isnull(result[3])) self.assertTrue(notnull(result[4])) # min_periods=0 - result0 = func(arr, 20, min_periods=0) - result1 = func(arr, 20, min_periods=1) + result0 = get_result(arr, 20, min_periods=0) + result1 = get_result(arr, 20, min_periods=1) assert_almost_equal(result0, result1) else: - result = func(arr, 50) + result = get_result(arr, 50) assert_almost_equal(result[-1], static_comp(arr[10:-10])) # GH 7925 if has_center: if has_min_periods: - result = func(arr, 20, min_periods=15, center=True) - expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20, min_periods=15)[9:] + result = get_result(arr, 20, min_periods=15, center=True) + expected = get_result(np.concatenate((arr, np.array([np.NaN] * 9))), 20, min_periods=15)[9:] else: - result = func(arr, 20, center=True) - expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] + result = get_result(arr, 20, center=True) + expected = get_result(np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] self.assert_numpy_array_equal(result, expected) if test_stable: - result = func(self.arr + 1e9, window) + result = get_result(self.arr + 1e9, window) assert_almost_equal(result[-1], static_comp(self.arr[-50:] + 1e9)) @@ -549,16 +699,16 @@ def _check_ndarray(self, func, static_comp, window=50, if test_window: if has_min_periods: for minp in (0, len(self.arr)-1, len(self.arr)): - result = func(self.arr, len(self.arr)+1, min_periods=minp) - expected = func(self.arr, len(self.arr), min_periods=minp) + result = get_result(self.arr, len(self.arr)+1, min_periods=minp) + expected = get_result(self.arr, len(self.arr), min_periods=minp) nan_mask = np.isnan(result) self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) nan_mask = ~nan_mask assert_almost_equal(result[nan_mask], expected[nan_mask]) else: - result = func(self.arr, len(self.arr)+1) - expected = func(self.arr, len(self.arr)) + result = get_result(self.arr, len(self.arr)+1) + expected = get_result(self.arr, len(self.arr)) nan_mask = np.isnan(result) self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) nan_mask = ~nan_mask @@ -567,15 +717,34 @@ def _check_ndarray(self, func, static_comp, window=50, - def _check_structures(self, func, static_comp, + def _check_structures(self, f, static_comp, + name=None, has_min_periods=True, has_time_rule=True, has_center=True, - fill_value=None): + fill_value=None, + **kwargs): - series_result = func(self.series, 50) - tm.assertIsInstance(series_result, Series) + def get_result(obj, window, min_periods=None, freq=None, center=False): + + # check via the API calls if name is provided + if name is not None: + return getattr(obj.rolling(window=window, + min_periods=min_periods, + freq=freq, + center=center),name)(**kwargs) - frame_result = func(self.frame, 50) + # check via the moments API + return f(obj, + window=window, + min_periods=min_periods, + freq=freq, + center=center, + **kwargs) + + series_result = get_result(self.series, window=50) + frame_result = get_result(self.frame, window=50) + + tm.assertIsInstance(series_result, Series) self.assertEqual(type(frame_result), DataFrame) # check time_rule works @@ -584,13 +753,11 @@ def _check_structures(self, func, static_comp, minp = 10 if has_min_periods: - series_result = func(self.series[::2], win, min_periods=minp, - freq='B') - frame_result = func(self.frame[::2], win, min_periods=minp, - freq='B') + series_result = get_result(self.series[::2], window=win, min_periods=minp, freq='B') + frame_result = get_result(self.frame[::2], window=win, min_periods=minp, freq='B') else: - series_result = func(self.series[::2], win, freq='B') - frame_result = func(self.frame[::2], win, freq='B') + series_result = get_result(self.series[::2], window=win, freq='B') + frame_result = get_result(self.frame[::2], window=win, freq='B') last_date = series_result.index[-1] prev_date = last_date - 24 * datetools.bday @@ -605,22 +772,41 @@ def _check_structures(self, func, static_comp, # GH 7925 if has_center: + + # shifter index + s = ['x%d'%x for x in range(12)] + if has_min_periods: minp = 10 - series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.series.index) - frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.frame.index) - series_rs = func(self.series, 25, min_periods=minp, - center=True) - frame_rs = func(self.frame, 25, min_periods=minp, - center=True) + series_xp = get_result(self.series.reindex(list(self.series.index)+s), + window=25, + min_periods=minp).shift(-12).reindex(self.series.index) + frame_xp = get_result(self.frame.reindex(list(self.frame.index)+s), + window=25, + min_periods=minp).shift(-12).reindex(self.frame.index) + + series_rs = get_result(self.series, + window=25, + min_periods=minp, + center=True) + frame_rs = get_result(self.frame, + window=25, + min_periods=minp, + center=True) else: - series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.series.index) - frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.frame.index) - - series_rs = func(self.series, 25, center=True) - frame_rs = func(self.frame, 25, center=True) + series_xp = get_result(self.series.reindex(list(self.series.index)+s), + window=25).shift(-12).reindex(self.series.index) + frame_xp = get_result(self.frame.reindex(list(self.frame.index)+s), + window=25).shift(-12).reindex(self.frame.index) + + series_rs = get_result(self.series, + window=25, + center=True) + frame_rs = get_result(self.frame, + window=25, + center=True) if fill_value is not None: series_xp = series_xp.fillna(fill_value) @@ -642,7 +828,10 @@ def test_ewma(self): for f in [lambda s: mom.ewma(s, com=2.0, adjust=True), lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False), lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=True), - ]: + lambda s: s.ewm(com=2.0, adjust=True).mean(), + lambda s: s.ewm(com=2.0, adjust=True, ignore_na=False).mean(), + lambda s: s.ewm(com=2.0, adjust=True, ignore_na=True).mean(), + ]: result = f(s) assert_series_equal(result, expected) @@ -650,6 +839,9 @@ def test_ewma(self): for f in [lambda s: mom.ewma(s, com=2.0, adjust=False), lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=False), lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=True), + lambda s: s.ewm(com=2.0, adjust=False).mean(), + lambda s: s.ewm(com=2.0, adjust=False, ignore_na=False).mean(), + lambda s: s.ewm(com=2.0, adjust=False, ignore_na=True).mean(), ]: result = f(s) assert_series_equal(result, expected) @@ -695,16 +887,20 @@ def simple_wma(s, w): expected = simple_wma(s, Series(w)) result = mom.ewma(s, com=com, adjust=adjust, ignore_na=ignore_na) assert_series_equal(result, expected) + result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() + assert_series_equal(result, expected) if ignore_na is False: # check that ignore_na defaults to False result = mom.ewma(s, com=com, adjust=adjust) assert_series_equal(result, expected) + result = s.ewm(com=com, adjust=adjust).mean() + assert_series_equal(result, expected) def test_ewmvar(self): - self._check_ew(mom.ewmvar) + self._check_ew(mom.ewmvar, name='var') def test_ewmvol(self): - self._check_ew(mom.ewmvol) + self._check_ew(mom.ewmvol, name='vol') def test_ewma_span_com_args(self): A = mom.ewma(self.arr, com=9.5) @@ -727,11 +923,17 @@ def test_ewma_halflife_arg(self): def test_moment_preserve_series_name(self): # GH 10565 s = Series(np.arange(100), name='foo') + s2 = mom.rolling_mean(s, 30) s3 = mom.rolling_sum(s, 20) self.assertEqual(s2.name, 'foo') self.assertEqual(s3.name, 'foo') + s2 = s.rolling(30).mean() + s3 = s.rolling(20).sum() + self.assertEqual(s2.name, 'foo') + self.assertEqual(s3.name, 'foo') + def test_ew_empty_arrays(self): arr = np.array([], dtype=np.float64) @@ -740,11 +942,11 @@ def test_ew_empty_arrays(self): result = f(arr, 3) assert_almost_equal(result, arr) - def _check_ew(self, func): - self._check_ew_ndarray(func) - self._check_ew_structures(func) + def _check_ew(self, func, name=None): + self._check_ew_ndarray(func, name=name) + self._check_ew_structures(func, name=name) - def _check_ew_ndarray(self, func, preserve_nan=False): + def _check_ew_ndarray(self, func, preserve_nan=False, name=None): result = func(self.arr, com=10) if preserve_nan: assert(np.isnan(result[self._nan_locs]).all()) @@ -787,11 +989,18 @@ def _check_ew_ndarray(self, func, preserve_nan=False): result2 = func(np.arange(50), span=10) self.assertEqual(result2.dtype, np.float_) - def _check_ew_structures(self, func): + def _check_ew_structures(self, func, name=None): series_result = func(self.series, com=10) tm.assertIsInstance(series_result, Series) + if name is not None: + series_result = getattr(self.series.ewm(com=10),name)() + tm.assertIsInstance(series_result, Series) + frame_result = func(self.frame, com=10) self.assertEqual(type(frame_result), DataFrame) + if name is not None: + frame_result = getattr(self.frame.ewm(com=10),name)() + self.assertEqual(type(frame_result), DataFrame) # create the data only once as we are not setting it def _create_consistency_data(): @@ -1204,8 +1413,11 @@ def test_rolling_cov(self): result = mom.rolling_cov(A, B, 50, min_periods=25) assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) + result = A.rolling(window=50, min_periods=25).cov(B) + assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) + def test_rolling_cov_pairwise(self): - self._check_pairwise_moment(mom.rolling_cov, 10, min_periods=5) + self._check_pairwise_moment(mom.rolling_cov, window=10, min_periods=5, name='cov') def test_rolling_corr(self): A = self.series @@ -1214,6 +1426,9 @@ def test_rolling_corr(self): result = mom.rolling_corr(A, B, 50, min_periods=25) assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) + result = A.rolling(window=50, min_periods=25).corr(B) + assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) + # test for correct bias correction a = tm.makeTimeSeries() b = tm.makeTimeSeries() @@ -1223,21 +1438,32 @@ def test_rolling_corr(self): result = mom.rolling_corr(a, b, len(a), min_periods=1) assert_almost_equal(result[-1], a.corr(b)) + result = a.rolling(window=len(a), min_periods=1).corr(b) + assert_almost_equal(result[-1], a.corr(b)) + def test_rolling_corr_pairwise(self): - self._check_pairwise_moment(mom.rolling_corr, 10, min_periods=5) + self._check_pairwise_moment(mom.rolling_corr, window=10, min_periods=5, name='corr') - def _check_pairwise_moment(self, func, *args, **kwargs): - panel = func(self.frame, *args, **kwargs) + def _check_pairwise_moment(self, func, name=None, **kwargs): + def get_result(obj, obj2=None): + return func(obj, obj2, **kwargs) + panel = get_result(self.frame) actual = panel.ix[:, 1, 5] - expected = func(self.frame[1], self.frame[5], *args, **kwargs) + expected = get_result(self.frame[1], self.frame[5]) tm.assert_series_equal(actual, expected, check_names=False) self.assertEqual(actual.name, 5) + if name is not None: + panel = getattr(self.frame.rolling(**kwargs),name)() + actual = panel.ix[:, 1, 5] + tm.assert_series_equal(actual, expected, check_names=False) + self.assertEqual(actual.name, 5) + def test_flex_binary_moment(self): # GH3155 # don't blow the stack - self.assertRaises(TypeError, mom._flex_binary_moment,5,6,None) + self.assertRaises(TypeError, rwindow._flex_binary_moment,5,6,None) def test_corr_sanity(self): #GH 3155 @@ -1373,6 +1599,9 @@ def test_expanding_count(self): result = mom.expanding_count(self.series) assert_almost_equal(result, mom.rolling_count(self.series, len(self.series))) + result = self.series.expanding().count() + assert_almost_equal(result, mom.rolling_count(self.series, + len(self.series))) def test_expanding_quantile(self): result = mom.expanding_quantile(self.series, 0.5)