Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

Add expanding moment functions and related tests. #1785

Merged
merged 1 commit into from Sep 8, 2012
Jump to file or symbol
Failed to load files and symbols.
+260 −1
Split
View
@@ -19,7 +19,12 @@
'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt',
'rolling_quantile', 'rolling_median', 'rolling_apply',
'rolling_corr_pairwise',
- 'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov']
+ 'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov',
+ 'expanding_count', 'expanding_max', 'expanding_min',
+ 'expanding_sum', 'expanding_mean', 'expanding_std',
+ 'expanding_cov', 'expanding_corr', 'expanding_var',
+ 'expanding_skew', 'expanding_kurt', 'expanding_quantile',
+ 'expanding_median', 'expanding_apply', 'expanding_corr_pairwise']
#-------------------------------------------------------------------------------
# Docs
@@ -79,6 +84,24 @@
y : type of input argument
"""
+
# Docstring template shared by the expanding_* functions. The three %s slots
# are filled, in order, with a summary line, the argument description and the
# return description by the Substitution/Appender decorators that use it.
+_expanding_doc = """
+%s
+
+Parameters
+----------
+%s
+min_periods : int
+ Minimum number of observations in window required to have a value
+freq : None or string alias / date offset object, default=None
+ Frequency to conform to before computing statistic
+
+Returns
+-------
+%s
+"""
+
+
_type_of_input = "y : type of input argument"
_flex_retval = """y : type depends on inputs
@@ -465,3 +488,131 @@ def call_cython(arg, window, minp):
return lib.roll_generic(arg, window, minp, func)
return _rolling_moment(arg, window, call_cython, min_periods,
freq=freq, time_rule=time_rule)
+
+
def _expanding_func(func, desc, check_minp=_use_window):
    """
    Build an expanding-window statistic from a windowed kernel.

    Parameters
    ----------
    func : callable
        Kernel invoked as ``func(values, window, minp, **kwds)``.
    desc : str
        Summary line substituted into the generated docstring.
    check_minp : callable, optional
        Invoked as ``check_minp(minp, window)``; its result is the minimum
        number of observations handed to ``func``.

    Returns
    -------
    f : function
        ``f(arg, min_periods=1, freq=None, time_rule=None, **kwargs)``,
        which delegates to ``_rolling_moment``.
    """
    @Substitution(desc, _unary_arg, _type_of_input)
    @Appender(_expanding_doc)
    @wraps(func)
    def f(arg, min_periods=1, freq=None, time_rule=None, **kwargs):
        def call_cython(arg, window, minp, **kwds):
            # Size the window from the data the kernel actually receives
            # rather than from the caller's input: the input is conformed
            # to ``freq`` before the statistic is computed, so its length
            # may have changed by then, and a window shorter than the data
            # would stop the statistic from expanding.
            # NOTE(review): assumes the kernel is handed a 1-D array, as
            # the roll_* kernels expect -- confirm against _rolling_moment.
            window = len(arg)
            minp = check_minp(minp, window)
            return func(arg, window, minp, **kwds)
        return _rolling_moment(arg, len(arg), call_cython, min_periods,
                               freq=freq, time_rule=time_rule, **kwargs)

    return f
+
# Expanding statistics built with the default ``check_minp``.
expanding_max = _expanding_func(lib.roll_max2, desc='Expanding maximum')
expanding_min = _expanding_func(lib.roll_min2, desc='Expanding minimum')
expanding_sum = _expanding_func(lib.roll_sum, desc='Expanding sum')
expanding_mean = _expanding_func(lib.roll_mean, desc='Expanding mean')
expanding_median = _expanding_func(lib.roll_median_cython,
                                   desc='Expanding median')

# Expanding statistics whose ``check_minp`` comes from
# ``_require_min_periods(k)`` -- presumably enforcing at least k
# observations; confirm against that helper.
expanding_std = _expanding_func(
    _ts_std,
    desc='Unbiased expanding standard deviation',
    check_minp=_require_min_periods(2))
expanding_var = _expanding_func(
    lib.roll_var,
    desc='Unbiased expanding variance',
    check_minp=_require_min_periods(2))
expanding_skew = _expanding_func(
    lib.roll_skew,
    desc='Unbiased expanding skewness',
    check_minp=_require_min_periods(3))
expanding_kurt = _expanding_func(
    lib.roll_kurt,
    desc='Unbiased expanding kurtosis',
    check_minp=_require_min_periods(4))
+
+
def expanding_count(arg, freq=None, time_rule=None):
    """
    Count non-NaN observations over an expanding window.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    time_rule : optional
        Forwarded unchanged to ``rolling_count``.

    Returns
    -------
    expanding_count : type of caller
    """
    # Delegate to rolling_count with a window as long as the input.
    full_window = len(arg)
    return rolling_count(arg, full_window, freq=freq, time_rule=time_rule)
+
+
def expanding_quantile(arg, quantile, min_periods=1, freq=None,
                       time_rule=None):
    """
    Quantile computed over an expanding window.

    Parameters
    ----------
    arg : Series, DataFrame
    quantile : 0 <= quantile <= 1
    min_periods : int
        Minimum number of observations in window required to have a value
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    time_rule : optional
        Forwarded unchanged to ``rolling_quantile``.

    Returns
    -------
    y : type of input argument
    """
    # Delegate to rolling_quantile with a window as long as the input.
    full_window = len(arg)
    return rolling_quantile(arg, full_window, quantile,
                            min_periods=min_periods,
                            freq=freq,
                            time_rule=time_rule)
+
+
@Substitution("Unbiased expanding covariance", _binary_arg_flex, _flex_retval)
@Appender(_expanding_doc)
def expanding_cov(arg1, arg2, min_periods=1, time_rule=None):
    # NOTE(review): the appended doc template lists a ``freq`` parameter,
    # which this signature does not accept.
    # The window spans the longer of the two inputs.
    longest = max(map(len, (arg1, arg2)))
    return rolling_cov(arg1, arg2, longest,
                       min_periods=min_periods,
                       time_rule=time_rule)
+
+
@Substitution("Expanding sample correlation", _binary_arg_flex, _flex_retval)
@Appender(_expanding_doc)
def expanding_corr(arg1, arg2, min_periods=1, time_rule=None):
    # NOTE(review): the appended doc template lists a ``freq`` parameter,
    # which this signature does not accept.
    # The window spans the longer of the two inputs.
    longest = max(map(len, (arg1, arg2)))
    return rolling_corr(arg1, arg2, longest,
                        min_periods=min_periods,
                        time_rule=time_rule)
+
+
def expanding_corr_pairwise(df, min_periods=1):
    """
    Pairwise expanding correlation matrices, returned as a Panel whose
    items are dates

    Parameters
    ----------
    df : DataFrame
    min_periods : int, default 1

    Returns
    -------
    correls : Panel
    """
    # Delegate to rolling_corr_pairwise with a window as long as the frame.
    return rolling_corr_pairwise(df, len(df), min_periods=min_periods)
+
+
def expanding_apply(arg, func, min_periods=1, freq=None, time_rule=None):
    """
    Apply an arbitrary reducing function over an expanding window.

    Parameters
    ----------
    arg : Series, DataFrame
    func : function
        Must produce a single value from an ndarray input
    min_periods : int
        Minimum number of observations in window required to have a value
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    time_rule : optional
        Forwarded unchanged to ``rolling_apply``.

    Returns
    -------
    y : type of input argument
    """
    # Delegate to rolling_apply with a window as long as the input.
    full_window = len(arg)
    return rolling_apply(arg, full_window, func,
                         min_periods=min_periods,
                         freq=freq,
                         time_rule=time_rule)
@@ -344,6 +344,114 @@ def _check_binary_ew(self, func):
self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)
def test_expanding_apply(self):
    # An empty Series must come back unchanged.
    empty = Series([])
    applied = mom.expanding_apply(empty, lambda x: x.mean())
    assert_series_equal(empty, applied)

    # Wrap expanding_apply as a mean so the generic checks can drive it.
    def apply_mean(x, min_periods=1, freq=None):
        return mom.expanding_apply(x, lambda x: x.mean(),
                                   min_periods=min_periods, freq=freq)

    self._check_expanding(apply_mean, np.mean)
+
def test_expanding_corr(self):
    # expanding_corr must agree with rolling_corr over a full-length window.
    left = self.series.dropna()
    noisy = left + randn(len(left))
    right = noisy[:-5]

    actual = mom.expanding_corr(left, right)
    expected = mom.rolling_corr(left, right, len(left), min_periods=1)

    assert_almost_equal(expected, actual)
+
def test_expanding_count(self):
    # expanding_count must agree with rolling_count over a full-length window.
    actual = mom.expanding_count(self.series)
    expected = mom.rolling_count(self.series, len(self.series))
    assert_almost_equal(actual, expected)
+
def test_expanding_quantile(self):
    # expanding_quantile must agree with rolling_quantile over a
    # full-length window.
    actual = mom.expanding_quantile(self.series, 0.5)

    full_window = len(self.series)
    expected = mom.rolling_quantile(self.series, full_window, 0.5,
                                    min_periods=1)

    assert_almost_equal(actual, expected)
+
def test_expanding_cov(self):
    # expanding_cov must agree with rolling_cov over a full-length window.
    left = self.series
    noisy = left + randn(len(left))
    right = noisy[:-5]

    actual = mom.expanding_cov(left, right)
    expected = mom.rolling_cov(left, right, len(left), min_periods=1)

    assert_almost_equal(expected, actual)
+
def test_expanding_max(self):
    # Run the generic checks against np.max, skipping the NaN-location check.
    self._check_expanding(mom.expanding_max,
                          np.max,
                          preserve_nan=False)
+
def test_expanding_corr_pairwise(self):
    # Each item of the expanding result must match the rolling result
    # computed over a full-length window.
    actual = mom.expanding_corr_pairwise(self.frame)
    expected = mom.rolling_corr_pairwise(self.frame, len(self.frame),
                                         min_periods=1)

    for item in actual.items:
        assert_almost_equal(actual[item], expected[item])
+
def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
                             has_time_rule=True, preserve_nan=True):
    # Exercise ``func`` on raw ndarrays and compare it with ``static_comp``
    # evaluated on the matching prefix of the data.
    # ``has_time_rule`` is accepted but not used in this method.
    result = func(self.arr)

    # The value at position 10 covers the first 11 observations.
    assert_almost_equal(result[10],
                        static_comp(self.arr[:11]))

    if preserve_nan:
        # Use the TestCase assertion rather than a bare ``assert`` so the
        # check survives ``python -O`` and matches the rest of the method.
        self.assert_(np.isnan(result[self._nan_locs]).all())

    arr = randn(50)

    if has_min_periods:
        result = func(arr, min_periods=30)
        # Fewer than 30 observations are available at positions 0..28.
        self.assert_(np.isnan(result[:29]).all())
        assert_almost_equal(result[-1], static_comp(arr[:50]))

        # min_periods is working correctly
        result = func(arr, min_periods=15)
        self.assert_(np.isnan(result[13]))
        self.assert_(not np.isnan(result[14]))

        arr2 = randn(20)
        result = func(arr2, min_periods=5)
        self.assert_(isnull(result[3]))
        self.assert_(notnull(result[4]))

        # min_periods=0 must behave like min_periods=1
        result0 = func(arr, min_periods=0)
        result1 = func(arr, min_periods=1)
        assert_almost_equal(result0, result1)
    else:
        result = func(arr)
        assert_almost_equal(result[-1], static_comp(arr[:50]))
+
def _check_expanding_structures(self, func):
    # A Series input must yield a Series (subclasses allowed).
    from_series = func(self.series)
    self.assert_(isinstance(from_series, Series))

    # A DataFrame input must yield exactly a DataFrame.
    from_frame = func(self.frame)
    self.assertEquals(type(from_frame), DataFrame)
+
def _check_expanding(self, func, static_comp, has_min_periods=True,
                     has_time_rule=True,
                     preserve_nan=True):
    # Run the ndarray checks first, then the Series/DataFrame type checks.
    ndarray_options = dict(has_min_periods=has_min_periods,
                           has_time_rule=has_time_rule,
                           preserve_nan=preserve_nan)
    self._check_expanding_ndarray(func, static_comp, **ndarray_options)
    self._check_expanding_structures(func)
+
if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],