diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index 39714a4566494..5bd9c42612315 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -7,6 +7,7 @@ """ import numpy as np +import pytest from pandas import ( DataFrame, @@ -19,53 +20,22 @@ class TestDataFrameCumulativeOps: # --------------------------------------------------------------------- # Cumulative Operations - cumsum, cummax, ... - def test_cumsum_corner(self): - dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5)) - # TODO(wesm): do something with this? - result = dm.cumsum() # noqa - - def test_cumsum(self, datetime_frame): - datetime_frame.iloc[5:10, 0] = np.nan - datetime_frame.iloc[10:15, 1] = np.nan - datetime_frame.iloc[15:, 2] = np.nan - - # axis = 0 - cumsum = datetime_frame.cumsum() - expected = datetime_frame.apply(Series.cumsum) - tm.assert_frame_equal(cumsum, expected) - - # axis = 1 - cumsum = datetime_frame.cumsum(axis=1) - expected = datetime_frame.apply(Series.cumsum, axis=1) - tm.assert_frame_equal(cumsum, expected) - - # works + def test_cumulative_ops_smoke(self): + # it works df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + df.cummax() + df.cummin() df.cumsum() - # fix issue - cumsum_xs = datetime_frame.cumsum(axis=1) - assert np.shape(cumsum_xs) == np.shape(datetime_frame) + dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5)) + # TODO(wesm): do something with this? 
+ dm.cumsum() - def test_cumprod(self, datetime_frame): + def test_cumprod_smoke(self, datetime_frame): datetime_frame.iloc[5:10, 0] = np.nan datetime_frame.iloc[10:15, 1] = np.nan datetime_frame.iloc[15:, 2] = np.nan - # axis = 0 - cumprod = datetime_frame.cumprod() - expected = datetime_frame.apply(Series.cumprod) - tm.assert_frame_equal(cumprod, expected) - - # axis = 1 - cumprod = datetime_frame.cumprod(axis=1) - expected = datetime_frame.apply(Series.cumprod, axis=1) - tm.assert_frame_equal(cumprod, expected) - - # fix issue - cumprod_xs = datetime_frame.cumprod(axis=1) - assert np.shape(cumprod_xs) == np.shape(datetime_frame) - # ints df = datetime_frame.fillna(0).astype(int) df.cumprod(0) @@ -76,53 +46,26 @@ def test_cumprod(self, datetime_frame): df.cumprod(0) df.cumprod(1) - def test_cummin(self, datetime_frame): - datetime_frame.iloc[5:10, 0] = np.nan - datetime_frame.iloc[10:15, 1] = np.nan - datetime_frame.iloc[15:, 2] = np.nan - - # axis = 0 - cummin = datetime_frame.cummin() - expected = datetime_frame.apply(Series.cummin) - tm.assert_frame_equal(cummin, expected) - - # axis = 1 - cummin = datetime_frame.cummin(axis=1) - expected = datetime_frame.apply(Series.cummin, axis=1) - tm.assert_frame_equal(cummin, expected) - - # it works - df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) - df.cummin() - - # fix issue - cummin_xs = datetime_frame.cummin(axis=1) - assert np.shape(cummin_xs) == np.shape(datetime_frame) - - def test_cummax(self, datetime_frame): + @pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"]) + def test_cumulative_ops_match_series_apply(self, datetime_frame, method): datetime_frame.iloc[5:10, 0] = np.nan datetime_frame.iloc[10:15, 1] = np.nan datetime_frame.iloc[15:, 2] = np.nan # axis = 0 - cummax = datetime_frame.cummax() - expected = datetime_frame.apply(Series.cummax) - tm.assert_frame_equal(cummax, expected) + result = getattr(datetime_frame, method)() + expected = 
datetime_frame.apply(getattr(Series, method)) + tm.assert_frame_equal(result, expected) # axis = 1 - cummax = datetime_frame.cummax(axis=1) - expected = datetime_frame.apply(Series.cummax, axis=1) - tm.assert_frame_equal(cummax, expected) - - # it works - df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) - df.cummax() + result = getattr(datetime_frame, method)(axis=1) + expected = datetime_frame.apply(getattr(Series, method), axis=1) + tm.assert_frame_equal(result, expected) - # fix issue - cummax_xs = datetime_frame.cummax(axis=1) - assert np.shape(cummax_xs) == np.shape(datetime_frame) + # axis=1 result must keep the frame's shape; TODO: GH ref? + assert np.shape(result) == np.shape(datetime_frame) - def test_cumulative_ops_preserve_dtypes(self): + def test_cumsum_preserve_dtypes(self): # GH#19296 dont incorrectly upcast to object df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3.0], "C": [True, False, False]}) diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py index e070b86717503..74ab9376ed00f 100644 --- a/pandas/tests/series/test_cumulative.py +++ b/pandas/tests/series/test_cumulative.py @@ -5,7 +5,6 @@ -------- tests.frame.test_cumulative """ -from itertools import product import numpy as np import pytest @@ -13,6 +12,13 @@ import pandas as pd import pandas._testing as tm +methods = { + "cumsum": np.cumsum, + "cumprod": np.cumprod, + "cummin": np.minimum.accumulate, + "cummax": np.maximum.accumulate, +} + def _check_accum_op(name, series, check_dtype=True): func = getattr(np, name) @@ -37,130 +43,82 @@ def test_cumsum(self, datetime_series): def test_cumprod(self, datetime_series): _check_accum_op("cumprod", datetime_series) - def test_cummin(self, datetime_series): - tm.assert_numpy_array_equal( - datetime_series.cummin().values, - np.minimum.accumulate(np.array(datetime_series)), - ) - ts = datetime_series.copy() - ts[::2] = np.NaN - result = ts.cummin()[1::2] - expected = np.minimum.accumulate(ts.dropna()) + @pytest.mark.parametrize("method", 
["cummin", "cummax"]) + def test_cummin_cummax(self, datetime_series, method): + ufunc = methods[method] - result.index = result.index._with_freq(None) - tm.assert_series_equal(result, expected) + result = getattr(datetime_series, method)().values + expected = ufunc(np.array(datetime_series)) - def test_cummax(self, datetime_series): - tm.assert_numpy_array_equal( - datetime_series.cummax().values, - np.maximum.accumulate(np.array(datetime_series)), - ) + tm.assert_numpy_array_equal(result, expected) ts = datetime_series.copy() ts[::2] = np.NaN - result = ts.cummax()[1::2] - expected = np.maximum.accumulate(ts.dropna()) + result = getattr(ts, method)()[1::2] + expected = ufunc(ts.dropna()) result.index = result.index._with_freq(None) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("tz", [None, "US/Pacific"]) - def test_cummin_datetime64(self, tz): - s = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"] - ).tz_localize(tz) - ) - - expected = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"] - ).tz_localize(tz) - ) - result = s.cummin(skipna=True) + @pytest.mark.parametrize( + "ts", + [ + pd.Timedelta(0), + pd.Timestamp("1999-12-31"), + pd.Timestamp("1999-12-31").tz_localize("US/Pacific"), + ], + ) + def test_cummin_cummax_datetimelike(self, ts): + # with ts==pd.Timedelta(0), we are testing td64; with naive Timestamp + # we are testing datetime64[ns]; with Timestamp[US/Pacific] + # we are testing dt64tz + tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "1 days", "NaT", "3 days"]) + ser = pd.Series(tdi + ts) + + exp_tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "2 days", "NaT", "3 days"]) + expected = pd.Series(exp_tdi + ts) + result = ser.cummax(skipna=True) tm.assert_series_equal(expected, result) - expected = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"] - ).tz_localize(tz) - ) - result = s.cummin(skipna=False) + 
exp_tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "1 days", "NaT", "1 days"]) + expected = pd.Series(exp_tdi + ts) + result = ser.cummin(skipna=True) tm.assert_series_equal(expected, result) - @pytest.mark.parametrize("tz", [None, "US/Pacific"]) - def test_cummax_datetime64(self, tz): - s = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"] - ).tz_localize(tz) + exp_tdi = pd.to_timedelta( + ["NaT", "2 days", "2 days", "2 days", "2 days", "3 days"] ) - - expected = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"] - ).tz_localize(tz) - ) - result = s.cummax(skipna=True) + expected = pd.Series(exp_tdi + ts) + result = ser.cummax(skipna=False) tm.assert_series_equal(expected, result) - expected = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"] - ).tz_localize(tz) + exp_tdi = pd.to_timedelta( + ["NaT", "2 days", "2 days", "1 days", "1 days", "1 days"] ) - result = s.cummax(skipna=False) + expected = pd.Series(exp_tdi + ts) + result = ser.cummin(skipna=False) tm.assert_series_equal(expected, result) - def test_cummin_timedelta64(self): - s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) + def test_cummethods_bool(self): + # GH#6270 + # checking Series method vs the ufunc applied to the values - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"]) - ) - result = s.cummin(skipna=True) - tm.assert_series_equal(expected, result) + a = pd.Series([False, False, False, True, True, False, False]) + c = pd.Series([False] * len(a)) - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"]) - ) - result = s.cummin(skipna=False) - tm.assert_series_equal(expected, result) + for method in methods: + for ser in [a, ~a, c, ~c]: + ufunc = methods[method] - def test_cummax_timedelta64(self): - s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", 
"NaT", "3 min"])) + exp_vals = ufunc(ser.values) + expected = pd.Series(exp_vals) - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"]) - ) - result = s.cummax(skipna=True) - tm.assert_series_equal(expected, result) + result = getattr(ser, method)() - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"]) - ) - result = s.cummax(skipna=False) - tm.assert_series_equal(expected, result) + tm.assert_series_equal(result, expected) - def test_cummethods_bool(self): - # GH#6270 + def test_cummethods_bool_in_object_dtype(self): - a = pd.Series([False, False, False, True, True, False, False]) - b = ~a - c = pd.Series([False] * len(b)) - d = ~c - methods = { - "cumsum": np.cumsum, - "cumprod": np.cumprod, - "cummin": np.minimum.accumulate, - "cummax": np.maximum.accumulate, - } - args = product((a, b, c, d), methods) - for s, method in args: - expected = pd.Series(methods[method](s.values)) - result = getattr(s, method)() - tm.assert_series_equal(result, expected) - - e = pd.Series([False, True, np.nan, False]) + ser = pd.Series([False, True, np.nan, False]) cse = pd.Series([0, 1, np.nan, 1], dtype=object) cpe = pd.Series([False, 0, np.nan, 0]) cmin = pd.Series([False, False, np.nan, False]) @@ -168,5 +126,5 @@ def test_cummethods_bool(self): expecteds = {"cumsum": cse, "cumprod": cpe, "cummin": cmin, "cummax": cmax} for method in methods: - res = getattr(e, method)() + res = getattr(ser, method)() tm.assert_series_equal(res, expecteds[method])