From e8ffa81a976f405980fe3201a5a736202ab91c55 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 24 Jan 2019 11:05:16 -0800 Subject: [PATCH 1/9] collect and label tests --- pandas/tests/frame/test_analytics.py | 326 ++++++++++++++------------- 1 file changed, 172 insertions(+), 154 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f2c3f50c291c3..56fd64a372b42 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -231,7 +231,7 @@ def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) -class TestDataFrameAnalytics(): +class TestDataFrameAnalytics(object): # ---------------------------------------------------------------------= # Correlation and covariance @@ -502,6 +502,9 @@ def test_corrwith_kendall(self): expected = Series(np.ones(len(result))) tm.assert_series_equal(result, expected) + # ---------------------------------------------------------------------= + # Describe + def test_bool_describe_in_mixed_frame(self): df = DataFrame({ 'string_data': ['a', 'b', 'c', 'd', 'e'], @@ -693,6 +696,9 @@ def test_describe_tz_values(self, tz_naive_fixture): result = df.describe(include='all') tm.assert_frame_equal(result, expected) + # ---------------------------------------------------------------------= + # Reductions + def test_reduce_mixed_frame(self): # GH 6806 df = DataFrame({ @@ -706,37 +712,6 @@ def test_reduce_mixed_frame(self): np.array([2, 150, 'abcde'], dtype=object)) tm.assert_series_equal(test, df.T.sum(axis=1)) - def test_count(self, float_frame_with_na, float_frame, float_string_frame): - f = lambda s: notna(s).sum() - assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, - check_dtype=False, check_dates=True) - assert_stat_op_api('count', float_frame, float_string_frame, - has_numeric_only=True) - - # corner case - frame = DataFrame() - ct1 = frame.count(1) - assert isinstance(ct1, Series) - - ct2 = frame.count(0) - assert isinstance(ct2, Series) - - # GH 423 - df = DataFrame(index=lrange(10)) - result = df.count(1) - expected = Series(0, index=df.index) - tm.assert_series_equal(result, expected) - - df = DataFrame(columns=lrange(10)) - result = df.count(0) - expected = Series(0, index=df.columns) - tm.assert_series_equal(result, expected) - - df = DataFrame() - result = df.count() - expected = Series(0, index=[]) - tm.assert_series_equal(result, expected) - def test_nunique(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) @@ -821,52 +796,6 @@ def test_min(self, float_frame_with_na, int_frame, assert_stat_op_calc('min', np.min, int_frame) assert_stat_op_api('min', float_frame, float_string_frame) - def test_cummin(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cummin = datetime_frame.cummin() - expected = datetime_frame.apply(Series.cummin) - tm.assert_frame_equal(cummin, expected) - - # axis = 1 - cummin = datetime_frame.cummin(axis=1) - expected = datetime_frame.apply(Series.cummin, axis=1) - tm.assert_frame_equal(cummin, expected) - - # it works - df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) - result = df.cummin() # noqa - - # fix issue - cummin_xs = datetime_frame.cummin(axis=1) - assert np.shape(cummin_xs) == np.shape(datetime_frame) - - def test_cummax(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cummax = datetime_frame.cummax() - expected = datetime_frame.apply(Series.cummax) - tm.assert_frame_equal(cummax, expected) - - # axis = 1 - cummax = datetime_frame.cummax(axis=1) - expected = datetime_frame.apply(Series.cummax, axis=1) - tm.assert_frame_equal(cummax, expected) - - # it works - df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) - result = df.cummax() # noqa - - # fix issue - cummax_xs = datetime_frame.cummax(axis=1) - assert np.shape(cummax_xs) == np.shape(datetime_frame) - def test_max(self, float_frame_with_na, int_frame, float_frame, float_string_frame): with warnings.catch_warnings(record=True): @@ -948,58 +877,6 @@ def test_mixed_ops(self, op): result = getattr(df, op)() assert len(result) == 2 - def test_cumsum(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cumsum = datetime_frame.cumsum() - expected = datetime_frame.apply(Series.cumsum) - tm.assert_frame_equal(cumsum, expected) - - # axis = 1 - cumsum = datetime_frame.cumsum(axis=1) - expected = datetime_frame.apply(Series.cumsum, axis=1) - tm.assert_frame_equal(cumsum, expected) - - # works - df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) - result = df.cumsum() # noqa - - # fix issue - cumsum_xs = datetime_frame.cumsum(axis=1) - assert np.shape(cumsum_xs) == np.shape(datetime_frame) - - def test_cumprod(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cumprod = datetime_frame.cumprod() - expected = datetime_frame.apply(Series.cumprod) - tm.assert_frame_equal(cumprod, expected) - - # axis = 1 - cumprod = datetime_frame.cumprod(axis=1) - expected = datetime_frame.apply(Series.cumprod, axis=1) - tm.assert_frame_equal(cumprod, expected) - - # fix issue - cumprod_xs = datetime_frame.cumprod(axis=1) - assert np.shape(cumprod_xs) == np.shape(datetime_frame) - - # ints - df = datetime_frame.fillna(0).astype(int) - df.cumprod(0) - df.cumprod(1) - - # ints32 - df = datetime_frame.fillna(0).astype(np.int32) - df.cumprod(0) - df.cumprod(1) - def test_sem(self, float_frame_with_na, datetime_frame, float_frame, float_string_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) @@ -1316,8 +1193,152 @@ def wrapper(x): check_dates=True) assert_stat_op_api('median', float_frame, float_string_frame) + def test_sum_bools(self): + df = DataFrame(index=lrange(1), columns=lrange(10)) + bools = isna(df) + assert bools.sum(axis=1)[0] == 10 + + # ---------------------------------------------------------------------= + # Cumulative Reductions - cumsum, cummax, ... + + def test_cumsum_corner(self): + dm = DataFrame(np.arange(20).reshape(4, 5), + index=lrange(4), columns=lrange(5)) + # ?(wesm) + result = dm.cumsum() # noqa + + def test_cumsum(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cumsum = datetime_frame.cumsum() + expected = datetime_frame.apply(Series.cumsum) + tm.assert_frame_equal(cumsum, expected) + + # axis = 1 + cumsum = datetime_frame.cumsum(axis=1) + expected = datetime_frame.apply(Series.cumsum, axis=1) + tm.assert_frame_equal(cumsum, expected) + + # works + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) + result = df.cumsum() # noqa + + # fix issue + cumsum_xs = datetime_frame.cumsum(axis=1) + assert np.shape(cumsum_xs) == np.shape(datetime_frame) + + def test_cumprod(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cumprod = datetime_frame.cumprod() + expected = datetime_frame.apply(Series.cumprod) + tm.assert_frame_equal(cumprod, expected) + + # axis = 1 + cumprod = datetime_frame.cumprod(axis=1) + expected = datetime_frame.apply(Series.cumprod, axis=1) + tm.assert_frame_equal(cumprod, expected) + + # fix issue + cumprod_xs = datetime_frame.cumprod(axis=1) + assert np.shape(cumprod_xs) == np.shape(datetime_frame) + + # ints + df = datetime_frame.fillna(0).astype(int) + df.cumprod(0) + df.cumprod(1) + + # ints32 + df = datetime_frame.fillna(0).astype(np.int32) + df.cumprod(0) + df.cumprod(1) + + def test_cummin(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cummin = datetime_frame.cummin() + expected = datetime_frame.apply(Series.cummin) + tm.assert_frame_equal(cummin, expected) + + # axis = 1 + cummin = datetime_frame.cummin(axis=1) + expected = datetime_frame.apply(Series.cummin, axis=1) + tm.assert_frame_equal(cummin, expected) + + # it works + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) + result = df.cummin() # noqa + + # fix issue + cummin_xs = datetime_frame.cummin(axis=1) + assert np.shape(cummin_xs) == np.shape(datetime_frame) + + def test_cummax(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cummax = datetime_frame.cummax() + expected = datetime_frame.apply(Series.cummax) + tm.assert_frame_equal(cummax, expected) + + # axis = 1 + cummax = datetime_frame.cummax(axis=1) + expected = datetime_frame.apply(Series.cummax, axis=1) + tm.assert_frame_equal(cummax, expected) + + # it works + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) + result = df.cummax() # noqa + + # fix issue + cummax_xs = datetime_frame.cummax(axis=1) + assert np.shape(cummax_xs) == np.shape(datetime_frame) + + # ---------------------------------------------------------------------= # Miscellanea + def test_count(self, float_frame_with_na, float_frame, float_string_frame): + f = lambda s: notna(s).sum() + assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, + check_dtype=False, check_dates=True) + assert_stat_op_api('count', float_frame, float_string_frame, + has_numeric_only=True) + + # corner case + frame = DataFrame() + ct1 = frame.count(1) + assert isinstance(ct1, Series) + + ct2 = frame.count(0) + assert isinstance(ct2, Series) + + # GH 423 + df = DataFrame(index=lrange(10)) + result = df.count(1) + expected = Series(0, index=df.index) + tm.assert_series_equal(result, expected) + + df = DataFrame(columns=lrange(10)) + result = df.count(0) + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + df = DataFrame() + result = df.count() + expected = Series(0, index=[]) + tm.assert_series_equal(result, expected) + def test_count_objects(self, float_string_frame): dm = DataFrame(float_string_frame._series) df = DataFrame(float_string_frame._series) @@ -1325,17 +1346,23 @@ def test_count_objects(self, float_string_frame): tm.assert_series_equal(dm.count(), df.count()) tm.assert_series_equal(dm.count(1), df.count(1)) - def test_cumsum_corner(self): - dm = DataFrame(np.arange(20).reshape(4, 5), - index=lrange(4), columns=lrange(5)) - # ?(wesm) - result = dm.cumsum() # noqa + def test_pct_change(self): + # GH#11150 + pnl = DataFrame([np.arange(0, 40, 10), + np.arange(0, 40, 10), + np.arange(0, 40, 10)]).astype(np.float64) + pnl.iat[1, 0] = np.nan + pnl.iat[1, 1] = np.nan + pnl.iat[2, 3] = 60 - def test_sum_bools(self): - df = DataFrame(index=lrange(1), columns=lrange(10)) - bools = isna(df) - assert bools.sum(axis=1)[0] == 10 + for axis in range(2): + expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( + axis=axis) - 1 + result = pnl.pct_change(axis=axis, fill_method='pad') + + tm.assert_frame_equal(result, expected) + # ---------------------------------------------------------------------- # Index of max / min def test_idxmin(self, float_frame, int_frame): @@ -1661,7 +1688,9 @@ def test_isin_empty_datetimelike(self): result = df1_td.isin(df3) tm.assert_frame_equal(result, expected) + # ---------------------------------------------------------------------= # Rounding + def test_round(self): # GH 2665 @@ -1849,22 +1878,9 @@ def test_round_nonunique_categorical(self): tm.assert_frame_equal(result, expected) - def test_pct_change(self): - # GH 11150 - pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( - 0, 40, 10)]).astype(np.float64) - pnl.iat[1, 0] = np.nan - pnl.iat[1, 1] = np.nan - pnl.iat[2, 3] = 60 - - for axis in range(2): - expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( - axis=axis) - 1 - result = pnl.pct_change(axis=axis, fill_method='pad') - - tm.assert_frame_equal(result, expected) - + # ---------------------------------------------------------------------= # Clip + def test_clip(self, float_frame): median = float_frame.median().median() original = float_frame.copy() @@ -2037,7 +2053,9 @@ def test_clip_with_na_args(self, float_frame): 'col_2': [np.nan, np.nan, np.nan]}) tm.assert_frame_equal(result, expected) + # ---------------------------------------------------------------------= # Matrix-like + def test_dot(self): a = DataFrame(np.random.randn(3, 4), index=['a', 'b', 'c'], columns=['p', 'q', 'r', 's']) From f258a592904a982fa327eb9127de385fd3227783 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 24 Jan 2019 11:14:34 -0800 Subject: [PATCH 2/9] collect assert_stat_op_api tests --- pandas/tests/frame/test_analytics.py | 80 ++++++++++++++-------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 56fd64a372b42..cb9b04c78c132 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -699,6 +699,31 @@ def test_describe_tz_values(self, tz_naive_fixture): # ---------------------------------------------------------------------= # Reductions + def test_stat_op_api(self, float_frame, float_string_frame): + assert_stat_op_api('count', float_frame, float_string_frame, + has_numeric_only=True) + assert_stat_op_api('sum', float_frame, float_string_frame, + has_numeric_only=True) + + assert_stat_op_api('nunique', float_frame, float_string_frame) + assert_stat_op_api('mean', float_frame, float_string_frame) + assert_stat_op_api('product', float_frame, float_string_frame) + assert_stat_op_api('median', float_frame, float_string_frame) + assert_stat_op_api('min', float_frame, float_string_frame) + assert_stat_op_api('max', float_frame, float_string_frame) + assert_stat_op_api('mad', float_frame, float_string_frame) + assert_stat_op_api('var', float_frame, float_string_frame) + assert_stat_op_api('std', float_frame, float_string_frame) + assert_stat_op_api('sem', float_frame, float_string_frame) + assert_stat_op_api('median', float_frame, float_string_frame) + + try: + from scipy.stats import skew, kurtosis + assert_stat_op_api('skew', float_frame, float_string_frame) + assert_stat_op_api('kurt', float_frame, float_string_frame) + except ImportError: + pass + def test_reduce_mixed_frame(self): # GH 6806 df = DataFrame({ @@ -712,13 +737,11 @@ def test_reduce_mixed_frame(self): np.array([2, 150, 'abcde'], dtype=object)) tm.assert_series_equal(test, df.T.sum(axis=1)) - def test_nunique(self, float_frame_with_na, float_frame, - float_string_frame): + def test_nunique(self, float_frame_with_na): f = lambda s: len(algorithms.unique1d(s.dropna())) assert_stat_op_calc('nunique', f, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) - assert_stat_op_api('nunique', float_frame, float_string_frame) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], @@ -730,10 +753,7 @@ def test_nunique(self, float_frame_with_na, float_frame, tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})) - def test_sum(self, float_frame_with_na, mixed_float_frame, - float_frame, float_string_frame): - assert_stat_op_api('sum', float_frame, float_string_frame, - has_numeric_only=True) + def test_sum(self, float_frame_with_na, mixed_float_frame): assert_stat_op_calc('sum', np.sum, float_frame_with_na, skipna_alternative=np.nansum) # mixed types (with upcasting happening) @@ -764,20 +784,16 @@ def test_stat_operators_attempt_obj_array(self, method): if method in ['sum', 'prod']: tm.assert_series_equal(result, expected) - def test_mean(self, float_frame_with_na, float_frame, float_string_frame): + def test_mean(self, float_frame_with_na): assert_stat_op_calc('mean', np.mean, float_frame_with_na, check_dates=True) - assert_stat_op_api('mean', float_frame, float_string_frame) - def test_product(self, float_frame_with_na, float_frame, - float_string_frame): + def test_product(self, float_frame_with_na): assert_stat_op_calc('product', np.prod, float_frame_with_na) - assert_stat_op_api('product', float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median(self, float_frame_with_na, float_frame, - float_string_frame): + def test_median(self, float_frame_with_na): def wrapper(x): if isna(x).any(): return np.nan @@ -785,40 +801,31 @@ def wrapper(x): assert_stat_op_calc('median', wrapper, float_frame_with_na, check_dates=True) - assert_stat_op_api('median', float_frame, float_string_frame) - def test_min(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): + def test_min(self, float_frame_with_na, int_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) assert_stat_op_calc('min', np.min, float_frame_with_na, check_dates=True) assert_stat_op_calc('min', np.min, int_frame) - assert_stat_op_api('min', float_frame, float_string_frame) - def test_max(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): + def test_max(self, float_frame_with_na, int_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) assert_stat_op_calc('max', np.max, float_frame_with_na, check_dates=True) assert_stat_op_calc('max', np.max, int_frame) - assert_stat_op_api('max', float_frame, float_string_frame) - def test_mad(self, float_frame_with_na, float_frame, float_string_frame): + def test_mad(self, float_frame_with_na): f = lambda x: np.abs(x - x.mean()).mean() assert_stat_op_calc('mad', f, float_frame_with_na) - assert_stat_op_api('mad', float_frame, float_string_frame) - def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, - float_string_frame): + def test_var_std(self, float_frame_with_na, datetime_frame): alt = lambda x: np.var(x, ddof=1) assert_stat_op_calc('var', alt, float_frame_with_na) - assert_stat_op_api('var', float_frame, float_string_frame) alt = lambda x: np.std(x, ddof=1) assert_stat_op_calc('std', alt, float_frame_with_na) - assert_stat_op_api('std', float_frame, float_string_frame) result = datetime_frame.std(ddof=4) expected = datetime_frame.apply(lambda x: x.std(ddof=4)) @@ -877,11 +884,9 @@ def test_mixed_ops(self, op): result = getattr(df, op)() assert len(result) == 2 - def test_sem(self, float_frame_with_na, datetime_frame, - float_frame, float_string_frame): + def test_sem(self, float_frame_with_na, datetime_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) assert_stat_op_calc('sem', alt, float_frame_with_na) - assert_stat_op_api('sem', float_frame, float_string_frame) result = datetime_frame.sem(ddof=4) expected = datetime_frame.apply( @@ -897,7 +902,7 @@ def test_sem(self, float_frame_with_na, datetime_frame, assert not (result < 0).any() @td.skip_if_no_scipy - def test_skew(self, float_frame_with_na, float_frame, float_string_frame): + def test_skew(self, float_frame_with_na, float_frame): from scipy.stats import skew def alt(x): @@ -906,10 +911,9 @@ def alt(x): return skew(x, bias=False) assert_stat_op_calc('skew', alt, float_frame_with_na) - assert_stat_op_api('skew', float_frame, float_string_frame) @td.skip_if_no_scipy - def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): + def test_kurt(self, float_frame_with_na, float_frame): from scipy.stats import kurtosis def alt(x): @@ -918,7 +922,6 @@ def alt(x): return kurtosis(x, bias=False) assert_stat_op_calc('kurt', alt, float_frame_with_na) - assert_stat_op_api('kurt', float_frame, float_string_frame) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], @@ -1183,7 +1186,7 @@ def test_stats_mixed_type(self, float_string_frame): # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self, int_frame, float_frame, float_string_frame): + def test_median_corner(self, int_frame, float_frame): def wrapper(x): if isna(x).any(): return np.nan @@ -1191,7 +1194,6 @@ def wrapper(x): assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, check_dates=True) - assert_stat_op_api('median', float_frame, float_string_frame) def test_sum_bools(self): df = DataFrame(index=lrange(1), columns=lrange(10)) @@ -1308,12 +1310,10 @@ def test_cummax(self, datetime_frame): # ---------------------------------------------------------------------= # Miscellanea - def test_count(self, float_frame_with_na, float_frame, float_string_frame): + def test_count(self, float_frame_with_na): f = lambda s: notna(s).sum() assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) - assert_stat_op_api('count', float_frame, float_string_frame, - has_numeric_only=True) # corner case frame = DataFrame() @@ -1323,7 +1323,7 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): ct2 = frame.count(0) assert isinstance(ct2, Series) - # GH 423 + # GH#423 df = DataFrame(index=lrange(10)) result = df.count(1) expected = Series(0, index=df.index) From a1243901d8fa89a5b4b0d089253f486f2bd2fb37 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 24 Jan 2019 11:22:45 -0800 Subject: [PATCH 3/9] collect tests --- pandas/tests/frame/test_analytics.py | 78 ++++++++++++++-------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index cb9b04c78c132..7f295f7ca54b0 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -724,6 +724,45 @@ def test_stat_op_api(self, float_frame, float_string_frame): except ImportError: pass + @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', + 'std', 'skew', 'min', 'max']) + def test_stat_operators_attempt_obj_array(self, method): + # GH#676 + data = { + 'a': [-0.00049987540199591344, -0.0016467257772919831, + 0.00067695870775883013], + 'b': [-0, -0, 0.0], + 'c': [0.00031111847529610595, 0.0014902627951905339, + -0.00094099200035979691] + } + df1 = DataFrame(data, index=['foo', 'bar', 'baz'], dtype='O') + + df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], + 2: [np.nan, 4]}, dtype=object) + + for df in [df1, df2]: + assert df.values.dtype == np.object_ + result = getattr(df, method)(1) + expected = getattr(df.astype('f8'), method)(1) + + if method in ['sum', 'prod']: + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('op', ['mean', 'std', 'var', + 'skew', 'kurt', 'sem']) + def test_mixed_ops(self, op): + # GH#16116 + df = DataFrame({'int': [1, 2, 3, 4], + 'float': [1., 2., 3., 4.], + 'str': ['a', 'b', 'c', 'd']}) + + result = getattr(df, op)() + assert len(result) == 2 + + with pd.option_context('use_bottleneck', False): + result = getattr(df, op)() + assert len(result) == 2 + def test_reduce_mixed_frame(self): # GH 6806 df = DataFrame({ @@ -760,30 +799,6 @@ def test_sum(self, float_frame_with_na, mixed_float_frame): assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'), check_dtype=False, check_less_precise=True) - @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', - 'std', 'skew', 'min', 'max']) - def test_stat_operators_attempt_obj_array(self, method): - # GH 676 - data = { - 'a': [-0.00049987540199591344, -0.0016467257772919831, - 0.00067695870775883013], - 'b': [-0, -0, 0.0], - 'c': [0.00031111847529610595, 0.0014902627951905339, - -0.00094099200035979691] - } - df1 = DataFrame(data, index=['foo', 'bar', 'baz'], dtype='O') - - df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], - 2: [np.nan, 4]}, dtype=object) - - for df in [df1, df2]: - assert df.values.dtype == np.object_ - result = getattr(df, method)(1) - expected = getattr(df.astype('f8'), method)(1) - - if method in ['sum', 'prod']: - tm.assert_series_equal(result, expected) - def test_mean(self, float_frame_with_na): assert_stat_op_calc('mean', np.mean, float_frame_with_na, check_dates=True) @@ -869,21 +884,6 @@ def test_numeric_only_flag(self, meth): pytest.raises(TypeError, lambda: getattr(df2, meth)( axis=1, numeric_only=False)) - @pytest.mark.parametrize('op', ['mean', 'std', 'var', - 'skew', 'kurt', 'sem']) - def test_mixed_ops(self, op): - # GH 16116 - df = DataFrame({'int': [1, 2, 3, 4], - 'float': [1., 2., 3., 4.], - 'str': ['a', 'b', 'c', 'd']}) - - result = getattr(df, op)() - assert len(result) == 2 - - with pd.option_context('use_bottleneck', False): - result = getattr(df, op)() - assert len(result) == 2 - def test_sem(self, float_frame_with_na, datetime_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) assert_stat_op_calc('sem', alt, float_frame_with_na) From 91088c2dc39f3b6cf363a679494636fb057ce8f8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 24 Jan 2019 11:25:16 -0800 Subject: [PATCH 4/9] put median tests together --- pandas/tests/frame/test_analytics.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 7f295f7ca54b0..414a9f5574e00 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -817,6 +817,17 @@ def wrapper(x): assert_stat_op_calc('median', wrapper, float_frame_with_na, check_dates=True) + # TODO: Ensure warning isn't emitted in the first place + @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") + def test_median_corner(self, int_frame): + def wrapper(x): + if isna(x).any(): + return np.nan + return np.median(x) + + assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, + check_dates=True) + def test_min(self, float_frame_with_na, int_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) @@ -1184,17 +1195,6 @@ def test_stats_mixed_type(self, float_string_frame): float_string_frame.mean(1) float_string_frame.skew(1) - # TODO: Ensure warning isn't emitted in the first place - @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self, int_frame, float_frame): - def wrapper(x): - if isna(x).any(): - return np.nan - return np.median(x) - - assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, - check_dates=True) - def test_sum_bools(self): df = DataFrame(index=lrange(1), columns=lrange(10)) bools = isna(df) From 7008ba99dc5581d0312959eba6ddfa0b88961345 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 24 Jan 2019 11:31:44 -0800 Subject: [PATCH 5/9] typo fixup --- pandas/tests/frame/test_analytics.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 577594d7e007d..2397fe3d451a8 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -233,7 +233,7 @@ def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, class TestDataFrameAnalytics(object): - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Correlation and covariance @td.skip_if_no_scipy @@ -502,7 +502,7 @@ def test_corrwith_kendall(self): expected = Series(np.ones(len(result))) tm.assert_series_equal(result, expected) - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Describe def test_bool_describe_in_mixed_frame(self): @@ -696,7 +696,7 @@ def test_describe_tz_values(self, tz_naive_fixture): result = df.describe(include='all') tm.assert_frame_equal(result, expected) - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Reductions def test_stat_op_api(self, float_frame, float_string_frame): @@ -1219,7 +1219,7 @@ def test_sum_bools(self): bools = isna(df) assert bools.sum(axis=1)[0] == 10 - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Cumulative Reductions - cumsum, cummax, ... def test_cumsum_corner(self): @@ -1326,7 +1326,7 @@ def test_cummax(self, datetime_frame): cummax_xs = datetime_frame.cummax(axis=1) assert np.shape(cummax_xs) == np.shape(datetime_frame) - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Miscellanea def test_count(self, float_frame_with_na): @@ -1707,7 +1707,7 @@ def test_isin_empty_datetimelike(self): result = df1_td.isin(df3) tm.assert_frame_equal(result, expected) - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Rounding def test_round(self): @@ -1897,7 +1897,7 @@ def test_round_nonunique_categorical(self): tm.assert_frame_equal(result, expected) - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Clip def test_clip(self, float_frame): @@ -2072,7 +2072,7 @@ def test_clip_with_na_args(self, float_frame): 'col_2': [np.nan, np.nan, np.nan]}) tm.assert_frame_equal(result, expected) - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Matrix-like def test_dot(self): From 95b2d7e0cbde3a10f3ce2360824a0769df7955a6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 24 Jan 2019 11:45:33 -0800 Subject: [PATCH 6/9] collect more, flake8 fixup --- pandas/tests/frame/test_analytics.py | 136 ++++++++++++++------------- 1 file changed, 72 insertions(+), 64 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 2397fe3d451a8..1ed74799e0727 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +f # -*- coding: utf-8 -*- from datetime import timedelta import operator @@ -704,7 +704,7 @@ def test_stat_op_api(self, float_frame, float_string_frame): has_numeric_only=True) assert_stat_op_api('sum', float_frame, float_string_frame, has_numeric_only=True) - + assert_stat_op_api('nunique', float_frame, float_string_frame) assert_stat_op_api('mean', float_frame, float_string_frame) assert_stat_op_api('product', float_frame, float_string_frame) @@ -723,7 +723,71 @@ def test_stat_op_api(self, float_frame, float_string_frame): assert_stat_op_api('kurt', float_frame, float_string_frame) except ImportError: pass - + + def test_stat_op_calc(self, float_frame_with_na, mixed_float_frame): + + def count(s): + return notna(s).sum() + + def nunique(s): + return len(algorithms.unique1d(s.dropna())) + + def mad(x): + return np.abs(x - x.mean()).mean() + + def var(x): + return np.var(x, ddof=1) + + def std(x): + return x: np.std(x, ddof=1) + + def sem(x): + return np.std(x, ddof=1) / np.sqrt(len(x)) + + def skew(x): + from scipy.stats import skew + if len(x) < 3: + return np.nan + return skew(x, bias=False) + + def kurt(x): + from scipy.stats import kurtosis + if len(x) < 4: + return np.nan + return kurtosis(x, bias=False) + + assert_stat_op_calc('nunique', nunique, float_frame_with_na, + has_skipna=False, check_dtype=False, + check_dates=True) + + # mixed types (with upcasting happening) + assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'), + check_dtype=False, check_less_precise=True) + + assert_stat_op_calc('sum', np.sum, float_frame_with_na, + skipna_alternative=np.nansum) + assert_stat_op_calc('mean', np.mean, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('product', np.prod, float_frame_with_na) + + + assert_stat_op_calc('mad', mad, float_frame_with_na) + assert_stat_op_calc('var', var, float_frame_with_na) + assert_stat_op_calc('std', std, float_frame_with_na) + assert_stat_op_calc('sem', sem, float_frame_with_na) + + + assert_stat_op_calc('count', count, float_frame_with_na, + has_skipna=False, check_dtype=False, + check_dates=True) + + try: + from scipy import skew, kurtosis + assert_stat_op_calc('skew', skew, float_frame_with_na) + assert_stat_op_calc('kurt', kurt, float_frame_with_na) + except ImportError: + pass + @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) def test_stat_operators_attempt_obj_array(self, method): @@ -776,12 +840,7 @@ def test_reduce_mixed_frame(self): np.array([2, 150, 'abcde'], dtype=object)) tm.assert_series_equal(test, df.T.sum(axis=1)) - def test_nunique(self, float_frame_with_na): - f = lambda s: len(algorithms.unique1d(s.dropna())) - assert_stat_op_calc('nunique', f, float_frame_with_na, - has_skipna=False, check_dtype=False, - check_dates=True) - + def test_nunique(self): df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], 'C': [1, np.nan, 3]}) @@ -792,20 +851,6 @@ def test_nunique(self, float_frame_with_na): tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})) - def test_sum(self, float_frame_with_na, mixed_float_frame): - assert_stat_op_calc('sum', np.sum, float_frame_with_na, - skipna_alternative=np.nansum) - # mixed types (with upcasting happening) - assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'), - check_dtype=False, check_less_precise=True) - - def test_mean(self, float_frame_with_na): - assert_stat_op_calc('mean', np.mean, float_frame_with_na, - check_dates=True) - - def test_product(self, float_frame_with_na): - assert_stat_op_calc('product', np.prod, float_frame_with_na) - @pytest.mark.parametrize('tz', [None, 'UTC']) def test_mean_mixed_datetime_numeric(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 @@ -861,17 +906,7 @@ def test_max(self, float_frame_with_na, int_frame): check_dates=True) assert_stat_op_calc('max', np.max, int_frame) - def test_mad(self, float_frame_with_na): - f = lambda x: np.abs(x - x.mean()).mean() - assert_stat_op_calc('mad', f, float_frame_with_na) - - def test_var_std(self, float_frame_with_na, datetime_frame): - alt = lambda x: np.var(x, ddof=1) - assert_stat_op_calc('var', alt, float_frame_with_na) - - alt = lambda x: np.std(x, ddof=1) - assert_stat_op_calc('std', alt, float_frame_with_na) - + def test_var_std(self, datetime_frame): result = datetime_frame.std(ddof=4) expected = datetime_frame.apply(lambda x: x.std(ddof=4)) tm.assert_almost_equal(result, expected) @@ -914,10 +949,7 @@ def test_numeric_only_flag(self, meth): pytest.raises(TypeError, lambda: getattr(df2, meth)( axis=1, numeric_only=False)) - def test_sem(self, float_frame_with_na, datetime_frame): - alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - assert_stat_op_calc('sem', alt, float_frame_with_na) - + def test_sem(self, datetime_frame): result = datetime_frame.sem(ddof=4) expected = datetime_frame.apply( lambda x: x.std(ddof=4) / np.sqrt(len(x))) @@ -932,27 +964,7 @@ def test_sem(self, float_frame_with_na, datetime_frame): assert not (result < 0).any() @td.skip_if_no_scipy - def test_skew(self, float_frame_with_na, float_frame): - from scipy.stats import skew - - def alt(x): - if len(x) < 3: - return np.nan - return skew(x, bias=False) - - assert_stat_op_calc('skew', alt, float_frame_with_na) - - @td.skip_if_no_scipy - def test_kurt(self, float_frame_with_na, float_frame): - from scipy.stats import kurtosis - - def alt(x): - if len(x) < 4: - return np.nan - return kurtosis(x, bias=False) - - assert_stat_op_calc('kurt', alt, float_frame_with_na) - + def test_kurt(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], @@ -1329,11 +1341,7 @@ def test_cummax(self, datetime_frame): # --------------------------------------------------------------------- # Miscellanea - def test_count(self, float_frame_with_na): - f = lambda s: notna(s).sum() - assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, - check_dtype=False, check_dates=True) - + def test_count(self): # corner case frame = DataFrame() ct1 = frame.count(1) From eb1c1eb136f9f4d82a8fcebbaed72a2f0d740fd1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 24 Jan 2019 16:12:54 -0800 Subject: [PATCH 7/9] typo fixup --- pandas/tests/frame/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 1ed74799e0727..c7d46d2cc73da 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1,4 +1,4 @@ -f # -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- from datetime import timedelta import operator From 2a2d0af48a9713c7b6c16f4430a95d801d10913c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 25 Jan 2019 16:05:51 -0800 Subject: [PATCH 8/9] flake8 fixups --- pandas/tests/frame/test_analytics.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index c7d46d2cc73da..cd407671472e2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -718,7 +718,7 @@ def test_stat_op_api(self, float_frame, float_string_frame): assert_stat_op_api('median', float_frame, float_string_frame) try: - from scipy.stats import skew, kurtosis + from scipy.stats import skew, kurtosis # noqa:F401 assert_stat_op_api('skew', float_frame, float_string_frame) assert_stat_op_api('kurt', float_frame, float_string_frame) except ImportError: @@ -739,19 +739,19 @@ def var(x): return np.var(x, ddof=1) def std(x): - return x: np.std(x, ddof=1) + return np.std(x, ddof=1) def sem(x): return np.std(x, ddof=1) / np.sqrt(len(x)) - def skew(x): - from scipy.stats import skew + def skewness(x): + from scipy.stats import skew # noqa:F811 if len(x) < 3: return np.nan return skew(x, bias=False) def kurt(x): - from scipy.stats import kurtosis + from scipy.stats import kurtosis # noqa:F811 if len(x) < 4: return np.nan return kurtosis(x, bias=False) @@ -770,20 +770,18 @@ def kurt(x): check_dates=True) assert_stat_op_calc('product', np.prod, float_frame_with_na) - assert_stat_op_calc('mad', mad, float_frame_with_na) assert_stat_op_calc('var', var, float_frame_with_na) assert_stat_op_calc('std', std, float_frame_with_na) assert_stat_op_calc('sem', sem, float_frame_with_na) - assert_stat_op_calc('count', count, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) try: - from scipy import skew, kurtosis - assert_stat_op_calc('skew', skew, float_frame_with_na) + from scipy import skew, kurtosis # noqa:F401 + assert_stat_op_calc('skew', skewness, float_frame_with_na) assert_stat_op_calc('kurt', kurt, float_frame_with_na) except ImportError: pass From e01936d78ec173411aa1efa1fb55539313bc84c8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 25 Jan 2019 16:09:53 -0800 Subject: [PATCH 9/9] de-duplicate --- pandas/tests/frame/test_analytics.py | 49 ++++++++-------------------- 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index cd407671472e2..2d3431965bbf6 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -786,6 +786,19 @@ def kurt(x): except ImportError: pass + # TODO: Ensure warning isn't emitted in the first place + @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") + def test_median(self, float_frame_with_na, int_frame): + def wrapper(x): + if isna(x).any(): + return np.nan + return np.median(x) + + assert_stat_op_calc('median', wrapper, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, + check_dates=True) + @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) def test_stat_operators_attempt_obj_array(self, method): @@ -868,42 +881,6 @@ def test_mean_excludeds_datetimes(self, tz): expected = pd.Series() tm.assert_series_equal(result, expected) - # TODO: Ensure warning isn't emitted in the first place - @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median(self, float_frame_with_na): - def wrapper(x): - if isna(x).any(): - return np.nan - return np.median(x) - - assert_stat_op_calc('median', wrapper, float_frame_with_na, - check_dates=True) - - # TODO: Ensure warning isn't emitted in the first place - @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self, int_frame): - def wrapper(x): - if isna(x).any(): - return np.nan - return np.median(x) - - assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, - check_dates=True) - - def test_min(self, float_frame_with_na, int_frame): - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", RuntimeWarning) - assert_stat_op_calc('min', np.min, float_frame_with_na, - check_dates=True) - assert_stat_op_calc('min', np.min, int_frame) - - def test_max(self, float_frame_with_na, int_frame): - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", RuntimeWarning) - assert_stat_op_calc('max', np.max, float_frame_with_na, - check_dates=True) - assert_stat_op_calc('max', np.max, int_frame) - def test_var_std(self, datetime_frame): result = datetime_frame.std(ddof=4) expected = datetime_frame.apply(lambda x: x.std(ddof=4))