From 5ca822b35d196928fc9ef1b14d457553ea7f3e68 Mon Sep 17 00:00:00 2001 From: David Wolever Date: Sun, 19 Jan 2014 22:37:37 -0500 Subject: [PATCH 1/4] Add is_reduction argument to DataFrame.apply --- pandas/core/frame.py | 22 +++++++++++++++------- pandas/tests/test_frame.py | 9 +++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 82bc3ac25f68a..0e3f54408420d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3363,7 +3363,7 @@ def diff(self, periods=1): # Function application def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True, - args=(), **kwds): + args=(), is_reduction=None, **kwds): """ Applies function along input axis of DataFrame. @@ -3391,6 +3391,14 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True, args : tuple Positional arguments to pass to function in addition to the array/series + is_reduction : boolean or None, default None + If the DataFrame is empty, apply needs to determine whether the + return value should be a Series or a DataFrame. If is_reduction is + None, func will be called with an empty Series and the return value + will be guessed based on the result (or, if an exception is raised, + a DataFrame will be returned). If is_reduction is True a Series + will always be returned, and if False a DataFrame will always be + returned. Additional keyword arguments will be passed as keywords to the function Examples @@ -3423,12 +3431,12 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True, else: if not broadcast: if not all(self.shape): - # How to determine this better? 
- is_reduction = False - try: - is_reduction = not isinstance(f(_EMPTY_SERIES), Series) - except Exception: - pass + if is_reduction is None: + is_reduction = False + try: + is_reduction = not isinstance(f(_EMPTY_SERIES), Series) + except Exception: + pass if is_reduction: return Series(NA, index=self._get_agg_axis(axis)) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 3b6e4ba445ce0..2fab63f13710f 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8978,6 +8978,15 @@ def test_apply_empty(self): rs = xp.apply(lambda x: x['a'], axis=1) assert_frame_equal(xp, rs) + # is_reduction + x = [] + result = self.empty.apply(x.append, axis=1, is_reduction=False) + assert_frame_equal(result, self.empty) + result = self.empty.apply(x.append, axis=1, is_reduction=True) + assert_series_equal(result, Series([])) + # Ensure that x.append hasn't been called + self.assertEqual(x, []) + def test_apply_standard_nonunique(self): df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) From 1fe8e6bd1f2700e3f3e1b73e4b68c3584c0364fc Mon Sep 17 00:00:00 2001 From: David Wolever Date: Mon, 20 Jan 2014 17:12:48 -0500 Subject: [PATCH 2/4] Use 'reduce' argument instead of adding 'is_reduction' argument --- pandas/core/frame.py | 32 ++++++++++++++++---------------- pandas/tests/test_frame.py | 6 +++--- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0e3f54408420d..f23d853cf60b5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3362,8 +3362,8 @@ def diff(self, periods=1): #---------------------------------------------------------------------- # Function application - def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True, - args=(), is_reduction=None, **kwds): + def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, + args=(), **kwds): """ Applies function along input axis of DataFrame. 
@@ -3381,8 +3381,14 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True, broadcast : boolean, default False For aggregation functions, return object of same size with values propagated - reduce : boolean, default True - Try to apply reduction procedures + reduce : boolean or None, default None + Try to apply reduction procedures. If the DataFrame is empty, + apply will use reduce to determine whether the result should be a + Series or a DataFrame. If reduce is None (the default), apply's + return value will be guessed by calling func on an empty Series (note: + while guessing, exceptions raised by func will be ignored). If + reduce is True a Series will always be returned, and if False a + DataFrame will always be returned. raw : boolean, default False If False, convert each row or column into a Series. If raw=True the passed function will receive ndarray objects instead. If you are @@ -3391,14 +3397,6 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True, args : tuple Positional arguments to pass to function in addition to the array/series - is_reduction : boolean or None, default None - If the DataFrame is empty, apply needs to determine whether the - return value should be a Series or a DataFrame. If is_reduction is - None, func will be called with an empty Series and the return value - will be guessed based on the result (or, if an exception is raised, - a DataFrame will be returned). If is_reduction is True a Series - will always be returned, and if False a DataFrame will always be - returned. 
Additional keyword arguments will be passed as keywords to the function Examples @@ -3431,14 +3429,14 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True, else: if not broadcast: if not all(self.shape): - if is_reduction is None: - is_reduction = False + if reduce is None: + reduce = False try: - is_reduction = not isinstance(f(_EMPTY_SERIES), Series) + reduce = not isinstance(f(_EMPTY_SERIES), Series) except Exception: pass - if is_reduction: + if reduce: return Series(NA, index=self._get_agg_axis(axis)) else: return self.copy() @@ -3446,6 +3444,8 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=True, if raw and not self._is_mixed_type: return self._apply_raw(f, axis) else: + if reduce is None: + reduce = True return self._apply_standard(f, axis, reduce=reduce) else: return self._apply_broadcast(f, axis) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 2fab63f13710f..19da11bd786f9 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8978,11 +8978,11 @@ def test_apply_empty(self): rs = xp.apply(lambda x: x['a'], axis=1) assert_frame_equal(xp, rs) - # is_reduction + # reduce with an empty DataFrame x = [] - result = self.empty.apply(x.append, axis=1, is_reduction=False) + result = self.empty.apply(x.append, axis=1, reduce=False) assert_frame_equal(result, self.empty) - result = self.empty.apply(x.append, axis=1, is_reduction=True) + result = self.empty.apply(x.append, axis=1, reduce=True) assert_series_equal(result, Series([])) # Ensure that x.append hasn't been called self.assertEqual(x, []) From a342be640cf3cbb3f2ac98ccbe5091ff250964a9 Mon Sep 17 00:00:00 2001 From: David Wolever Date: Mon, 20 Jan 2014 20:45:59 -0500 Subject: [PATCH 3/4] Fix apply on empty DataFrame returns DataFrame --- pandas/core/frame.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 
f23d853cf60b5..c20a0e2a421c6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3413,15 +3413,15 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, ------- applied : Series or DataFrame """ - if len(self.columns) == 0 and len(self.index) == 0: - return self - axis = self._get_axis_number(axis) if kwds or args and not isinstance(func, np.ufunc): f = lambda x: func(x, *args, **kwds) else: f = func + if len(self.columns) == 0 and len(self.index) == 0: + return self._apply_empty_result(func, axis, reduce) + if isinstance(f, np.ufunc): results = f(self.values) return self._constructor(data=results, index=self.index, @@ -3429,17 +3429,7 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, else: if not broadcast: if not all(self.shape): - if reduce is None: - reduce = False - try: - reduce = not isinstance(f(_EMPTY_SERIES), Series) - except Exception: - pass - - if reduce: - return Series(NA, index=self._get_agg_axis(axis)) - else: - return self.copy() + return self._apply_empty_result(func, axis, reduce) if raw and not self._is_mixed_type: return self._apply_raw(f, axis) @@ -3450,6 +3440,19 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, else: return self._apply_broadcast(f, axis) + def _apply_empty_result(self, func, axis, reduce): + if reduce is None: + reduce = False + try: + reduce = not isinstance(func(_EMPTY_SERIES), Series) + except Exception: + pass + + if reduce: + return Series(NA, index=self._get_agg_axis(axis)) + else: + return self.copy() + def _apply_raw(self, func, axis): try: result = lib.reduce(self.values, func, axis=axis) From 55b1d80543addbc2ca414793f13b9bacc7bc1304 Mon Sep 17 00:00:00 2001 From: David Wolever Date: Thu, 23 Jan 2014 21:43:36 -0500 Subject: [PATCH 4/4] Add changelog notes and examples --- doc/source/release.rst | 3 +++ doc/source/v0.13.1.txt | 31 +++++++++++++++++++++++++++++++ pandas/core/frame.py | 3 ++- pandas/tests/test_frame.py | 7 +++++++ 4 files 
changed, 43 insertions(+), 1 deletion(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 2153c50155ad0..0d4c937d3bdd7 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -62,6 +62,9 @@ API Changes when detecting chained assignment, related (:issue:`5938`) - DataFrame.head(0) returns self instead of empty frame (:issue:`5846`) - ``autocorrelation_plot`` now accepts ``**kwargs``. (:issue:`5623`) + - ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a + ``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is + empty (:issue:`6007`). Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.13.1.txt b/doc/source/v0.13.1.txt index 31004d24e56a6..98aedb902d898 100644 --- a/doc/source/v0.13.1.txt +++ b/doc/source/v0.13.1.txt @@ -18,6 +18,37 @@ There are several new or updated docs sections including: API changes ~~~~~~~~~~~ +- ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a + ``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is + empty (:issue:`6007`). + + Previously, calling ``DataFrame.apply`` on an empty ``DataFrame`` would return + either a ``DataFrame`` if there were no columns, or the function being + applied would be called with an empty ``Series`` to guess whether a + ``Series`` or ``DataFrame`` should be returned: + + .. ipython:: python + + def applied_func(col): + print "Apply function being called with:", col + return col.sum() + + import pandas as pd + empty = pd.DataFrame(columns=['a', 'b']) + empty.apply(applied_func) + + Now, when ``apply`` is called on an empty ``DataFrame``: if the ``reduce`` + argument is ``True`` a ``Series`` will be returned, if it is ``False`` a + ``DataFrame`` will be returned, and if it is ``None`` (the default) the + function being applied will be called with an empty series to try and guess + the return type. + + .. 
ipython:: python + + empty.apply(applied_func, reduce=True) + empty.apply(applied_func, reduce=False) + + Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c20a0e2a421c6..ce66c92f7f64d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3369,7 +3369,8 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, Objects passed to functions are Series objects having index either the DataFrame's index (axis=0) or the columns (axis=1). - Return type depends on whether passed function aggregates + Return type depends on whether passed function aggregates, or the + reduce argument if the DataFrame is empty. Parameters ---------- diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 19da11bd786f9..6760653d10f3e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8984,6 +8984,13 @@ def test_apply_empty(self): assert_frame_equal(result, self.empty) result = self.empty.apply(x.append, axis=1, reduce=True) assert_series_equal(result, Series([])) + + empty_with_cols = DataFrame(columns=['a', 'b', 'c']) + result = empty_with_cols.apply(x.append, axis=1, reduce=False) + assert_frame_equal(result, empty_with_cols) + result = empty_with_cols.apply(x.append, axis=1, reduce=True) + assert_series_equal(result, Series([])) + # Ensure that x.append hasn't been called self.assertEqual(x, [])