From 7bbd031104ee161b2fb79ba6f5732910661f94f8 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Fri, 29 Apr 2016 13:04:11 -0400 Subject: [PATCH] ENH: Allow where/mask/Indexers to accept callable closes #12533 closes #11485 Author: sinhrks Closes #12539 from sinhrks/where and squashes the following commits: 6b5d618 [sinhrks] ENH: Allow .where to accept callable as condition --- doc/source/indexing.rst | 93 +++++++-- doc/source/whatsnew/v0.18.1.txt | 62 ++++++ pandas/core/common.py | 10 + pandas/core/frame.py | 16 +- pandas/core/generic.py | 28 ++- pandas/core/indexing.py | 22 ++ pandas/core/panel.py | 3 + pandas/core/series.py | 3 + pandas/tests/frame/test_indexing.py | 62 ++++++ pandas/tests/frame/test_query_eval.py | 11 + pandas/tests/indexing/test_callable.py | 275 +++++++++++++++++++++++++ pandas/tests/series/test_indexing.py | 18 ++ pandas/tests/test_panel.py | 8 + 13 files changed, 588 insertions(+), 23 deletions(-) create mode 100644 pandas/tests/indexing/test_callable.py diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 5afe69791bbdf..6227b08587790 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -79,6 +79,10 @@ of multi-axis indexing. - A slice object with labels ``'a':'f'``, (note that contrary to usual python slices, **both** the start and the stop are included!) - A boolean array + - A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and + that returns valid output for indexing (one of the above) + + .. versionadded:: 0.18.1 See more at :ref:`Selection by Label ` @@ -93,6 +97,10 @@ of multi-axis indexing. - A list or array of integers ``[4, 3, 0]`` - A slice object with ints ``1:7`` - A boolean array + - A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and + that returns valid output for indexing (one of the above) + + .. versionadded:: 0.18.1 See more at :ref:`Selection by Position ` @@ -110,6 +118,8 @@ of multi-axis indexing. 
See more at :ref:`Advanced Indexing ` and :ref:`Advanced Hierarchical `. +- ``.loc``, ``.iloc``, ``.ix`` and also ``[]`` indexing can accept a ``callable`` as an indexer. See more at :ref:`Selection By Callable <indexing.callable>`. + Getting values from an object with multi-axes selection uses the following notation (using ``.loc`` as an example, but applies to ``.iloc`` and ``.ix`` as well). Any of the axes accessors may be the null slice ``:``. Axes left out of @@ -317,6 +327,7 @@ The ``.loc`` attribute is the primary access method. The following are valid inp - A list or array of labels ``['a', 'b', 'c']`` - A slice object with labels ``'a':'f'`` (note that contrary to usual python slices, **both** the start and the stop are included!) - A boolean array +- A ``callable``, see :ref:`Selection By Callable <indexing.callable>` .. ipython:: python @@ -340,13 +351,13 @@ With a DataFrame index=list('abcdef'), columns=list('ABCD')) df1 - df1.loc[['a','b','d'],:] + df1.loc[['a', 'b', 'd'], :] Accessing via label slices .. ipython:: python - df1.loc['d':,'A':'C'] + df1.loc['d':, 'A':'C'] For getting a cross section using a label (equiv to ``df.xs('a')``) @@ -358,15 +369,15 @@ For getting values with a boolean array .. ipython:: python - df1.loc['a']>0 - df1.loc[:,df1.loc['a']>0] + df1.loc['a'] > 0 + df1.loc[:, df1.loc['a'] > 0] For getting a value explicitly (equiv to deprecated ``df.get_value('a','A')``) .. ipython:: python # this is also equivalent to ``df1.at['a','A']`` - df1.loc['a','A'] + df1.loc['a', 'A'] .. _indexing.integer: @@ -387,6 +398,7 @@ The ``.iloc`` attribute is the primary access method. The following are valid in - A list or array of integers ``[4, 3, 0]`` - A slice object with ints ``1:7`` - A boolean array +- A ``callable``, see :ref:`Selection By Callable <indexing.callable>` .. ipython:: python @@ -416,26 +428,26 @@ Select via integer slicing .. ipython:: python df1.iloc[:3] - df1.iloc[1:5,2:4] + df1.iloc[1:5, 2:4] Select via integer list .. 
ipython:: python - df1.iloc[[1,3,5],[1,3]] + df1.iloc[[1, 3, 5], [1, 3]] .. ipython:: python - df1.iloc[1:3,:] + df1.iloc[1:3, :] .. ipython:: python - df1.iloc[:,1:3] + df1.iloc[:, 1:3] .. ipython:: python # this is also equivalent to ``df1.iat[1,1]`` - df1.iloc[1,1] + df1.iloc[1, 1] For getting a cross section using an integer position (equiv to ``df.xs(1)``) @@ -471,8 +483,8 @@ returned) dfl = pd.DataFrame(np.random.randn(5,2), columns=list('AB')) dfl - dfl.iloc[:,2:3] - dfl.iloc[:,1:3] + dfl.iloc[:, 2:3] + dfl.iloc[:, 1:3] dfl.iloc[4:6] A single indexer that is out of bounds will raise an ``IndexError``. @@ -481,12 +493,52 @@ A list of indexers where any element is out of bounds will raise an .. code-block:: python - dfl.iloc[[4,5,6]] + dfl.iloc[[4, 5, 6]] IndexError: positional indexers are out-of-bounds - dfl.iloc[:,4] + dfl.iloc[:, 4] IndexError: single positional indexer is out-of-bounds +.. _indexing.callable: + +Selection By Callable +--------------------- + +.. versionadded:: 0.18.1 + +``.loc``, ``.iloc``, ``.ix`` and also ``[]`` indexing can accept a ``callable`` as an indexer. +The ``callable`` must be a function with one argument (the calling Series, DataFrame or Panel) that returns valid output for indexing. + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(6, 4), + index=list('abcdef'), + columns=list('ABCD')) + df1 + + df1.loc[lambda df: df.A > 0, :] + df1.loc[:, lambda df: ['A', 'B']] + + df1.iloc[:, lambda df: [0, 1]] + + df1[lambda df: df.columns[0]] + + +You can use callable indexing in ``Series``. + +.. ipython:: python + + df1.A.loc[lambda s: s > 0] + +Using these methods / indexers, you can chain data selection operations +without using a temporary variable. + +.. ipython:: python + + bb = pd.read_csv('data/baseball.csv', index_col='id') + (bb.groupby(['year', 'team']).sum() + .loc[lambda df: df.r > 100]) + .. 
_indexing.basics.partial_setting: Selecting Random Samples @@ -848,6 +900,19 @@ This is equivalent (but faster than) the following. df2 = df.copy() df.apply(lambda x, y: x.where(x>0,y), y=df['A']) +.. versionadded:: 0.18.1 + +``where`` can accept a callable as the condition and ``other`` arguments. The function must +take one argument (the calling Series or DataFrame) and return valid output +as the condition or ``other`` argument. + +.. ipython:: python + + df3 = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6], + 'C': [7, 8, 9]}) + df3.where(lambda x: x > 4, lambda x: x + 10) + **mask** ``mask`` is the inverse boolean operation of ``where``. diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 8c9c641a46a41..bfc95dea517c0 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -13,6 +13,8 @@ Highlights include: - ``pd.to_datetime()`` has gained the ability to assemble dates from a ``DataFrame``, see :ref:`here ` - Custom business hour offset, see :ref:`here `. - Many bug fixes in the handling of ``sparse``, see :ref:`here ` +- Method chaining improvements, see :ref:`here <whatsnew_0181.enhancements.method_chain>`. + .. contents:: What's new in v0.18.1 :local: @@ -94,6 +96,66 @@ Now you can do: df.groupby('group').resample('1D').ffill() +.. _whatsnew_0181.enhancements.method_chain: + +Method chaining improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following methods / indexers now accept ``callable``. It is intended to make +these more useful in method chains, see :ref:`Selection By Callable <indexing.callable>`. +(:issue:`11485`, :issue:`12533`) + +- ``.where()`` and ``.mask()`` +- ``.loc[]``, ``.iloc[]`` and ``.ix[]`` +- ``[]`` indexing + +``.where()`` and ``.mask()`` +"""""""""""""""""""""""""""" + +These can accept a callable as condition and ``other`` +arguments. + +.. 
ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6], + 'C': [7, 8, 9]}) + df.where(lambda x: x > 4, lambda x: x + 10) + +``.loc[]``, ``.iloc[]``, ``.ix[]`` +"""""""""""""""""""""""""""""""""" + +These can accept a callable, or a tuple of callables, as a slicer. The callable +can return a valid ``bool`` indexer or anything which is valid input for these indexers. + +.. ipython:: python + + # callable returns bool indexer + df.loc[lambda x: x.A >= 2, lambda x: x.sum() > 10] + + # callable returns list of labels + df.loc[lambda x: [1, 2], lambda x: ['A', 'B']] + +``[]`` indexing +""""""""""""""" + +Finally, you can use a callable in ``[]`` indexing of Series, DataFrame and Panel. +The callable must return valid input for ``[]`` indexing depending on its +class and index type. + +.. ipython:: python + + df[lambda x: 'A'] + +Using these methods / indexers, you can chain data selection operations +without using a temporary variable. + +.. ipython:: python + + bb = pd.read_csv('data/baseball.csv', index_col='id') + (bb.groupby(['year', 'team']).sum() + .loc[lambda df: df.r > 100]) + .. 
_whatsnew_0181.partial_string_indexing: Partial string indexing on ``DateTimeIndex`` when part of a ``MultiIndex`` diff --git a/pandas/core/common.py b/pandas/core/common.py index 14c95e01882a2..d41d49c895599 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1843,6 +1843,16 @@ def _get_callable_name(obj): return None +def _apply_if_callable(maybe_callable, obj, **kwargs): + """ + Evaluate possibly callable input using obj and kwargs if it is callable, + otherwise return as it is + """ + if callable(maybe_callable): + return maybe_callable(obj, **kwargs) + return maybe_callable + + _string_dtypes = frozenset(map(_get_dtype_from_object, (compat.binary_type, compat.text_type))) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c87d1c887361..1ec5b05aa7eef 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1970,6 +1970,7 @@ def iget_value(self, i, j): return self.iat[i, j] def __getitem__(self, key): + key = com._apply_if_callable(key, self) # shortcut if we are an actual column is_mi_columns = isinstance(self.columns, MultiIndex) @@ -2138,6 +2139,9 @@ def query(self, expr, inplace=False, **kwargs): >>> df.query('a > b') >>> df[df.a > df.b] # same result as the previous expression """ + if not isinstance(expr, compat.string_types): + msg = "expr must be a string to be evaluated, {0} given" + raise ValueError(msg.format(type(expr))) kwargs['level'] = kwargs.pop('level', 0) + 1 kwargs['target'] = None res = self.eval(expr, **kwargs) @@ -2336,6 +2340,7 @@ def _box_col_values(self, values, items): name=items, fastpath=True) def __setitem__(self, key, value): + key = com._apply_if_callable(key, self) # see if we can slice the rows indexer = convert_to_index_sliceable(self, key) @@ -2454,8 +2459,9 @@ def assign(self, **kwargs): kwargs : keyword, value pairs keywords are the column names. If the values are callable, they are computed on the DataFrame and - assigned to the new columns. If the values are - not callable, (e.g. 
a Series, scalar, or array), + assigned to the new columns. The callable must not + change input DataFrame (though pandas doesn't check it). + If the values are not callable, (e.g. a Series, scalar, or array), they are simply assigned. Returns @@ -2513,11 +2519,7 @@ def assign(self, **kwargs): # do all calculations first... results = {} for k, v in kwargs.items(): - - if callable(v): - results[k] = v(data) - else: - results[k] = v + results[k] = com._apply_if_callable(v, data) # ... and then assign for k, v in sorted(results.items()): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 788a564e3dee3..68c1e98c9957d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4283,8 +4283,26 @@ def _align_series(self, other, join='outer', axis=None, level=None, Parameters ---------- - cond : boolean %(klass)s or array - other : scalar or %(klass)s + cond : boolean %(klass)s, array or callable + If cond is callable, it is computed on the %(klass)s and + should return boolean %(klass)s or array. + The callable must not change input %(klass)s + (though pandas doesn't check it). + + .. versionadded:: 0.18.1 + + A callable can be used as cond. + + other : scalar, %(klass)s, or callable + If other is callable, it is computed on the %(klass)s and + should return scalar or %(klass)s. + The callable must not change input %(klass)s + (though pandas doesn't check it). + + .. versionadded:: 0.18.1 + + A callable can be used as other. 
+ inplace : boolean, default False Whether to perform the operation in place on the data axis : alignment axis if needed, default None @@ -4304,6 +4322,9 @@ def _align_series(self, other, join='outer', axis=None, level=None, def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True): + cond = com._apply_if_callable(cond, self) + other = com._apply_if_callable(other, self) + if isinstance(cond, NDFrame): cond, _ = cond.align(self, join='right', broadcast_axis=1) else: @@ -4461,6 +4482,9 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="False")) def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True): + + cond = com._apply_if_callable(cond, self) + return self.where(~cond, other=other, inplace=inplace, axis=axis, level=level, try_cast=try_cast, raise_on_error=raise_on_error) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index df257fb5fd1d0..acb0675247a78 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -64,6 +64,7 @@ def __iter__(self): def __getitem__(self, key): if type(key) is tuple: + key = tuple(com._apply_if_callable(x, self.obj) for x in key) try: values = self.obj.get_value(*key) if lib.isscalar(values): @@ -73,6 +74,7 @@ def __getitem__(self, key): return self._getitem_tuple(key) else: + key = com._apply_if_callable(key, self.obj) return self._getitem_axis(key, axis=0) def _get_label(self, label, axis=0): @@ -122,6 +124,10 @@ def _get_setitem_indexer(self, key): raise IndexingError(key) def __setitem__(self, key, value): + if isinstance(key, tuple): + key = tuple(com._apply_if_callable(x, self.obj) for x in key) + else: + key = com._apply_if_callable(key, self.obj) indexer = self._get_setitem_indexer(key) self._setitem_with_indexer(indexer, value) @@ -1278,6 +1284,12 @@ class _LocationIndexer(_NDFrameIndexer): _exception = 
Exception def __getitem__(self, key): + if isinstance(key, tuple): + key = tuple(com._apply_if_callable(x, self.obj) for x in key) + else: + # scalar callable may return tuple + key = com._apply_if_callable(key, self.obj) + if type(key) is tuple: return self._getitem_tuple(key) else: @@ -1326,6 +1338,8 @@ class _LocIndexer(_LocationIndexer): - A slice object with labels, e.g. ``'a':'f'`` (note that contrary to usual python slices, **both** the start and the stop are included!). - A boolean array. + - A ``callable`` function with one argument (the calling Series, DataFrame + or Panel) and that returns valid output for indexing (one of the above) ``.loc`` will raise a ``KeyError`` when the items are not found. @@ -1466,6 +1480,8 @@ class _iLocIndexer(_LocationIndexer): - A list or array of integers, e.g. ``[4, 3, 0]``. - A slice object with ints, e.g. ``1:7``. - A boolean array. + - A ``callable`` function with one argument (the calling Series, DataFrame + or Panel) and that returns valid output for indexing (one of the above) ``.iloc`` will raise ``IndexError`` if a requested indexer is out-of-bounds, except *slice* indexers which allow out-of-bounds @@ -1633,6 +1649,12 @@ def __getitem__(self, key): return self.obj.get_value(*key, takeable=self._takeable) def __setitem__(self, key, value): + if isinstance(key, tuple): + key = tuple(com._apply_if_callable(x, self.obj) for x in key) + else: + # scalar callable may return tuple + key = com._apply_if_callable(key, self.obj) + if not isinstance(key, tuple): key = self._tuplify(key) if len(key) != self.obj.ndim: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index b84079ffc4ffd..ea88c9f7223a9 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -268,6 +268,8 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): return cls(**d) def __getitem__(self, key): + key = com._apply_if_callable(key, self) + if isinstance(self._info_axis, MultiIndex): return self._getitem_multilevel(key) if 
not (is_list_like(key) or isinstance(key, slice)): @@ -567,6 +569,7 @@ def _box_item_values(self, key, values): return self._constructor_sliced(values, **d) def __setitem__(self, key, value): + key = com._apply_if_callable(key, self) shape = tuple(self.shape) if isinstance(value, self._constructor_sliced): value = value.reindex( diff --git a/pandas/core/series.py b/pandas/core/series.py index a33d5598be7cd..d8e99f2bddc81 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -577,6 +577,7 @@ def _slice(self, slobj, axis=0, kind=None): return self._get_values(slobj) def __getitem__(self, key): + key = com._apply_if_callable(key, self) try: result = self.index.get_value(self, key) @@ -692,6 +693,8 @@ def _get_values(self, indexer): return self._values[indexer] def __setitem__(self, key, value): + key = com._apply_if_callable(key, self) + def setitem(key, value): try: self._set_with_engine(key, value) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 2006905fe034d..a6e46b7d0c756 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -119,6 +119,18 @@ def test_getitem_list(self): assert_frame_equal(result, expected) self.assertEqual(result.columns.names, ['sth', 'sth2']) + def test_getitem_callable(self): + # GH 12533 + result = self.frame[lambda x: 'A'] + tm.assert_series_equal(result, self.frame.loc[:, 'A']) + + result = self.frame[lambda x: ['A', 'B']] + tm.assert_frame_equal(result, self.frame.loc[:, ['A', 'B']]) + + df = self.frame[:3] + result = df[lambda x: [True, False, True]] + tm.assert_frame_equal(result, self.frame.iloc[[0, 2], :]) + def test_setitem_list(self): self.frame['E'] = 'foo' @@ -187,6 +199,14 @@ def test_setitem_mulit_index(self): df[('joe', 'last')] = df[('jolie', 'first')].loc[i, j] assert_frame_equal(df[('joe', 'last')], df[('jolie', 'first')]) + def test_setitem_callable(self): + # GH 12533 + df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}) + 
df[lambda x: 'A'] = [11, 12, 13, 14] + + exp = pd.DataFrame({'A': [11, 12, 13, 14], 'B': [5, 6, 7, 8]}) + tm.assert_frame_equal(df, exp) + def test_getitem_boolean(self): # boolean indexing d = self.tsframe.index[10] @@ -2545,6 +2565,27 @@ def test_where_axis(self): result.where(mask, d2, inplace=True, axis='columns') assert_frame_equal(result, expected) + def test_where_callable(self): + # GH 12533 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.where(lambda x: x > 4, lambda x: x + 1) + exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.where(df > 4, df + 1)) + + # return ndarray and scalar + result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99) + exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.where(df % 2 == 0, 99)) + + # chain + result = (df + 2).where(lambda x: x > 8, lambda x: x + 10) + exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, + (df + 2).where((df + 2) > 8, (df + 2) + 10)) + def test_mask(self): df = DataFrame(np.random.randn(5, 3)) cond = df > 0 @@ -2581,6 +2622,27 @@ def test_mask_edge_case_1xN_frame(self): expec = DataFrame([[nan, 2]]) assert_frame_equal(res, expec) + def test_mask_callable(self): + # GH 12533 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.mask(lambda x: x > 4, lambda x: x + 1) + exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.mask(df > 4, df + 1)) + + # return ndarray and scalar + result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99) + exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99)) + + # chain + result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10) + exp = DataFrame([[3, 
4, 5], [6, 7, 8], [19, 20, 21]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, + (df + 2).mask((df + 2) > 8, (df + 2) + 10)) + def test_head_tail(self): assert_frame_equal(self.frame.head(), self.frame[:5]) assert_frame_equal(self.frame.tail(), self.frame[-5:]) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 9f863bc4f62f3..49b0ce66999d8 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -136,6 +136,17 @@ def test_ops(self): result = (1 - np.isnan(df)).iloc[0:25] assert_frame_equal(result, expected) + def test_query_non_str(self): + # GH 11485 + df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'b']}) + + msg = "expr must be a string to be evaluated" + with tm.assertRaisesRegexp(ValueError, msg): + df.query(lambda x: x.B == "b") + + with tm.assertRaisesRegexp(ValueError, msg): + df.query(111) + class TestDataFrameQueryWithMultiIndex(tm.TestCase): diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py new file mode 100644 index 0000000000000..3465d776bfa85 --- /dev/null +++ b/pandas/tests/indexing/test_callable.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=W0612,E1101 +import nose + +import numpy as np +import pandas as pd +import pandas.util.testing as tm + + +class TestIndexingCallable(tm.TestCase): + + _multiprocess_can_split_ = True + + def test_frame_loc_ix_callable(self): + # GH 11485 + df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'), + 'C': [1, 2, 3, 4]}) + # iloc cannot use boolean Series (see GH3635) + + # return bool indexer + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.ix[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.ix[df.A > 2]) + + res = df.loc[lambda x: x.A > 2, ] + tm.assert_frame_equal(res, df.loc[df.A > 2, ]) + + res = df.ix[lambda x: x.A > 2, ] + tm.assert_frame_equal(res, df.ix[df.A > 2, ]) + + res = 
df.loc[lambda x: x.B == 'b', :] + tm.assert_frame_equal(res, df.loc[df.B == 'b', :]) + + res = df.ix[lambda x: x.B == 'b', :] + tm.assert_frame_equal(res, df.ix[df.B == 'b', :]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == 'B'] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.ix[lambda x: x.A > 2, lambda x: x.columns == 'B'] + tm.assert_frame_equal(res, df.ix[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: 'B'] + tm.assert_series_equal(res, df.loc[df.A > 2, 'B']) + + res = df.ix[lambda x: x.A > 2, lambda x: 'B'] + tm.assert_series_equal(res, df.ix[df.A > 2, 'B']) + + res = df.loc[lambda x: x.A > 2, lambda x: ['A', 'B']] + tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']]) + + res = df.ix[lambda x: x.A > 2, lambda x: ['A', 'B']] + tm.assert_frame_equal(res, df.ix[df.A > 2, ['A', 'B']]) + + res = df.loc[lambda x: x.A == 2, lambda x: ['A', 'B']] + tm.assert_frame_equal(res, df.loc[df.A == 2, ['A', 'B']]) + + res = df.ix[lambda x: x.A == 2, lambda x: ['A', 'B']] + tm.assert_frame_equal(res, df.ix[df.A == 2, ['A', 'B']]) + + # scalar + res = df.loc[lambda x: 1, lambda x: 'A'] + self.assertEqual(res, df.loc[1, 'A']) + + res = df.ix[lambda x: 1, lambda x: 'A'] + self.assertEqual(res, df.ix[1, 'A']) + + def test_frame_loc_ix_callable_mixture(self): + # GH 11485 + df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'), + 'C': [1, 2, 3, 4]}) + + res = df.loc[lambda x: x.A > 2, ['A', 'B']] + tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']]) + + res = df.ix[lambda x: x.A > 2, ['A', 'B']] + tm.assert_frame_equal(res, df.ix[df.A > 2, ['A', 'B']]) + + res = df.loc[[2, 3], lambda x: ['A', 'B']] + tm.assert_frame_equal(res, df.loc[[2, 3], ['A', 'B']]) + + res = df.ix[[2, 3], lambda x: ['A', 'B']] + tm.assert_frame_equal(res, df.ix[[2, 3], ['A', 'B']]) + + res = df.loc[3, lambda x: ['A', 'B']] + tm.assert_series_equal(res, df.loc[3, ['A', 'B']]) + + res = df.ix[3, lambda x: ['A', 'B']] + 
tm.assert_series_equal(res, df.ix[3, ['A', 'B']]) + + def test_frame_loc_callable(self): + # GH 11485 + df = pd.DataFrame({'X': [1, 2, 3, 4], + 'Y': list('aabb')}, + index=list('ABCD')) + + # return label + res = df.loc[lambda x: ['A', 'C']] + tm.assert_frame_equal(res, df.loc[['A', 'C']]) + + res = df.loc[lambda x: ['A', 'C'], ] + tm.assert_frame_equal(res, df.loc[['A', 'C'], ]) + + res = df.loc[lambda x: ['A', 'C'], :] + tm.assert_frame_equal(res, df.loc[['A', 'C'], :]) + + res = df.loc[lambda x: ['A', 'C'], lambda x: 'X'] + tm.assert_series_equal(res, df.loc[['A', 'C'], 'X']) + + res = df.loc[lambda x: ['A', 'C'], lambda x: ['X']] + tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']]) + + # mixture + res = df.loc[['A', 'C'], lambda x: 'X'] + tm.assert_series_equal(res, df.loc[['A', 'C'], 'X']) + + res = df.loc[['A', 'C'], lambda x: ['X']] + tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']]) + + res = df.loc[lambda x: ['A', 'C'], 'X'] + tm.assert_series_equal(res, df.loc[['A', 'C'], 'X']) + + res = df.loc[lambda x: ['A', 'C'], ['X']] + tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']]) + + def test_frame_loc_callable_setitem(self): + # GH 11485 + df = pd.DataFrame({'X': [1, 2, 3, 4], + 'Y': list('aabb')}, + index=list('ABCD')) + + # return label + res = df.copy() + res.loc[lambda x: ['A', 'C']] = -20 + exp = df.copy() + exp.loc[['A', 'C']] = -20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ['A', 'C'], :] = 20 + exp = df.copy() + exp.loc[['A', 'C'], :] = 20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ['A', 'C'], lambda x: 'X'] = -1 + exp = df.copy() + exp.loc[['A', 'C'], 'X'] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ['A', 'C'], lambda x: ['X']] = [5, 10] + exp = df.copy() + exp.loc[['A', 'C'], ['X']] = [5, 10] + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.loc[['A', 'C'], lambda x: 'X'] = np.array([-1, -2]) + exp = df.copy() + 
exp.loc[['A', 'C'], 'X'] = np.array([-1, -2]) + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[['A', 'C'], lambda x: ['X']] = 10 + exp = df.copy() + exp.loc[['A', 'C'], ['X']] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ['A', 'C'], 'X'] = -2 + exp = df.copy() + exp.loc[['A', 'C'], 'X'] = -2 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ['A', 'C'], ['X']] = -4 + exp = df.copy() + exp.loc[['A', 'C'], ['X']] = -4 + tm.assert_frame_equal(res, exp) + + def test_frame_iloc_callable(self): + # GH 11485 + df = pd.DataFrame({'X': [1, 2, 3, 4], + 'Y': list('aabb')}, + index=list('ABCD')) + + # return location + res = df.iloc[lambda x: [1, 3]] + tm.assert_frame_equal(res, df.iloc[[1, 3]]) + + res = df.iloc[lambda x: [1, 3], :] + tm.assert_frame_equal(res, df.iloc[[1, 3], :]) + + res = df.iloc[lambda x: [1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + # mixture + res = df.iloc[[1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[[1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + res = df.iloc[lambda x: [1, 3], 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + def test_frame_iloc_callable_setitem(self): + # GH 11485 + df = pd.DataFrame({'X': [1, 2, 3, 4], + 'Y': list('aabb')}, + index=list('ABCD')) + + # return location + res = df.copy() + res.iloc[lambda x: [1, 3]] = 0 + exp = df.copy() + exp.iloc[[1, 3]] = 0 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], :] = -1 + exp = df.copy() + exp.iloc[[1, 3], :] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: 0] = 5 + exp = df.copy() + exp.iloc[[1, 3], 0] = 5 + tm.assert_frame_equal(res, 
exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: [0]] = 25 + exp = df.copy() + exp.iloc[[1, 3], [0]] = 25 + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.iloc[[1, 3], lambda x: 0] = -3 + exp = df.copy() + exp.iloc[[1, 3], 0] = -3 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[[1, 3], lambda x: [0]] = -5 + exp = df.copy() + exp.iloc[[1, 3], [0]] = -5 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], 0] = 10 + exp = df.copy() + exp.iloc[[1, 3], 0] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], [0]] = [-5, -5] + exp = df.copy() + exp.iloc[[1, 3], [0]] = [-5, -5] + tm.assert_frame_equal(res, exp) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 058fb430b9c87..5ed3fda7d0b8f 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -389,6 +389,18 @@ def test_getitem_dataframe(self): df = pd.DataFrame(rng, index=rng) self.assertRaises(TypeError, s.__getitem__, df > 5) + def test_getitem_callable(self): + # GH 12533 + s = pd.Series(4, index=list('ABCD')) + result = s[lambda x: 'A'] + self.assertEqual(result, s.loc['A']) + + result = s[lambda x: ['A', 'B']] + tm.assert_series_equal(result, s.loc[['A', 'B']]) + + result = s[lambda x: [True, False, True, True]] + tm.assert_series_equal(result, s.iloc[[0, 2, 3]]) + def test_setitem_ambiguous_keyerror(self): s = Series(lrange(10), index=lrange(0, 20, 2)) @@ -413,6 +425,12 @@ def test_setitem_float_labels(self): assert_series_equal(s, tmp) + def test_setitem_callable(self): + # GH 12533 + s = pd.Series([1, 2, 3, 4], index=list('ABCD')) + s[lambda x: 'A'] = -1 + tm.assert_series_equal(s, pd.Series([-1, 2, 3, 4], index=list('ABCD'))) + def test_slice(self): numSlice = self.series[10:20] numSliceEnd = 
self.series[-10:] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index ffefd46d20376..a6516614e9965 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -713,6 +713,14 @@ def test_getitem_fancy_xs_check_view(self): self._check_view((item, NS, 'C'), comp) self._check_view((NS, date, 'C'), comp) + def test_getitem_callable(self): + p = self.panel + # GH 12533 + + assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB']) + assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']], + p.loc[['ItemB', 'ItemC']]) + def test_ix_setitem_slice_dataframe(self): a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33], minor_axis=[111, 222, 333])