From 3ad161df208b4784d8af582686623d74dbf78c09 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 27 Apr 2016 16:37:25 +0100 Subject: [PATCH] API: Prevent invalid arguments to Categorical.reshape --- doc/source/whatsnew/v0.19.0.txt | 3 ++ pandas/core/categorical.py | 23 +++++++-- pandas/core/internals.py | 26 +++++++++- pandas/core/series.py | 14 ++++-- pandas/indexes/base.py | 10 ++++ pandas/io/packers.py | 7 +-- pandas/tests/indexes/test_base.py | 6 +++ pandas/tests/series/test_analytics.py | 68 ++++++++++++++++----------- pandas/tests/test_categorical.py | 37 +++++++++++++-- 9 files changed, 151 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index bef02a06135de..688f3b7ff6ada 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -256,6 +256,7 @@ API changes ~~~~~~~~~~~ +- ``Index.reshape`` will raise a ``NotImplementedError`` exception when called (:issue:`12882`) - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`) - ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. 
(:issue:`12388`) - An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`) @@ -449,6 +450,8 @@ Furthermore: Deprecations ^^^^^^^^^^^^ +- ``Categorical.reshape`` has been deprecated and will be removed in a subsequent release (:issue:`12882`) +- ``Series.reshape`` has been deprecated and will be removed in a subsequent release (:issue:`12882`) - ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`) - ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 79d8bfbf57f12..1d1a9f990e61a 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -383,11 +383,28 @@ def itemsize(self): def reshape(self, new_shape, *args, **kwargs): """ - An ndarray-compatible method that returns - `self` because categorical instances cannot - actually be reshaped. + DEPRECATED: calling this method will raise an error in a + future release. + + An ndarray-compatible method that returns `self` because + `Categorical` instances cannot actually be reshaped. + + Parameters + ---------- + new_shape : int or tuple of ints + A 1-D array of integers that correspond to the new + shape of the `Categorical`. For more information on + the parameter, please refer to `np.reshape`. 
""" + warn("reshape is deprecated and will raise " + "in a subsequent release", FutureWarning, stacklevel=2) + nv.validate_reshape(args, kwargs) + + # while the 'new_shape' parameter has no effect, + # we should still enforce valid shape parameters + np.reshape(self.codes, new_shape) + return self @property diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 363ac8249eb06..ff12cfddbe9cd 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1839,7 +1839,7 @@ def convert(self, *args, **kwargs): try: values = values.reshape(shape) values = _block_shape(values, ndim=self.ndim) - except AttributeError: + except (AttributeError, NotImplementedError): pass newb = make_block(values, ndim=self.ndim, placement=[rl]) blocks.append(newb) @@ -3616,7 +3616,7 @@ def value_getitem(placement): return value else: if value.ndim == self.ndim - 1: - value = value.reshape((1,) + value.shape) + value = _safe_reshape(value, (1,) + value.shape) def value_getitem(placement): return value @@ -4686,6 +4686,28 @@ def rrenamer(x): _transform_index(right, rrenamer)) +def _safe_reshape(arr, new_shape): + """ + If possible, reshape `arr` to have shape `new_shape`, + with a couple of exceptions (see gh-13012): + + 1) If `arr` is a Categorical or Index, `arr` will be + returned as is. + 2) If `arr` is a Series, the `_values` attribute will + be reshaped and returned. + + Parameters + ---------- + arr : array-like, object to be reshaped + new_shape : int or tuple of ints, the new shape + """ + if isinstance(arr, ABCSeries): + arr = arr._values + if not isinstance(arr, Categorical): + arr = arr.reshape(new_shape) + return arr + + def _transform_index(index, func): """ Apply function to all values found in index. 
diff --git a/pandas/core/series.py b/pandas/core/series.py index 2c7f298dde2ec..b933f68cfad62 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -843,14 +843,22 @@ def repeat(self, reps, *args, **kwargs): def reshape(self, *args, **kwargs): """ - Return the values attribute of `self` with shape `args`. - However, if the specified shape matches exactly the current - shape, `self` is returned for compatibility reasons. + DEPRECATED: calling this method will raise an error in a + future release. Please call ``.values.reshape(...)`` instead. + + return an ndarray with the values shape + if the specified shape matches exactly the current shape, then + return self (for compat) See also -------- numpy.ndarray.reshape """ + warnings.warn("reshape is deprecated and will raise " + "in a subsequent release. Please use " + ".values.reshape(...) instead", FutureWarning, + stacklevel=2) + if len(args) == 1 and hasattr(args[0], '__iter__'): shape = args[0] else: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5c9938c932da2..b013d6ccb0b8e 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -957,6 +957,16 @@ def rename(self, name, inplace=False): """ return self.set_names([name], inplace=inplace) + def reshape(self, *args, **kwargs): + """ + NOT IMPLEMENTED: do not call this method, as reshaping is not + supported for Index objects and will raise an error. + + Reshape an Index. 
+ """ + raise NotImplementedError("reshaping is not supported " + "for Index objects") + @property def _has_complex_internals(self): # to disable groupby tricks in MultiIndex diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 14e2c9b371296..94f390955dddd 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -61,7 +61,7 @@ from pandas.core.generic import NDFrame from pandas.core.common import PerformanceWarning from pandas.io.common import get_filepath_or_buffer -from pandas.core.internals import BlockManager, make_block +from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType @@ -622,8 +622,9 @@ def decode(obj): axes = obj[u'axes'] def create_block(b): - values = unconvert(b[u'values'], dtype_for(b[u'dtype']), - b[u'compress']).reshape(b[u'shape']) + values = _safe_reshape(unconvert( + b[u'values'], dtype_for(b[u'dtype']), + b[u'compress']), b[u'shape']) # locs handles duplicate column names, and should be used instead # of items; see GH 9618 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 67869901b068e..06662e52e3a6f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1413,6 +1413,12 @@ def test_take_fill_value(self): with tm.assertRaises(IndexError): idx.take(np.array([1, -5])) + def test_reshape_raise(self): + msg = "reshaping is not supported" + idx = pd.Index([0, 1, 2]) + tm.assertRaisesRegexp(NotImplementedError, msg, + idx.reshape, idx.shape) + def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): # GH6552 idx = pd.Index([0, 1, 2]) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index d9e2d8096c8d7..34cfb2f0c1529 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1554,49 +1554,63 @@ def test_shift_categorical(self): 
assert_index_equal(s.values.categories, sp1.values.categories) assert_index_equal(s.values.categories, sn2.values.categories) - def test_reshape_non_2d(self): - # GH 4554 - x = Series(np.random.random(201), name='x') - self.assertTrue(x.reshape(x.shape, ) is x) + def test_reshape_deprecate(self): + x = Series(np.random.random(10), name='x') + tm.assert_produces_warning(FutureWarning, x.reshape, x.shape) - # GH 2719 - a = Series([1, 2, 3, 4]) - result = a.reshape(2, 2) - expected = a.values.reshape(2, 2) - tm.assert_numpy_array_equal(result, expected) - self.assertIsInstance(result, type(expected)) + def test_reshape_non_2d(self): + # see gh-4554 + with tm.assert_produces_warning(FutureWarning): + x = Series(np.random.random(201), name='x') + self.assertTrue(x.reshape(x.shape, ) is x) + + # see gh-2719 + with tm.assert_produces_warning(FutureWarning): + a = Series([1, 2, 3, 4]) + result = a.reshape(2, 2) + expected = a.values.reshape(2, 2) + tm.assert_numpy_array_equal(result, expected) + self.assertIsInstance(result, type(expected)) def test_reshape_2d_return_array(self): x = Series(np.random.random(201), name='x') - result = x.reshape((-1, 1)) - self.assertNotIsInstance(result, Series) - result2 = np.reshape(x, (-1, 1)) - self.assertNotIsInstance(result2, Series) + with tm.assert_produces_warning(FutureWarning): + result = x.reshape((-1, 1)) + self.assertNotIsInstance(result, Series) - result = x[:, None] - expected = x.reshape((-1, 1)) - assert_almost_equal(result, expected) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result2 = np.reshape(x, (-1, 1)) + self.assertNotIsInstance(result2, Series) + + with tm.assert_produces_warning(FutureWarning): + result = x[:, None] + expected = x.reshape((-1, 1)) + assert_almost_equal(result, expected) def test_reshape_bad_kwarg(self): a = Series([1, 2, 3, 4]) - msg = "'foo' is an invalid keyword argument for this function" - tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2) + with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + msg = "'foo' is an invalid keyword argument for this function" + tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2) - msg = "reshape\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + msg = "reshape\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2) def test_numpy_reshape(self): a = Series([1, 2, 3, 4]) - result = np.reshape(a, (2, 2)) - expected = a.values.reshape(2, 2) - tm.assert_numpy_array_equal(result, expected) - self.assertIsInstance(result, type(expected)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = np.reshape(a, (2, 2)) + expected = a.values.reshape(2, 2) + tm.assert_numpy_array_equal(result, expected) + self.assertIsInstance(result, type(expected)) - result = np.reshape(a, a.shape) - tm.assert_series_equal(result, a) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = np.reshape(a, a.shape) + tm.assert_series_equal(result, a) def test_unstack(self): from numpy import nan diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 2ca1fc71df20a..dd39861ac3114 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4058,13 +4058,40 @@ def test_numpy_repeat(self): msg = "the 'axis' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.repeat, cat, 2, axis=1) + def test_reshape(self): + cat = pd.Categorical([], categories=["a", "b"]) + tm.assert_produces_warning(FutureWarning, cat.reshape, 0) + + with tm.assert_produces_warning(FutureWarning): + cat = pd.Categorical([], categories=["a", "b"]) + self.assert_categorical_equal(cat.reshape(0), cat) + + with tm.assert_produces_warning(FutureWarning): + cat = pd.Categorical([], 
categories=["a", "b"]) + self.assert_categorical_equal(cat.reshape((5, -1)), cat) + + with tm.assert_produces_warning(FutureWarning): + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + self.assert_categorical_equal(cat.reshape(cat.shape), cat) + + with tm.assert_produces_warning(FutureWarning): + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + self.assert_categorical_equal(cat.reshape(cat.size), cat) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + msg = "can only specify one unknown dimension" + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + tm.assertRaisesRegexp(ValueError, msg, cat.reshape, (-2, -1)) + def test_numpy_reshape(self): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - self.assert_categorical_equal(np.reshape(cat, cat.shape), cat) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + self.assert_categorical_equal(np.reshape(cat, cat.shape), cat) - msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.reshape, - cat, cat.shape, order='F') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + msg = "the 'order' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.reshape, + cat, cat.shape, order='F') def test_na_actions(self):