Skip to content

Commit

Permalink
API: Prevent invalid arguments to Categorical.reshape
Browse files Browse the repository at this point in the history
  • Loading branch information
gfyoung committed Jul 13, 2016
1 parent 20de266 commit 3ad161d
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 43 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Expand Up @@ -256,6 +256,7 @@ API changes
~~~~~~~~~~~


- ``Index.reshape`` will raise a ``NotImplementedError`` exception when called (:issue:`12882`)
- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`)
- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`)
- An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`)
Expand Down Expand Up @@ -449,6 +450,8 @@ Furthermore:

Deprecations
^^^^^^^^^^^^
- ``Categorical.reshape`` has been deprecated and will be removed in a subsequent release (:issue:`12882`)
- ``Series.reshape`` has been deprecated and will be removed in a subsequent release (:issue:`12882`)

- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`)
- ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`)
Expand Down
23 changes: 20 additions & 3 deletions pandas/core/categorical.py
Expand Up @@ -383,11 +383,28 @@ def itemsize(self):

def reshape(self, new_shape, *args, **kwargs):
    """
    DEPRECATED: calling this method will raise an error in a
    future release.

    An ndarray-compatible method that hands back `self` unchanged,
    because `Categorical` instances cannot actually be reshaped.
    The requested shape is nevertheless checked against the codes,
    so an invalid `new_shape` is still rejected.

    Parameters
    ----------
    new_shape : int or tuple of ints
        A 1-D array of integers that correspond to the new
        shape of the `Categorical`. For more information on
        the parameter, please refer to `np.reshape`.

    Returns
    -------
    self
    """
    deprecation_msg = ("reshape is deprecated and will raise "
                       "in a subsequent release")
    warn(deprecation_msg, FutureWarning, stacklevel=2)

    # numpy-compat check of any additional positional/keyword arguments
    nv.validate_reshape(args, kwargs)

    # the reshaped codes are discarded: this call exists only so that
    # an invalid 'new_shape' is rejected even though it has no effect
    np.reshape(self.codes, new_shape)

    return self

@property
Expand Down
26 changes: 24 additions & 2 deletions pandas/core/internals.py
Expand Up @@ -1839,7 +1839,7 @@ def convert(self, *args, **kwargs):
try:
values = values.reshape(shape)
values = _block_shape(values, ndim=self.ndim)
except AttributeError:
except (AttributeError, NotImplementedError):
pass
newb = make_block(values, ndim=self.ndim, placement=[rl])
blocks.append(newb)
Expand Down Expand Up @@ -3616,7 +3616,7 @@ def value_getitem(placement):
return value
else:
if value.ndim == self.ndim - 1:
value = value.reshape((1,) + value.shape)
value = _safe_reshape(value, (1,) + value.shape)

def value_getitem(placement):
return value
Expand Down Expand Up @@ -4686,6 +4686,28 @@ def rrenamer(x):
_transform_index(right, rrenamer))


def _safe_reshape(arr, new_shape):
"""
If possible, reshape `arr` to have shape `new_shape`,
with a couple of exceptions (see gh-13012):
1) If `arr` is a Categorical or Index, `arr` will be
returned as is.
2) If `arr` is a Series, the `_values` attribute will
be reshaped and returned.
Parameters
----------
arr : array-like, object to be reshaped
new_shape : int or tuple of ints, the new shape
"""
if isinstance(arr, ABCSeries):
arr = arr._values
if not isinstance(arr, Categorical):
arr = arr.reshape(new_shape)
return arr


def _transform_index(index, func):
"""
Apply function to all values found in index.
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/series.py
Expand Up @@ -843,14 +843,22 @@ def repeat(self, reps, *args, **kwargs):

def reshape(self, *args, **kwargs):
"""
Return the values attribute of `self` with shape `args`.
However, if the specified shape matches exactly the current
shape, `self` is returned for compatibility reasons.
DEPRECATED: calling this method will raise an error in a
future release. Please call ``.values.reshape(...)`` instead.
return an ndarray with the values shape
if the specified shape matches exactly the current shape, then
return self (for compat)
See also
--------
numpy.ndarray.reshape
"""
warnings.warn("reshape is deprecated and will raise "
"in a subsequent release. Please use "
".values.reshape(...) instead", FutureWarning,
stacklevel=2)

if len(args) == 1 and hasattr(args[0], '__iter__'):
shape = args[0]
else:
Expand Down
10 changes: 10 additions & 0 deletions pandas/indexes/base.py
Expand Up @@ -957,6 +957,16 @@ def rename(self, name, inplace=False):
"""
return self.set_names([name], inplace=inplace)

def reshape(self, *args, **kwargs):
    """
    NOT IMPLEMENTED: do not call this method, as reshaping is not
    supported for Index objects.

    Any arguments are ignored; the call always raises.

    Raises
    ------
    NotImplementedError
    """
    msg = "reshaping is not supported for Index objects"
    raise NotImplementedError(msg)

@property
def _has_complex_internals(self):
# to disable groupby tricks in MultiIndex
Expand Down
7 changes: 4 additions & 3 deletions pandas/io/packers.py
Expand Up @@ -61,7 +61,7 @@
from pandas.core.generic import NDFrame
from pandas.core.common import PerformanceWarning
from pandas.io.common import get_filepath_or_buffer
from pandas.core.internals import BlockManager, make_block
from pandas.core.internals import BlockManager, make_block, _safe_reshape
import pandas.core.internals as internals

from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType
Expand Down Expand Up @@ -622,8 +622,9 @@ def decode(obj):
axes = obj[u'axes']

def create_block(b):
values = unconvert(b[u'values'], dtype_for(b[u'dtype']),
b[u'compress']).reshape(b[u'shape'])
values = _safe_reshape(unconvert(
b[u'values'], dtype_for(b[u'dtype']),
b[u'compress']), b[u'shape'])

# locs handles duplicate column names, and should be used instead
# of items; see GH 9618
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/indexes/test_base.py
Expand Up @@ -1413,6 +1413,12 @@ def test_take_fill_value(self):
with tm.assertRaises(IndexError):
idx.take(np.array([1, -5]))

def test_reshape_raise(self):
    # Index.reshape must refuse to run, even for the index's own shape
    index = pd.Index([0, 1, 2])
    expected_msg = "reshaping is not supported"
    tm.assertRaisesRegexp(NotImplementedError, expected_msg,
                          index.reshape, index.shape)

def test_reindex_preserves_name_if_target_is_list_or_ndarray(self):
# GH6552
idx = pd.Index([0, 1, 2])
Expand Down
68 changes: 41 additions & 27 deletions pandas/tests/series/test_analytics.py
Expand Up @@ -1554,49 +1554,63 @@ def test_shift_categorical(self):
assert_index_equal(s.values.categories, sp1.values.categories)
assert_index_equal(s.values.categories, sn2.values.categories)

def test_reshape_non_2d(self):
# GH 4554
x = Series(np.random.random(201), name='x')
self.assertTrue(x.reshape(x.shape, ) is x)
def test_reshape_deprecate(self):
x = Series(np.random.random(10), name='x')
tm.assert_produces_warning(FutureWarning, x.reshape, x.shape)

# GH 2719
a = Series([1, 2, 3, 4])
result = a.reshape(2, 2)
expected = a.values.reshape(2, 2)
tm.assert_numpy_array_equal(result, expected)
self.assertIsInstance(result, type(expected))
def test_reshape_non_2d(self):
# see gh-4554
with tm.assert_produces_warning(FutureWarning):
x = Series(np.random.random(201), name='x')
self.assertTrue(x.reshape(x.shape, ) is x)

# see gh-2719
with tm.assert_produces_warning(FutureWarning):
a = Series([1, 2, 3, 4])
result = a.reshape(2, 2)
expected = a.values.reshape(2, 2)
tm.assert_numpy_array_equal(result, expected)
self.assertIsInstance(result, type(expected))

def test_reshape_2d_return_array(self):
x = Series(np.random.random(201), name='x')
result = x.reshape((-1, 1))
self.assertNotIsInstance(result, Series)

result2 = np.reshape(x, (-1, 1))
self.assertNotIsInstance(result2, Series)
with tm.assert_produces_warning(FutureWarning):
result = x.reshape((-1, 1))
self.assertNotIsInstance(result, Series)

result = x[:, None]
expected = x.reshape((-1, 1))
assert_almost_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = np.reshape(x, (-1, 1))
self.assertNotIsInstance(result2, Series)

with tm.assert_produces_warning(FutureWarning):
result = x[:, None]
expected = x.reshape((-1, 1))
assert_almost_equal(result, expected)

def test_reshape_bad_kwarg(self):
a = Series([1, 2, 3, 4])

msg = "'foo' is an invalid keyword argument for this function"
tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
msg = "'foo' is an invalid keyword argument for this function"
tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2)

msg = "reshape\(\) got an unexpected keyword argument 'foo'"
tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
msg = "reshape\(\) got an unexpected keyword argument 'foo'"
tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2)

def test_numpy_reshape(self):
a = Series([1, 2, 3, 4])

result = np.reshape(a, (2, 2))
expected = a.values.reshape(2, 2)
tm.assert_numpy_array_equal(result, expected)
self.assertIsInstance(result, type(expected))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = np.reshape(a, (2, 2))
expected = a.values.reshape(2, 2)
tm.assert_numpy_array_equal(result, expected)
self.assertIsInstance(result, type(expected))

result = np.reshape(a, a.shape)
tm.assert_series_equal(result, a)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = np.reshape(a, a.shape)
tm.assert_series_equal(result, a)

def test_unstack(self):
from numpy import nan
Expand Down
37 changes: 32 additions & 5 deletions pandas/tests/test_categorical.py
Expand Up @@ -4058,13 +4058,40 @@ def test_numpy_repeat(self):
msg = "the 'axis' parameter is not supported"
tm.assertRaisesRegexp(ValueError, msg, np.repeat, cat, 2, axis=1)

def test_reshape(self):
    categories = ["a", "b"]

    # callable form: the deprecation warning is emitted on a plain call
    empty = pd.Categorical([], categories=categories)
    tm.assert_produces_warning(FutureWarning, empty.reshape, 0)

    # (values, function giving the shape to request) -- every valid
    # shape must warn and hand back an equal Categorical
    valid_cases = [
        ([], lambda c: 0),
        ([], lambda c: (5, -1)),
        (["a", "b"], lambda c: c.shape),
        (["a", "b"], lambda c: c.size),
    ]
    for values, shape_for in valid_cases:
        with tm.assert_produces_warning(FutureWarning):
            cat = pd.Categorical(values, categories=categories)
            self.assert_categorical_equal(cat.reshape(shape_for(cat)), cat)

    # an invalid shape still warns, then fails the shape check
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        msg = "can only specify one unknown dimension"
        cat = pd.Categorical(["a", "b"], categories=categories)
        tm.assertRaisesRegexp(ValueError, msg, cat.reshape, (-2, -1))

def test_numpy_reshape(self):
cat = pd.Categorical(["a", "b"], categories=["a", "b"])
self.assert_categorical_equal(np.reshape(cat, cat.shape), cat)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
cat = pd.Categorical(["a", "b"], categories=["a", "b"])
self.assert_categorical_equal(np.reshape(cat, cat.shape), cat)

msg = "the 'order' parameter is not supported"
tm.assertRaisesRegexp(ValueError, msg, np.reshape,
cat, cat.shape, order='F')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
msg = "the 'order' parameter is not supported"
tm.assertRaisesRegexp(ValueError, msg, np.reshape,
cat, cat.shape, order='F')

def test_na_actions(self):

Expand Down

0 comments on commit 3ad161d

Please sign in to comment.