Skip to content

Commit

Permalink
BUG: raise for invalid dtypes per issue #15520
Browse files Browse the repository at this point in the history
closes #15520

Author: Akash Tandon <akashtndn.acm@gmail.com>
Author: root <akashtndn.acm@gmail.com>
Author: analyticalmonk <akash@redcarpetup.com>
Author: Akash Tandon <analyticalmonk@users.noreply.github.com>

Closes #16047 from analyticalmonk/patch_for_15520 and squashes the following commits:

3646eb6 [analyticalmonk] TST: check for invalid dtype for Series constructor per GH15520
73d980a [Akash Tandon] Merge branch 'master' into patch_for_15520
b3c2fbb [root] BUG: Added 'O' to pandas_dtype's valid list
c3699fb [root] DOC: added whatsnew entry for PR#16047 addressing GH15520
fbed5a6 [Akash Tandon] TST: Added list to invalid dtype
ad9f345 [Akash Tandon] CLN: refactored code related to issue GH15520
a358181 [Akash Tandon] BUG: Added numpy.dtype_ to valid pandas_dtype() type list
3eaa432 [Akash Tandon] TST: Added numpy.object_ dtype to valid pandas_dtype list
f858726 [Akash Tandon] style fix
d4971cd [Akash Tandon] BUG: pandas_dtype() to raise error for invalid dtype per GH15520
ee0030f [Akash Tandon] TST: added more test-cases for pandas_dtype() test
3700259 [Akash Tandon] CLN: Replace _coerce_to_dtype() with pandas_dtype()
c10e1d4 [Akash Tandon] TST: maintain list containing dtypes in TestPandasDtype
fecba12 [Akash Tandon] BUG: Raise when invalid dtype passed to pandas_dtype
99fb660 [Akash Tandon] TST: wrote test representing bug fix result for #15520
  • Loading branch information
analyticalmonk authored and jreback committed Apr 26, 2017
1 parent b8d9861 commit 61ca022
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 18 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Expand Up @@ -1605,6 +1605,7 @@ Conversion
- Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`)
- Bug in ``Index.to_series()`` where the index was not copied (and so mutating later would change the original), (:issue:`15949`)
- Bug in indexing with partial string indexing with a len-1 DataFrame (:issue:`16071`)
- Bug in ``Series`` construction where passing an invalid dtype did not raise an error (:issue:`15520`)

Indexing
^^^^^^^^
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
Expand Up @@ -19,7 +19,7 @@
is_datetime_or_timedelta_dtype,
is_bool_dtype, is_scalar,
_string_dtypes,
_coerce_to_dtype,
pandas_dtype,
_ensure_int8, _ensure_int16,
_ensure_int32, _ensure_int64,
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
Expand Down Expand Up @@ -576,7 +576,7 @@ def astype_nansafe(arr, dtype, copy=True):
""" return a view if copy is False, but
need to be very careful as the result shape could change! """
if not isinstance(dtype, np.dtype):
dtype = _coerce_to_dtype(dtype)
dtype = pandas_dtype(dtype)

if issubclass(dtype.type, text_type):
# in Py3 that's str, in Py2 that's unicode
Expand Down
17 changes: 16 additions & 1 deletion pandas/core/dtypes/common.py
Expand Up @@ -788,4 +788,19 @@ def pandas_dtype(dtype):
elif isinstance(dtype, ExtensionDtype):
return dtype

return np.dtype(dtype)
try:
npdtype = np.dtype(dtype)
except (TypeError, ValueError):
raise

# Any invalid dtype (such as pd.Timestamp) should raise an error.
# np.dtype(invalid_type).kind == 'O' for such objects. However, that
# check alone would also reject the valid object dtypes (object,
# np.object_, 'object' and 'O'), so those are explicitly allowed and
# returned first, before the kind check below is evaluated.
if dtype in [object, np.object_, 'object', 'O']:
return npdtype
elif npdtype.kind == 'O':
raise TypeError('dtype {0} not understood'.format(dtype))

return npdtype
7 changes: 4 additions & 3 deletions pandas/core/generic.py
Expand Up @@ -11,7 +11,6 @@

from pandas._libs import tslib, lib
from pandas.core.dtypes.common import (
_coerce_to_dtype,
_ensure_int64,
needs_i8_conversion,
is_scalar,
Expand All @@ -23,7 +22,8 @@
is_datetime64tz_dtype,
is_list_like,
is_dict_like,
is_re_compilable)
is_re_compilable,
pandas_dtype)
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.missing import isnull, notnull
from pandas.core.dtypes.generic import ABCSeries, ABCPanel
Expand Down Expand Up @@ -170,13 +170,14 @@ def _validate_dtype(self, dtype):
""" validate the passed dtype """

if dtype is not None:
dtype = _coerce_to_dtype(dtype)
dtype = pandas_dtype(dtype)

# a compound dtype
if dtype.kind == 'V':
raise NotImplementedError("compound dtypes are not implemented"
"in the {0} constructor"
.format(self.__class__.__name__))

return dtype

def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/series.py
Expand Up @@ -14,7 +14,7 @@
import numpy.ma as ma

from pandas.core.dtypes.common import (
_coerce_to_dtype, is_categorical_dtype,
is_categorical_dtype,
is_bool,
is_integer, is_integer_dtype,
is_float_dtype,
Expand All @@ -28,7 +28,8 @@
is_dict_like,
is_scalar,
_is_unorderable_exception,
_ensure_platform_int)
_ensure_platform_int,
pandas_dtype)
from pandas.core.dtypes.generic import ABCSparseArray, ABCDataFrame
from pandas.core.dtypes.cast import (
maybe_upcast, infer_dtype_from_scalar,
Expand Down Expand Up @@ -2872,7 +2873,7 @@ def _sanitize_array(data, index, dtype=None, copy=False,
"""

if dtype is not None:
dtype = _coerce_to_dtype(dtype)
dtype = pandas_dtype(dtype)

if isinstance(data, ma.MaskedArray):
mask = ma.getmaskarray(data)
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/dtypes/test_common.py
Expand Up @@ -2,6 +2,7 @@

import pytest
import numpy as np
import pandas as pd

from pandas.core.dtypes.dtypes import (
DatetimeTZDtype, PeriodDtype, CategoricalDtype)
Expand All @@ -13,6 +14,20 @@

class TestPandasDtype(tm.TestCase):

# Passing an invalid dtype, either as a string or as an object, must raise
# a TypeError (GH15520).
def test_invalid_dtype_error(self):
msg = 'not understood'
invalid_list = [pd.Timestamp, 'pd.Timestamp', list]
for dtype in invalid_list:
with tm.assertRaisesRegexp(TypeError, msg):
pandas_dtype(dtype)

valid_list = [object, 'float64', np.object_, np.dtype('object'), 'O',
np.float64, float, np.dtype('float64')]
for dtype in valid_list:
pandas_dtype(dtype)

def test_numpy_dtype(self):
for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
self.assertEqual(pandas_dtype(dtype), np.dtype(dtype))
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/series/test_constructors.py
Expand Up @@ -30,6 +30,14 @@

class TestSeriesConstructors(TestData, tm.TestCase):

def test_invalid_dtype(self):
# GH15520
msg = 'not understood'
invalid_list = [pd.Timestamp, 'pd.Timestamp', list]
for dtype in invalid_list:
with tm.assertRaisesRegexp(TypeError, msg):
Series([], name='time', dtype=dtype)

def test_scalar_conversion(self):

# Pass in scalar is disabled
Expand Down
16 changes: 7 additions & 9 deletions pandas/tests/test_strings.py
Expand Up @@ -1208,10 +1208,9 @@ def test_extractall_same_as_extract_subject_index(self):
tm.assert_frame_equal(extract_one_noname, no_match_index)

def test_empty_str_methods(self):
empty_str = empty = Series(dtype=str)
empty_str = empty = Series(dtype=object)
empty_int = Series(dtype=int)
empty_bool = Series(dtype=bool)
empty_list = Series(dtype=list)
empty_bytes = Series(dtype=object)

# GH7241
Expand Down Expand Up @@ -1242,25 +1241,24 @@ def test_empty_str_methods(self):
DataFrame(columns=[0, 1], dtype=str),
empty.str.extract('()()', expand=False))
tm.assert_frame_equal(DataFrame(dtype=str), empty.str.get_dummies())
tm.assert_series_equal(empty_str, empty_list.str.join(''))
tm.assert_series_equal(empty_str, empty_str.str.join(''))
tm.assert_series_equal(empty_int, empty.str.len())
tm.assert_series_equal(empty_list, empty_list.str.findall('a'))
tm.assert_series_equal(empty_str, empty_str.str.findall('a'))
tm.assert_series_equal(empty_int, empty.str.find('a'))
tm.assert_series_equal(empty_int, empty.str.rfind('a'))
tm.assert_series_equal(empty_str, empty.str.pad(42))
tm.assert_series_equal(empty_str, empty.str.center(42))
tm.assert_series_equal(empty_list, empty.str.split('a'))
tm.assert_series_equal(empty_list, empty.str.rsplit('a'))
tm.assert_series_equal(empty_list,
tm.assert_series_equal(empty_str, empty.str.split('a'))
tm.assert_series_equal(empty_str, empty.str.rsplit('a'))
tm.assert_series_equal(empty_str,
empty.str.partition('a', expand=False))
tm.assert_series_equal(empty_list,
tm.assert_series_equal(empty_str,
empty.str.rpartition('a', expand=False))
tm.assert_series_equal(empty_str, empty.str.slice(stop=1))
tm.assert_series_equal(empty_str, empty.str.slice(step=1))
tm.assert_series_equal(empty_str, empty.str.strip())
tm.assert_series_equal(empty_str, empty.str.lstrip())
tm.assert_series_equal(empty_str, empty.str.rstrip())
tm.assert_series_equal(empty_str, empty.str.rstrip())
tm.assert_series_equal(empty_str, empty.str.wrap(42))
tm.assert_series_equal(empty_str, empty.str.get(0))
tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii'))
Expand Down

0 comments on commit 61ca022

Please sign in to comment.