From 0758f1dbdcd594d0e5d71d99800b0baa9074ba4c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 5 May 2018 11:08:36 -0400 Subject: [PATCH] ENH: add integer-na support via an ExtensionArray closes #20700 --- pandas/conftest.py | 9 + pandas/core/algorithms.py | 4 +- pandas/core/arrays/__init__.py | 4 + pandas/core/arrays/base.py | 44 +- pandas/core/arrays/categorical.py | 4 + pandas/core/arrays/integer.py | 516 ++++++++++++++++++ pandas/core/dtypes/base.py | 6 + pandas/core/dtypes/cast.py | 5 + pandas/core/dtypes/common.py | 4 + pandas/core/dtypes/dtypes.py | 22 +- pandas/core/internals.py | 21 +- pandas/core/missing.py | 3 +- pandas/core/ops.py | 38 +- pandas/core/series.py | 9 +- pandas/io/formats/format.py | 1 - pandas/tests/extension/base/__init__.py | 1 + pandas/tests/extension/base/methods.py | 3 +- pandas/tests/extension/base/missing.py | 5 + pandas/tests/extension/base/ops.py | 6 + pandas/tests/extension/base/reshaping.py | 9 + .../extension/category/test_categorical.py | 4 + pandas/tests/extension/decimal/array.py | 7 +- .../tests/extension/decimal/test_decimal.py | 14 +- pandas/tests/extension/integer/__init__.py | 0 .../tests/extension/integer/test_integer.py | 390 +++++++++++++ pandas/tests/extension/json/array.py | 7 +- pandas/tests/extension/json/test_json.py | 10 +- 27 files changed, 1105 insertions(+), 41 deletions(-) create mode 100644 pandas/core/arrays/integer.py create mode 100644 pandas/tests/extension/base/ops.py create mode 100644 pandas/tests/extension/integer/__init__.py create mode 100644 pandas/tests/extension/integer/test_integer.py diff --git a/pandas/conftest.py b/pandas/conftest.py index 1e1d72119b194d..562854fed89161 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -96,6 +96,15 @@ def all_arithmetic_operators(request): return request.param +@pytest.fixture(params=['__eq__', '__ne__', '__le__', + '__lt__', '__ge__', '__gt__']) +def all_compare_operators(request): + """ + Fixture for dunder names for common compare 
operations + """ + return request.param + + @pytest.fixture(params=[None, 'gzip', 'bz2', 'zip', pytest.param('xz', marks=td.skip_if_no_lzma)]) def compression(request): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 88bc497f9f22d3..eef2fde4386d87 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -154,7 +154,7 @@ def _reconstruct_data(values, dtype, original): """ from pandas import Index if is_extension_array_dtype(dtype): - pass + values = dtype.array_type._from_sequence(values) elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): values = Index(original)._shallow_copy(values, name=None) elif is_bool_dtype(dtype): @@ -705,7 +705,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, else: - if is_categorical_dtype(values) or is_sparse(values): + if is_extension_array_dtype(values) or is_sparse(values): # handle Categorical and sparse, result = Series(values)._values.value_counts(dropna=dropna) diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index f8adcf520c15ba..1c34c123e483c2 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1,2 +1,6 @@ from .base import ExtensionArray # noqa from .categorical import Categorical # noqa +from .integer import ( # noqa + Int8Array, Int16Array, Int32Array, Int64Array, + UInt8Array, UInt16Array, UInt32Array, UInt64Array, + to_integer_array) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1922801c30719e..e09a02c6143b0a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -36,6 +36,7 @@ class ExtensionArray(object): * isna * take * copy + * append * _concat_same_type An additional method is available to satisfy pandas' internal, @@ -49,6 +50,7 @@ class ExtensionArray(object): methods: * fillna + * dropna * unique * factorize / _values_for_factorize * argsort / _values_for_argsort @@ -82,7 +84,7 @@ class ExtensionArray(object): # Constructors # 
------------------------------------------------------------------------ @classmethod - def _from_sequence(cls, scalars): + def _from_sequence(cls, scalars, copy=False): """Construct a new ExtensionArray from a sequence of scalars. Parameters @@ -90,6 +92,8 @@ def _from_sequence(cls, scalars): scalars : Sequence Each element will be an instance of the scalar type for this array, ``cls.dtype.type``. + copy : boolean, default False + if True, copy the underlying data Returns ------- ExtensionArray @@ -379,6 +383,16 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values + def dropna(self): + """ Return ExtensionArray without NA values + + Returns + ------- + valid : ExtensionArray + """ + + return self[~self.isna()] + def unique(self): """Compute the ExtensionArray of unique values. @@ -567,6 +581,34 @@ def copy(self, deep=False): """ raise AbstractMethodError(self) + def append(self, other): + """ + Append a collection of Arrays together + + Parameters + ---------- + other : ExtensionArray or list/tuple of ExtensionArrays + + Returns + ------- + appended : ExtensionArray + """ + + to_concat = [self] + cls = self.__class__ + + if isinstance(other, (list, tuple)): + to_concat = to_concat + list(other) + else: + to_concat.append(other) + + for obj in to_concat: + if not isinstance(obj, cls): + raise TypeError('all inputs must be of type {}'.format( + cls.__name__)) + + return cls._concat_same_type(to_concat) + # ------------------------------------------------------------------------ # Block-related methods # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index abcb9ae3494b50..63b99ffa06b8ac 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2343,6 +2343,10 @@ def isin(self, values): return algorithms.isin(self.codes, code_values) +# inform the Dtype about us 
+CategoricalDtype.array_type = Categorical + + # The Series.cat accessor diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py new file mode 100644 index 00000000000000..5b92e1bb6f2f05 --- /dev/null +++ b/pandas/core/arrays/integer.py @@ -0,0 +1,516 @@ +import sys +import operator +import warnings +import numpy as np + +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass +from pandas.compat import set_function_name, PY3 +from pandas.api.types import (is_integer, is_scalar, is_float, + is_float_dtype, is_integer_dtype, + is_object_dtype, + infer_dtype) +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.dtypes import registry +from pandas.core.dtypes.missing import isna, notna +from pandas.core import ops + +# available dtypes +_integer_dtypes = ['int8', 'int16', 'int32', 'int64'] +_integer_formatter = lambda x: x.capitalize() +_unsigned_dtypes = ['uint8', 'uint16', 'uint32', 'uint64'] +_unsigned_formatter = lambda x: "{}{}".format(x[0].upper(), x[1:].capitalize()) + + +class IntegerDtype(ExtensionDtype): + type = None + na_value = np.nan + kind = 'i' + is_integer = True + is_signed_integer = True + is_unsigned_integer = False + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + @classmethod + def construct_from_string_strict(cls, string): + """ + Strict construction from a string, raise a TypeError if not + possible + """ + if string[0] == 'I': + return cls.construct_from_string(string) + raise TypeError("could not construct PeriodDtype") + + +class UnsignedIntegerDtype(IntegerDtype): + kind = 'u' + is_signed_integer = False + is_unsigned_integer = True + + @classmethod + def construct_from_string_strict(cls, string): + """ + Strict construction from a string, raise a TypeError if not + possible + """ + if string[0] == 'U': + 
return cls.construct_from_string(string) + raise TypeError("could not construct PeriodDtype") + + +def to_integer_array(values): + """ + Parameters + ---------- + values : 1D list-like + + Returns + ------- + infer and return an integer array + + Raises + ------ + TypeError if incompatible types + """ + values = np.array(values, copy=False) + kind = 'UInt' if values.dtype.kind == 'u' else 'Int' + array_type = "{}{}Array".format(kind, values.dtype.itemsize * 8) + try: + array_type = getattr(module, array_type) + except AttributeError: + raise TypeError("Incompatible dtype for {}".format(values.dtype)) + return array_type(values, copy=False) + + +def coerce_to_array(values, dtype, mask=None, copy=False): + """ + Coerce the input values array to numpy arrays with a mask + + Parameters + ---------- + values : 1D list-like + dtype : integer dtype + mask : boolean 1D array, optional + copy : boolean, default False + if True, copy the input + + Returns + ------- + tuple of (values, mask) + """ + values = np.array(values, copy=copy) + if is_object_dtype(values): + inferred_type = infer_dtype(values) + if inferred_type not in ['floating', 'integer', + 'mixed-integer', 'mixed-integer-float']: + raise TypeError("{} cannot be converted to an IntegerDtype".format( + values.dtype)) + elif not (is_integer_dtype(values) or is_float_dtype(values)): + raise TypeError("{} cannot be converted to an IntegerDtype".format( + values.dtype)) + + if mask is None: + mask = isna(values) + else: + assert len(mask) == len(values) + + if not values.ndim == 1: + raise TypeError("values must be a 1D list-like") + if not mask.ndim == 1: + raise TypeError("mask must be a 1D list-like") + + if mask.any(): + # we copy as need to coerce here + values = values.copy() + values[mask] = 1 + + values = values.astype(dtype.type) + + else: + values = values.astype(dtype.type, copy=False) + return values, mask + + +def make_comparison_op(op, cls): + def cmp_method(self, other): + + op_str = op.__name__ + mask 
= None + if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)): + if other.ndim > 0 and len(self) != len(other): + raise ValueError('Lengths must match to compare') + elif isinstance(other, cls): + other, mask = other.data, other.mask + + # numpy will show a DeprecationWarning on invalid elementwise + # comparisons, this will raise in the future + with warnings.catch_warnings(record=True): + with np.errstate(all='ignore'): + result = op(self.data, np.asarray(other)) + + # nans propagate + if mask is None: + mask = self.mask + else: + mask = self.mask | mask + + result[mask] = True if op_str == 'ne' else False + return result + + name = '__{name}__'.format(name=op.__name__) + return set_function_name(cmp_method, name, cls) + + +def make_arithmetic_op(op, cls): + def integer_arithmetic_method(self, other): + + mask = None + if isinstance(other, (ABCSeries, ABCIndexClass)): + other = getattr(other, 'values', other) + elif isinstance(other, cls): + other, mask = other.data, other.mask + elif getattr(other, 'ndim', 0) > 1: + raise TypeError("can only perform ops with 1-d structures") + elif isinstance(other, IntegerArray): + pass + elif isinstance(other, np.ndarray): + if not other.ndim: + other = other.item() + elif other.ndim == 1: + if not (is_float_dtype(other) or is_integer_dtype(other)): + raise TypeError("can only perform ops with numeric values") + else: + if not (is_float(other) or is_integer(other)): + raise TypeError("can only perform ops with numeric values") + + # nans propagate + if mask is None: + mask = self.mask + else: + mask = self.mask | mask + + with np.errstate(all='ignore'): + result = op(self.data, other) + + # may need to fill infs + # and mask wraparound + if is_float_dtype(result): + mask |= (result == np.inf) | (result == -np.inf) + + return cls(result, mask=mask) + + name = '__{name}__'.format(name=op.__name__) + return set_function_name(integer_arithmetic_method, name, cls) + + +class IntegerArray(ExtensionArray): + """ + We 
represent an IntegerArray with 2 numpy arrays + - data: contains a numpy integer array of the appropriate dtype + - mask: a boolean array holding a mask on the data, True is missing + """ + + dtype = None + + def __init__(self, values, mask=None, copy=False): + self.data, self.mask = coerce_to_array( + values, dtype=self.dtype, mask=mask, copy=copy) + + @classmethod + def _from_sequence(cls, scalars, mask=None, copy=False): + return cls(scalars, mask=mask, copy=copy) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values) + + def __getitem__(self, item): + if is_integer(item): + if self.mask[item]: + return self.dtype.na_value + return self.data[item] + return type(self)(self.data[item], mask=self.mask[item]) + + def _coerce_to_ndarray(self): + """ coerce to an ndarray, preserving my scalar types """ + + # TODO(jreback) make this better + data = self.data.astype(object) + data[self.mask] = self._na_value + return data + + def __array__(self, dtype=None): + """ + the array interface, return my values + We return an object array here to preserve our scalar values + """ + return self._coerce_to_ndarray() + + def __iter__(self): + """Iterate over elements of the array. + + """ + # This needs to be implemented so that pandas recognizes extension + # arrays as list-like. The default implementation makes successive + # calls to ``__getitem__``, which may be slower than necessary. 
+ for i in range(len(self)): + if self.mask[i]: + yield self.dtype.na_value + else: + yield self.data[i] + + def _formatting_values(self): + # type: () -> np.ndarray + return self._coerce_to_ndarray() + + def take(self, indexer, allow_fill=False, fill_value=None): + from pandas.api.extensions import take + + # we always fill with 1 internally + # to avoid upcasting + data_fill_value = 1 if isna(fill_value) else fill_value + result = take(self.data, indexer, fill_value=data_fill_value, + allow_fill=allow_fill) + + mask = take(self.mask, indexer, fill_value=True, + allow_fill=allow_fill) + + # if we are filling + # we only fill where the indexer is null + # not existing missing values + # TODO(jreback) what if we have a non-na float as a fill value? + if allow_fill and notna(fill_value): + fill_mask = np.asarray(indexer) == -1 + result[fill_mask] = fill_value + mask = mask ^ fill_mask + + return self._from_sequence(result, mask=mask) + + def copy(self, deep=False): + if deep: + return type(self)( + self.data.copy(), mask=self.mask.copy()) + return type(self)(self) + + def __setitem__(self, key, value): + _is_scalar = is_scalar(value) + if _is_scalar: + value = [value] + value, mask = coerce_to_array(value, dtype=self.dtype) + + if _is_scalar: + value = value[0] + mask = mask[0] + + self.data[key] = value + self.mask[key] = mask + + def __len__(self): + return len(self.data) + + def __repr__(self): + + formatted = self._formatting_values() + return '{}({})'.format( + self.__class__.__name__, + formatted.tolist()) + + @property + def nbytes(self): + return self.data.nbytes + self.mask.nbytes + + def isna(self): + return self.mask + + @property + def _na_value(self): + return np.nan + + @classmethod + def _concat_same_type(cls, to_concat): + data = np.concatenate([x.data for x in to_concat]) + mask = np.concatenate([x.mask for x in to_concat]) + return cls(data, mask=mask) + + def astype(self, dtype, copy=True): + """Cast to a NumPy array with 'dtype'. 
+ + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ndarray + NumPy ndarray with 'dtype' for its dtype. + """ + data = self._coerce_to_ndarray() + return data.astype(dtype=dtype, copy=False) + + @property + def _ndarray_values(self): + # type: () -> np.ndarray + """Internal pandas method for lossy conversion to a NumPy ndarray. + + This method is not part of the pandas interface. + + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. + """ + return self.data + + def value_counts(self, dropna=True): + """ + Returns a Series containing counts of each unique value. + + Missing values are counted only when ``dropna`` is False. + + Parameters + ---------- + dropna : boolean, default True + Don't include counts of NaN. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + + """ + + from pandas import Index, Series + + # compute counts on the data with no nans + data = self.data[~self.mask] + value_counts = Index(data).value_counts() + + array = value_counts.values + index = value_counts.index + + # if we want nans, count the mask + if not dropna: + array = np.append(array, [self.mask.sum()]) + + # TODO(extension) + # should this be an Index backed by the + # Array type? + index = index.astype(object).append(Index([np.nan])) + + return Series(array, index=index) + + def _values_for_argsort(self): + # type: () -> ndarray + """Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. 
+ + See Also + -------- + ExtensionArray.argsort + """ + data = self.data.copy() + data[self.mask] = data.min() - 1 + return data + + @classmethod + def _add_comparison_methods_binary(cls): + cls.__eq__ = make_comparison_op(operator.eq, cls) + cls.__ne__ = make_comparison_op(operator.ne, cls) + cls.__lt__ = make_comparison_op(operator.lt, cls) + cls.__gt__ = make_comparison_op(operator.gt, cls) + cls.__le__ = make_comparison_op(operator.le, cls) + cls.__ge__ = make_comparison_op(operator.ge, cls) + + @classmethod + def _add_numeric_methods_binary(cls): + """ add in numeric methods """ + cls.__add__ = make_arithmetic_op(operator.add, cls) + cls.__radd__ = make_arithmetic_op(ops.radd, cls) + cls.__sub__ = make_arithmetic_op(operator.sub, cls) + cls.__rsub__ = make_arithmetic_op(ops.rsub, cls) + cls.__mul__ = make_arithmetic_op(operator.mul, cls) + cls.__rmul__ = make_arithmetic_op(ops.rmul, cls) + cls.__rpow__ = make_arithmetic_op(ops.rpow, cls) + cls.__pow__ = make_arithmetic_op(operator.pow, cls) + cls.__mod__ = make_arithmetic_op(operator.mod, cls) + cls.__floordiv__ = make_arithmetic_op(operator.floordiv, cls) + cls.__rfloordiv__ = make_arithmetic_op(ops.rfloordiv, cls) + cls.__truediv__ = make_arithmetic_op(operator.truediv, cls) + cls.__rtruediv__ = make_arithmetic_op(ops.rtruediv, cls) + if not PY3: + cls.__div__ = make_arithmetic_op(operator.div, cls) + cls.__rdiv__ = make_arithmetic_op(ops.rdiv, cls) + + cls.__divmod__ = make_arithmetic_op(divmod, cls) + + +class UnsignedIntegerArray(IntegerArray): + pass + + +module = sys.modules[__name__] + + +# create the Dtype +types = [(_integer_dtypes, IntegerDtype, _integer_formatter), + (_unsigned_dtypes, UnsignedIntegerDtype, _unsigned_formatter)] +for dtypes, superclass, formatter in types: + + for dtype in dtypes: + + name = formatter(dtype) + classname = "{}Dtype".format(name) + attributes_dict = {'type': getattr(np, dtype), + 'name': name} + dtype_type = type(classname, (superclass, ), attributes_dict) + 
setattr(module, classname, dtype_type) + + # register + registry.register(dtype_type, dtype_type.construct_from_string_strict) + + +# create the Array +types = [(_integer_dtypes, IntegerArray, _integer_formatter), + (_unsigned_dtypes, UnsignedIntegerArray, _unsigned_formatter)] +for dtypes, superclass, formatter in types: + + for dtype in dtypes: + + dtype_type = getattr(module, "{}Dtype".format(formatter(dtype))) + classname = "{}Array".format(formatter(dtype)) + attributes_dict = {'dtype': dtype_type()} + array_type = type(classname, (superclass, ), attributes_dict) + setattr(module, classname, array_type) + + # add ops + array_type._add_numeric_methods_binary() + array_type._add_comparison_methods_binary() + + # set the Array type on the Dtype + dtype_type.array_type = array_type + + +def make_data(): + return (list(range(8)) + + [np.nan] + + list(range(10, 98)) + + [np.nan] + + [99, 100]) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 49e98c16c716e0..ba359c9ef49822 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -156,6 +156,12 @@ def name(self): """ raise AbstractMethodError(self) + @property + def array_type(self): + """Return the array type associated with this dtype + """ + raise AbstractMethodError(self) + @classmethod def construct_from_string(cls, string): """Attempt to construct this type from a string. diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e4ed6d544d42eb..73176887ca0d96 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -647,6 +647,11 @@ def conv(r, dtype): def astype_nansafe(arr, dtype, copy=True): """ return a view if copy is False, but need to be very careful as the result shape could change! 
""" + + # dispatch on extension dtype if needed + if is_extension_array_dtype(dtype): + return dtype.array_type._from_sequence(arr, copy=copy) + if not isinstance(dtype, np.dtype): dtype = pandas_dtype(dtype) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4d9846b3518145..37d260088c4d47 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1980,6 +1980,10 @@ def pandas_dtype(dtype): if result is not None: return result + # un-registered extension types + if isinstance(dtype, ExtensionDtype): + return dtype + try: npdtype = np.dtype(dtype) except (TypeError, ValueError): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 795f8ec54f3d57..8c322f3250a5c6 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -27,7 +27,7 @@ def register(self, dtype, constructor=None): ---------- dtype : PandasExtension Dtype """ - if not issubclass(dtype, PandasExtensionDtype): + if not issubclass(dtype, (PandasExtensionDtype, ExtensionDtype)): raise ValueError("can only register pandas extension dtypes") if constructor is None: @@ -45,14 +45,20 @@ def find(self, dtype): ------- return the first matching dtype, otherwise return None """ - for dtype_type, constructor in self.dtypes.items(): - if isinstance(dtype, dtype_type): + if not isinstance(dtype, compat.string_types): + dtype_type = dtype + if not isinstance(dtype, type): + dtype_type = type(dtype) + if issubclass(dtype_type, (PandasExtensionDtype, ExtensionDtype)): return dtype - if isinstance(dtype, compat.string_types): - try: - return constructor(dtype) - except TypeError: - pass + + return None + + for dtype_type, constructor in self.dtypes.items(): + try: + return constructor(dtype) + except TypeError: + pass return None diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fe508dc1bb0bc8..a5e9107b8a660f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -633,8 +633,9 @@ def 
_astype(self, dtype, copy=False, errors='raise', values=None, return self.make_block(Categorical(self.values, dtype=dtype)) # astype processing - dtype = np.dtype(dtype) - if self.dtype == dtype: + if not is_extension_array_dtype(dtype): + dtype = np.dtype(dtype) + if is_dtype_equal(self.dtype, dtype): if copy: return self.copy() return self @@ -662,7 +663,13 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, # _astype_nansafe works fine with 1-d only values = astype_nansafe(values.ravel(), dtype, copy=True) - values = values.reshape(self.shape) + + # TODO(extension) + # should we make this attribute? + try: + values = values.reshape(self.shape) + except AttributeError: + pass newb = make_block(values, placement=self.mgr_locs, klass=klass) @@ -3170,6 +3177,10 @@ def get_block_type(values, dtype=None): cls = TimeDeltaBlock elif issubclass(vtype, np.complexfloating): cls = ComplexBlock + elif is_categorical(values): + cls = CategoricalBlock + elif is_extension_array_dtype(values): + cls = ExtensionBlock elif issubclass(vtype, np.datetime64): assert not is_datetimetz(values) cls = DatetimeBlock @@ -3179,10 +3190,6 @@ def get_block_type(values, dtype=None): cls = IntBlock elif dtype == np.bool_: cls = BoolBlock - elif is_categorical(values): - cls = CategoricalBlock - elif is_extension_array_dtype(values): - cls = ExtensionBlock else: cls = ObjectBlock return cls diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 31c489e2f8941d..cb5ee8388c2c48 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -638,7 +638,8 @@ def fill_zeros(result, x, y, name, fill): # if we have a fill of inf, then sign it correctly # (GH 6178 and PR 9308) if np.isinf(fill): - signs = np.sign(y if name.startswith(('r', '__r')) else x) + signs = y if name.startswith(('r', '__r')) else x + signs = np.sign(signs.astype('float', copy=False)) negative_inf_mask = (signs.ravel() < 0) & mask np.putmask(result, negative_inf_mask, -fill) diff --git 
a/pandas/core/ops.py b/pandas/core/ops.py index e14f82906cd065..5d4aa2a3d3759d 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -27,7 +27,7 @@ is_integer_dtype, is_categorical_dtype, is_object_dtype, is_timedelta64_dtype, is_datetime64_dtype, is_datetime64tz_dtype, - is_bool_dtype, + is_bool_dtype, is_extension_array_dtype, is_list_like, is_scalar, _ensure_object) @@ -1003,8 +1003,18 @@ def _arith_method_SERIES(cls, op, special): if op is divmod else _construct_result) def na_op(x, y): - import pandas.core.computation.expressions as expressions + # handle extension array ops + # TODO(extension) + # the ops *between* non-same-type extension arrays are not + # very well defined + if (is_extension_array_dtype(x) or is_extension_array_dtype(y)): + if (op_name.startswith('__r') and not + is_extension_array_dtype(y) and not + is_scalar(y)): + y = x.__class__._from_sequence(y) + return op(x, y) + import pandas.core.computation.expressions as expressions try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: @@ -1025,6 +1035,7 @@ def na_op(x, y): return result def safe_na_op(lvalues, rvalues): + # all others try: with np.errstate(all='ignore'): return na_op(lvalues, rvalues) @@ -1035,14 +1046,21 @@ def safe_na_op(lvalues, rvalues): raise def wrapper(left, right): - if isinstance(right, ABCDataFrame): return NotImplemented left, right = _align_method_SERIES(left, right) res_name = get_op_result_name(left, right) - if is_datetime64_dtype(left) or is_datetime64tz_dtype(left): + if is_categorical_dtype(left): + raise TypeError("{typ} cannot perform the operation " + "{op}".format(typ=type(left).__name__, op=str_rep)) + + elif (is_extension_array_dtype(left) or + is_extension_array_dtype(right)): + pass + + elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left): result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex) return construct_result(left, result, index=left.index, name=res_name, @@ -1054,10 +1072,6 @@ def 
wrapper(left, right): index=left.index, name=res_name, dtype=result.dtype) - elif is_categorical_dtype(left): - raise TypeError("{typ} cannot perform the operation " - "{op}".format(typ=type(left).__name__, op=str_rep)) - lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): @@ -1136,6 +1150,14 @@ def na_op(x, y): # The `not is_scalar(y)` check excludes the string "category" return op(y, x) + # handle extension array ops + # TODO(extension) + # the ops *between* non-same-type extension arrays are not + # very well defined + elif (is_extension_array_dtype(x) or + is_extension_array_dtype(y)): + return op(x, y) + elif is_object_dtype(x.dtype): result = _comp_method_OBJECT_ARRAY(op, x, y) diff --git a/pandas/core/series.py b/pandas/core/series.py index 850f22e24010a9..d27d5ab6afe8c4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4055,12 +4055,9 @@ def _try_cast(arr, take_fast_path): subarr = Categorical(arr, dtype.categories, ordered=dtype.ordered) elif is_extension_array_dtype(dtype): - # We don't allow casting to third party dtypes, since we don't - # know what array belongs to which type. - msg = ("Cannot cast data to extension dtype '{}'. " - "Pass the extension array directly.".format(dtype)) - raise ValueError(msg) - + # create an extension array from its dtype + array_type = dtype.array_type + subarr = array_type(subarr, copy=copy) elif dtype is not None and raise_cast_failure: raise diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 12201f62946aca..adb4bf3f47572e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -514,7 +514,6 @@ def _to_str_columns(self): Render a DataFrame to a list of columns (as lists of strings). 
""" frame = self.tr_frame - # may include levels names also str_index = self._get_formatted_index(frame) diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 9da985625c4ee3..1f42de67375282 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -45,6 +45,7 @@ class TestMyDtype(BaseDtypeTests): from .dtype import BaseDtypeTests # noqa from .getitem import BaseGetitemTests # noqa from .groupby import BaseGroupbyTests # noqa +from .ops import BaseOpsTests # noqa from .interface import BaseInterfaceTests # noqa from .methods import BaseMethodsTests # noqa from .missing import BaseMissingTests # noqa diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c5436aa731d50e..0ad3196277c34f 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -19,7 +19,8 @@ def test_value_counts(self, all_data, dropna): other = all_data result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts( + dropna=dropna).sort_index() self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 32cf29818e0694..ceb1ac4fcddac0 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -18,6 +18,11 @@ def test_isna(self, data_missing): expected = pd.Series(expected) self.assert_series_equal(result, expected) + def test_dropna_array(self, data_missing): + result = data_missing.dropna() + expected = data_missing[[1]] + self.assert_extension_array_equal(result, expected) + def test_dropna_series(self, data_missing): ser = pd.Series(data_missing) result = ser.dropna() diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py new file mode 100644 index 
00000000000000..3742f342e43463 --- /dev/null +++ b/pandas/tests/extension/base/ops.py @@ -0,0 +1,6 @@ +from .base import BaseExtensionTests + + +class BaseOpsTests(BaseExtensionTests): + """Various Series and DataFrame ops methods.""" + pass diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index fe920a47ab7409..ff739c97f2785d 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -26,6 +26,14 @@ def test_concat(self, data, in_frame): assert dtype == data.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) + def test_append(self, data): + + wrapped = pd.Series(data) + result = wrapped.append(wrapped) + expected = pd.concat([wrapped, wrapped]) + + self.assert_series_equal(result, expected) + @pytest.mark.parametrize('in_frame', [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) @@ -84,6 +92,7 @@ def test_concat_columns(self, data, na_value): expected = pd.DataFrame({ 'A': data._from_sequence(list(data[:3]) + [na_value]), 'B': [np.nan, 1, 2, 3]}) + result = pd.concat([df1, df2], axis=1) self.assert_frame_equal(result, expected) result = pd.concat([df1['A'], df2['B']], axis=1) diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 530a4e7a22a7a3..c4928d026ca70f 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -55,6 +55,10 @@ class TestDtype(base.BaseDtypeTests): pass +class TestOps(base.BaseOpsTests): + pass + + class TestInterface(base.BaseInterfaceTests): @pytest.mark.skip(reason="Memory usage doesn't match") def test_memory_usage(self): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index e9431bd0c233cc..0213837a6f3284 100644 --- a/pandas/tests/extension/decimal/array.py +++ 
b/pandas/tests/extension/decimal/array.py @@ -27,7 +27,7 @@ def construct_from_string(cls, string): class DecimalArray(ExtensionArray): dtype = DecimalDtype() - def __init__(self, values): + def __init__(self, values, copy=False): assert all(isinstance(v, decimal.Decimal) for v in values) values = np.asarray(values, dtype=object) @@ -40,7 +40,7 @@ def __init__(self, values): # self._values = self.values = self.data @classmethod - def _from_sequence(cls, scalars): + def _from_sequence(cls, scalars, copy=False): return cls(scalars) @classmethod @@ -101,5 +101,8 @@ def _concat_same_type(cls, to_concat): return cls(np.concatenate([x._data for x in to_concat])) +DecimalDtype.array_type = DecimalArray + + def make_data(): return [decimal.Decimal(random.random()) for _ in range(100)] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 1f8cf0264f62f0..86b902c5309c1b 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -99,10 +99,18 @@ class TestInterface(BaseDecimal, base.BaseInterfaceTests): pass -class TestConstructors(BaseDecimal, base.BaseConstructorsTests): +class TestOps(BaseDecimal, base.BaseOpsTests): pass +class TestConstructors(BaseDecimal, base.BaseConstructorsTests): + + @pytest.mark.xfail(reason="not implemented constructor from dtype") + def test_from_dtype(self, data): + # construct from our dtype & string dtype + pass + + class TestReshaping(BaseDecimal, base.BaseReshapingTests): pass @@ -147,6 +155,10 @@ class TestGroupby(BaseDecimal, base.BaseGroupbyTests): pass +# TODO(extension) +@pytest.mark.xfail(reason=( + "raising AssertionError as this is not implemented, " + "though easy enough to do")) def test_series_constructor_coerce_data_to_extension_dtype_raises(): xpr = ("Cannot cast data to extension dtype 'decimal'. 
Pass the " "extension array directly.") diff --git a/pandas/tests/extension/integer/__init__.py b/pandas/tests/extension/integer/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py new file mode 100644 index 00000000000000..39526f6e18b5a5 --- /dev/null +++ b/pandas/tests/extension/integer/test_integer.py @@ -0,0 +1,390 @@ +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest + +from pandas.tests.extension import base + +from pandas.core.arrays import ( + to_integer_array, + Int8Array, Int16Array, Int32Array, Int64Array, + UInt8Array, UInt16Array, UInt32Array, UInt64Array) +from pandas.core.arrays.integer import ( + Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype, + IntegerArray, make_data) + + +@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype]) +def dtype(request): + return request.param() + + +@pytest.fixture(params=[Int8Array, Int16Array, Int32Array, Int64Array, + UInt8Array, UInt16Array, UInt32Array, UInt64Array]) +def arrays(request): + return request.param + + +@pytest.fixture +def data(arrays): + return arrays(make_data()) + + +@pytest.fixture +def data_missing(arrays): + return arrays([np.nan, 1]) + + +@pytest.fixture +def data_for_sorting(arrays): + return arrays([1, 2, 0]) + + +@pytest.fixture +def data_missing_for_sorting(arrays): + return arrays([1, np.nan, 0]) + + +@pytest.fixture +def na_cmp(): + # we are np.nan + return lambda x, y: np.isnan(x) and np.isnan(y) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(arrays): + b = 1 + a = 0 + c = 2 + na = np.nan + return arrays([b, b, na, na, a, a, b, c]) + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + if dtype.is_signed_integer: + assert 
np.dtype(dtype.type).kind == 'i' + else: + assert np.dtype(dtype.type).kind == 'u' + assert dtype.name is not None + + +class BaseInteger(object): + + def assert_series_equal(self, left, right, *args, **kwargs): + + left_na = left.isna() + right_na = right.isna() + + tm.assert_series_equal(left_na, right_na) + return tm.assert_series_equal(left[~left_na], + right[~right_na], + *args, **kwargs) + + def assert_frame_equal(self, left, right, *args, **kwargs): + # TODO(EA): select_dtypes + tm.assert_index_equal( + left.columns, right.columns, + exact=kwargs.get('check_column_type', 'equiv'), + check_names=kwargs.get('check_names', True), + check_exact=kwargs.get('check_exact', False), + check_categorical=kwargs.get('check_categorical', True), + obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame'))) + + integers = (left.dtypes == 'integer').index + + for col in integers: + self.assert_series_equal(left[col], right[col], + *args, **kwargs) + + left = left.drop(columns=integers) + right = right.drop(columns=integers) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestDtype(BaseInteger, base.BaseDtypeTests): + + @pytest.mark.skip(reason="using multiple dtypes") + def test_is_dtype_unboxes_dtype(self): + # we have multiple dtypes, so skip + pass + + +class TestOps(BaseInteger, base.BaseOpsTests): + + def compare(self, s, op, other): + + result = getattr(s, op)(other) + + # compute expected + mask = s.isna() + + # other array is an Integer + if isinstance(other, IntegerArray): + omask = getattr(other, 'mask', None) + mask = getattr(other, 'data', other) + if omask is not None: + mask |= omask + + # to compare properly, we convert the expected + # to float, mask to nans and convert infs + # if we have uints then we process as uints + # then convert to float + # and we ultimately want to create an IntArray + # for comparisons + rs = pd.Series(s.values.data) + expected = getattr(rs, op)(other) + + # truediv can make infs + if 'truediv' in op: + 
fill_value = np.nan + else: + fill_value = 0 + + try: + expected[(expected == np.inf) | (expected == -np.inf)] = fill_value + expected = expected.astype(s.dtype) + + except ValueError: + + expected = expected.astype(float) + expected[(expected == np.inf) | (expected == -np.inf)] = fill_value + expected = expected.astype(s.dtype) + + expected[mask] = np.nan + self.assert_series_equal(result, expected) + + def test_arith_scalar(self, data, all_arithmetic_operators): + # scalar + op = all_arithmetic_operators + s = pd.Series(data) + self.compare(s, op, 1) + + def test_arith_array(self, data, all_arithmetic_operators): + # ndarray & other series + op = all_arithmetic_operators + s = pd.Series(data) + self.compare(s, op, np.ones(len(s), dtype=s.dtype.type)) + + def test_arith_integer_array(self, data, all_arithmetic_operators): + # we operate with a rhs of an integer array + + op = all_arithmetic_operators + s = pd.Series(data) + rhs = pd.Series([1] * len(data), dtype=data.dtype) + rhs.iloc[-1] = np.nan + + self.compare(s, op, rhs) + + def test_compare_scalar(self, data, all_compare_operators): + op = all_compare_operators + + # array + result = getattr(data, op)(0) + expected = getattr(data.data, op)(0) + + # fill the nan locations + expected[data.mask] = True if op == '__ne__' else False + + tm.assert_numpy_array_equal(result, expected) + + # series + s = pd.Series(data) + result = getattr(s, op)(0) + + expected = pd.Series(data.data) + expected = getattr(expected, op)(0) + + # fill the nan locations + expected[data.mask] = True if op == '__ne__' else False + + tm.assert_series_equal(result, expected) + + def test_error(self, data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + opa = getattr(data, op) + + # invalid scalars + with pytest.raises(TypeError): + ops('foo') + with pytest.raises(TypeError): + ops(pd.Timestamp('20180101')) + + # invalid array-likes + with pytest.raises(TypeError): + 
ops(pd.Series('foo', index=s.index)) + + if op != '__rpow__': + # TODO(extension) + # rpow with a datetimelike coerces the integer array incorrectly + with pytest.raises(TypeError): + ops(pd.Series(pd.date_range('20180101', periods=len(s)))) + + # 2d + with pytest.raises(TypeError): + opa(pd.DataFrame({'A': s})) + with pytest.raises(TypeError): + opa(np.arange(len(s)).reshape(-1, len(s))) + + +class TestInterface(BaseInteger, base.BaseInterfaceTests): + pass + + +class TestConstructors(BaseInteger, base.BaseConstructorsTests): + + def test_from_dtype_from_float(self, data): + # construct from our dtype & string dtype + dtype = data.dtype + + # from float + expected = pd.Series(data) + result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # from int / list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # from int / array + expected = pd.Series(data).dropna().reset_index(drop=True) + dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) + result = pd.Series(dropped, dtype=str(dtype)) + self.assert_series_equal(result, expected) + + +class TestReshaping(BaseInteger, base.BaseReshapingTests): + + def test_concat_mixed_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/20762 + df1 = pd.DataFrame({'A': data[:3]}) + df2 = pd.DataFrame({"A": [1, 2, 3]}) + df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category') + df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])}) + dfs = [df1, df2, df3, df4] + + # dataframes + result = pd.concat(dfs) + expected = pd.concat([x.astype(object) for x in dfs]) + self.assert_frame_equal(result, expected) + + # series + result = pd.concat([x['A'] for x in dfs]) + expected = pd.concat([x['A'].astype(object) for x in dfs]) + self.assert_series_equal(result, expected) + + result = pd.concat([df1, df2]) + expected = pd.concat([df1.astype('object'), 
df2.astype('object')]) + self.assert_frame_equal(result, expected) + + # concat of an Integer and Int coerces to object dtype + # TODO(jreback) once integrated this would + # be a result of Integer + result = pd.concat([df1['A'], df2['A']]) + expected = pd.concat([df1['A'].astype('object'), + df2['A'].astype('object')]) + self.assert_series_equal(result, expected) + + +class TestGetitem(BaseInteger, base.BaseGetitemTests): + pass + + +class TestMissing(BaseInteger, base.BaseMissingTests): + pass + + +class TestMethods(BaseInteger, base.BaseMethodsTests): + + @pytest.mark.xfail(reason="need a Index type with ExtensionArrays") + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts( + dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + +class TestCasting(BaseInteger, base.BaseCastingTests): + pass + + +class TestGroupby(BaseInteger, base.BaseGroupbyTests): + pass + + +def test_frame_repr(data_missing): + + df = pd.DataFrame({'A': data_missing}) + result = repr(df) + expected = ' A\n0 NaN\n1 1' + assert result == expected + + +def test_conversions(data_missing): + + # astype to object series + df = pd.DataFrame({'A': data_missing}) + result = df['A'].astype('object') + expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A') + tm.assert_series_equal(result, expected) + + # convert to object ndarray + # we assert that we are exactly equal + # including type conversions of scalars + result = df['A'].astype('object').values + expected = np.array([np.nan, 1], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + for r, e in zip(result, expected): + if pd.isnull(r): + assert pd.isnull(e) + else: + assert r == e + assert type(r) == type(e) + + 
+@pytest.mark.parametrize( + 'values', + [ + ['foo', 'bar'], + 'foo', + 1, + 1.0, + pd.date_range('20130101', periods=2), + np.array(['foo'])]) +def test_to_integer_array_error(values): + # error in converting existing arrays to IntegerArrays + with pytest.raises(TypeError): + to_integer_array(values) + + +@pytest.mark.parametrize( + 'values, expected', + [ + (np.array([1], dtype='int64'), Int64Array([1])), + (np.array([1, np.nan]), Int64Array([1, np.nan]))]) +def test_to_integer_array(values, expected): + # convert existing arrays to IntegerArrays + result = to_integer_array(values) + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 88bb66f38b35c4..a81849c6bb3221 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -44,7 +44,7 @@ def construct_from_string(cls, string): class JSONArray(ExtensionArray): dtype = JSONDtype() - def __init__(self, values): + def __init__(self, values, copy=False): for val in values: if not isinstance(val, self.dtype.type): raise TypeError @@ -58,7 +58,7 @@ def __init__(self, values): # self._values = self.values = self.data @classmethod - def _from_sequence(cls, scalars): + def _from_sequence(cls, scalars, copy=False): return cls(scalars) @classmethod @@ -170,6 +170,9 @@ def _values_for_argsort(self): return np.array(frozen, dtype=object)[1:] +JSONDtype.array_type = JSONArray + + def make_data(): # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer return [collections.UserDict([ diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index b7ac8033f3f6dc..fd1010ff45e962 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -129,10 +129,18 @@ def test_custom_asserts(self): self.assert_frame_equal(a.to_frame(), b.to_frame()) -class TestConstructors(BaseJSON, base.BaseConstructorsTests): +class TestOps(BaseJSON, base.BaseOpsTests): pass +class TestConstructors(BaseJSON, base.BaseConstructorsTests): + + @pytest.mark.xfail(reason="not implemented constructor from dtype") + def test_from_dtype(self, data): + # construct from our dtype & string dtype + pass + + class TestReshaping(BaseJSON, base.BaseReshapingTests): pass