Skip to content

Commit

Permalink
ENH: add integer-na support via an ExtensionArray
Browse files Browse the repository at this point in the history
closes #20700
  • Loading branch information
jreback committed May 21, 2018
1 parent ec1c081 commit 0758f1d
Show file tree
Hide file tree
Showing 27 changed files with 1,105 additions and 41 deletions.
9 changes: 9 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,15 @@ def all_arithmetic_operators(request):
return request.param


@pytest.fixture(params=['__eq__', '__ne__', '__le__',
                        '__lt__', '__ge__', '__gt__'])
def all_compare_operators(request):
    """
    Fixture for dunder names for common compare operations

    * ==
    * !=
    * <=
    * <
    * >=
    * >
    """
    # pytest parametrizes the fixture; each run receives one dunder name
    return request.param


@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip',
pytest.param('xz', marks=td.skip_if_no_lzma)])
def compression(request):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _reconstruct_data(values, dtype, original):
"""
from pandas import Index
if is_extension_array_dtype(dtype):
pass
values = dtype.array_type._from_sequence(values)
elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
values = Index(original)._shallow_copy(values, name=None)
elif is_bool_dtype(dtype):
Expand Down Expand Up @@ -705,7 +705,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,

else:

if is_categorical_dtype(values) or is_sparse(values):
if is_extension_array_dtype(values) or is_sparse(values):

# handle extension arrays (e.g. Categorical) and sparse
result = Series(values)._values.value_counts(dropna=dropna)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
from .base import ExtensionArray # noqa
from .categorical import Categorical # noqa
from .integer import ( # noqa
Int8Array, Int16Array, Int32Array, Int64Array,
UInt8Array, UInt16Array, UInt32Array, UInt64Array,
to_integer_array)
44 changes: 43 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class ExtensionArray(object):
* isna
* take
* copy
* append
* _concat_same_type
An additional method is available to satisfy pandas' internal,
Expand All @@ -49,6 +50,7 @@ class ExtensionArray(object):
methods:
* fillna
* dropna
* unique
* factorize / _values_for_factorize
* argsort / _values_for_argsort
Expand Down Expand Up @@ -82,14 +84,16 @@ class ExtensionArray(object):
# Constructors
# ------------------------------------------------------------------------
@classmethod
def _from_sequence(cls, scalars):
def _from_sequence(cls, scalars, copy=False):
"""Construct a new ExtensionArray from a sequence of scalars.
Parameters
----------
scalars : Sequence
Each element will be an instance of the scalar type for this
array, ``cls.dtype.type``.
copy : boolean, default False
if True, copy the underlying data
Returns
-------
ExtensionArray
Expand Down Expand Up @@ -379,6 +383,16 @@ def fillna(self, value=None, method=None, limit=None):
new_values = self.copy()
return new_values

def dropna(self):
    """
    Return ExtensionArray without NA values.

    Returns
    -------
    valid : ExtensionArray
    """
    # isna() yields the NA mask; invert it and index to keep the rest
    return self[~self.isna()]

def unique(self):
"""Compute the ExtensionArray of unique values.
Expand Down Expand Up @@ -567,6 +581,34 @@ def copy(self, deep=False):
"""
raise AbstractMethodError(self)

def append(self, other):
    """
    Append a collection of Arrays together

    Parameters
    ----------
    other : ExtensionArray or list/tuple of ExtensionArrays

    Returns
    -------
    appended : ExtensionArray

    Raises
    ------
    TypeError
        If any input is not an instance of this array's class.
    """
    cls = self.__class__

    # a list/tuple contributes each of its members; anything else is
    # treated as a single array to append
    if isinstance(other, (list, tuple)):
        to_concat = [self] + list(other)
    else:
        to_concat = [self, other]

    # every input must be an instance of this array's class before
    # handing the list to _concat_same_type
    if not all(isinstance(obj, cls) for obj in to_concat):
        raise TypeError('all inputs must be of type {}'.format(
            cls.__name__))

    return cls._concat_same_type(to_concat)

# ------------------------------------------------------------------------
# Block-related methods
# ------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2343,6 +2343,10 @@ def isin(self, values):
return algorithms.isin(self.codes, code_values)


# inform the Dtype about us
CategoricalDtype.array_type = Categorical


# The Series.cat accessor


Expand Down
Loading

0 comments on commit 0758f1d

Please sign in to comment.