From 0758f1dbdcd594d0e5d71d99800b0baa9074ba4c Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 5 May 2018 11:08:36 -0400
Subject: [PATCH] ENH: add integer-na support via an ExtensionArray

closes #20700
---
 pandas/conftest.py                            |   9 +
 pandas/core/algorithms.py                     |   4 +-
 pandas/core/arrays/__init__.py                |   4 +
 pandas/core/arrays/base.py                    |  44 +-
 pandas/core/arrays/categorical.py             |   4 +
 pandas/core/arrays/integer.py                 | 516 ++++++++++++++++++
 pandas/core/dtypes/base.py                    |   6 +
 pandas/core/dtypes/cast.py                    |   5 +
 pandas/core/dtypes/common.py                  |   4 +
 pandas/core/dtypes/dtypes.py                  |  22 +-
 pandas/core/internals.py                      |  21 +-
 pandas/core/missing.py                        |   3 +-
 pandas/core/ops.py                            |  38 +-
 pandas/core/series.py                         |   9 +-
 pandas/io/formats/format.py                   |   1 -
 pandas/tests/extension/base/__init__.py       |   1 +
 pandas/tests/extension/base/methods.py        |   3 +-
 pandas/tests/extension/base/missing.py        |   5 +
 pandas/tests/extension/base/ops.py            |   6 +
 pandas/tests/extension/base/reshaping.py      |   9 +
 .../extension/category/test_categorical.py    |   4 +
 pandas/tests/extension/decimal/array.py       |   7 +-
 .../tests/extension/decimal/test_decimal.py   |  14 +-
 pandas/tests/extension/integer/__init__.py    |   0
 .../tests/extension/integer/test_integer.py   | 390 +++++++++++++
 pandas/tests/extension/json/array.py          |   7 +-
 pandas/tests/extension/json/test_json.py      |  10 +-
 27 files changed, 1105 insertions(+), 41 deletions(-)
 create mode 100644 pandas/core/arrays/integer.py
 create mode 100644 pandas/tests/extension/base/ops.py
 create mode 100644 pandas/tests/extension/integer/__init__.py
 create mode 100644 pandas/tests/extension/integer/test_integer.py

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 1e1d72119b194d..562854fed89161 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -96,6 +96,15 @@ def all_arithmetic_operators(request):
     return request.param
 
 
+@pytest.fixture(params=['__eq__', '__ne__', '__le__',
+                        '__lt__', '__ge__', '__gt__'])
+def all_compare_operators(request):
+    """
+    Fixture for dunder names for common compare operations
+    """
+    return request.param
+
+
 @pytest.fixture(params=[None, 'gzip', 'bz2', 'zip',
                         pytest.param('xz', marks=td.skip_if_no_lzma)])
 def compression(request):
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 88bc497f9f22d3..eef2fde4386d87 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -154,7 +154,7 @@ def _reconstruct_data(values, dtype, original):
     """
     from pandas import Index
     if is_extension_array_dtype(dtype):
-        pass
+        values = dtype.array_type._from_sequence(values)
     elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
         values = Index(original)._shallow_copy(values, name=None)
     elif is_bool_dtype(dtype):
@@ -705,7 +705,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
 
     else:
 
-        if is_categorical_dtype(values) or is_sparse(values):
+        if is_extension_array_dtype(values) or is_sparse(values):
 
             # handle Categorical and sparse,
             result = Series(values)._values.value_counts(dropna=dropna)
diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
index f8adcf520c15ba..1c34c123e483c2 100644
--- a/pandas/core/arrays/__init__.py
+++ b/pandas/core/arrays/__init__.py
@@ -1,2 +1,6 @@
 from .base import ExtensionArray  # noqa
 from .categorical import Categorical  # noqa
+from .integer import (  # noqa
+    Int8Array, Int16Array, Int32Array, Int64Array,
+    UInt8Array, UInt16Array, UInt32Array, UInt64Array,
+    to_integer_array)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 1922801c30719e..e09a02c6143b0a 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -36,6 +36,7 @@ class ExtensionArray(object):
     * isna
     * take
     * copy
+    * append
     * _concat_same_type
 
     An additional method is available to satisfy pandas' internal,
@@ -49,6 +50,7 @@ class ExtensionArray(object):
     methods:
 
     * fillna
+    * dropna
     * unique
     * factorize / _values_for_factorize
     * argsort / _values_for_argsort
@@ -82,7 +84,7 @@ class ExtensionArray(object):
     # Constructors
     # ------------------------------------------------------------------------
     @classmethod
-    def _from_sequence(cls, scalars):
+    def _from_sequence(cls, scalars, copy=False):
         """Construct a new ExtensionArray from a sequence of scalars.
 
         Parameters
@@ -90,6 +92,8 @@ def _from_sequence(cls, scalars):
         scalars : Sequence
             Each element will be an instance of the scalar type for this
             array, ``cls.dtype.type``.
+        copy : boolean, default False
+            if True, copy the underlying data
         Returns
         -------
         ExtensionArray
@@ -379,6 +383,16 @@ def fillna(self, value=None, method=None, limit=None):
             new_values = self.copy()
         return new_values
 
+    def dropna(self):
+        """Return a new ExtensionArray with the NA entries removed.
+
+        Returns
+        -------
+        valid : ExtensionArray
+        """
+        # keep only the positions that ``isna`` does not flag
+        return self[~self.isna()]
+
     def unique(self):
         """Compute the ExtensionArray of unique values.
 
@@ -567,6 +581,34 @@ def copy(self, deep=False):
         """
         raise AbstractMethodError(self)
 
+    def append(self, other):
+        """
+        Append one or more arrays of the same class to this one
+
+        Parameters
+        ----------
+        other : ExtensionArray or list/tuple of ExtensionArrays
+
+        Returns
+        -------
+        appended : ExtensionArray
+        """
+        to_concat = [self]
+        cls = self.__class__
+
+        if isinstance(other, (list, tuple)):
+            to_concat = to_concat + list(other)
+        else:
+            to_concat.append(other)
+
+        # every input must be an instance of this array's own class
+        for obj in to_concat:
+            if not isinstance(obj, cls):
+                raise TypeError('all inputs must be of type {}'.format(
+                    cls.__name__))
+
+        return cls._concat_same_type(to_concat)
+
     # ------------------------------------------------------------------------
     # Block-related methods
     # ------------------------------------------------------------------------
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index abcb9ae3494b50..63b99ffa06b8ac 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2343,6 +2343,10 @@ def isin(self, values):
         return algorithms.isin(self.codes, code_values)
 
 
+# inform the Dtype about us
+CategoricalDtype.array_type = Categorical
+
+
 # The Series.cat accessor
 
 
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
new file mode 100644
index 00000000000000..5b92e1bb6f2f05
--- /dev/null
+++ b/pandas/core/arrays/integer.py
@@ -0,0 +1,516 @@
+import sys
+import operator
+import warnings
+import numpy as np
+
+from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
+from pandas.compat import set_function_name, PY3
+from pandas.api.types import (is_integer, is_scalar, is_float,
+                              is_float_dtype, is_integer_dtype,
+                              is_object_dtype,
+                              infer_dtype)
+from pandas.core.arrays import ExtensionArray
+from pandas.core.dtypes.base import ExtensionDtype
+from pandas.core.dtypes.dtypes import registry
+from pandas.core.dtypes.missing import isna, notna
+from pandas.core import ops
+
+# numpy names of the supported dtypes, and class-name prefix formatters
+_integer_dtypes = ['int8', 'int16', 'int32', 'int64']
+_integer_formatter = lambda x: x.capitalize()  # 'int8' -> 'Int8'
+_unsigned_dtypes = ['uint8', 'uint16', 'uint32', 'uint64']
+_unsigned_formatter = lambda x: "{}{}".format(x[0].upper(), x[1:].capitalize())
+
+
+class IntegerDtype(ExtensionDtype):
+    """Base class of the generated signed dtypes, which set type and name."""
+    type = None
+    na_value = np.nan
+    kind = 'i'
+    is_integer = True
+    is_signed_integer = True
+    is_unsigned_integer = False
+
+    @classmethod
+    def construct_from_string(cls, string):
+        """Return an instance if ``string`` equals ``cls.name``, else raise."""
+        if string == cls.name:
+            return cls()
+        raise TypeError("Cannot construct a '{}' from "
+                        "'{}'".format(cls, string))
+
+    @classmethod
+    def construct_from_string_strict(cls, string):
+        """
+        Strict construction; raise a TypeError unless string starts with 'I'
+        """
+        if string.startswith('I'):
+            return cls.construct_from_string(string)
+        raise TypeError("could not construct IntegerDtype")
+
+
+class UnsignedIntegerDtype(IntegerDtype):
+    """Base class of the generated unsigned integer dtypes."""
+    kind = 'u'
+    is_signed_integer = False
+    is_unsigned_integer = True
+
+    @classmethod
+    def construct_from_string_strict(cls, string):
+        """
+        Strict construction; raise a TypeError unless string starts with 'U'
+        """
+        if string.startswith('U'):
+            return cls.construct_from_string(string)
+        raise TypeError("could not construct UnsignedIntegerDtype")
+
+
+def to_integer_array(values):
+    """
+    Infer an integer array class from ``values`` and construct it; raise
+    a TypeError if no array class matches the inferred name.
+
+    Parameters
+    ----------
+    values : 1D list-like
+
+    Returns
+    -------
+    an array of the class named by the input's signedness and item size
+    """
+    # NOTE: ``module`` is bound at the bottom of this file, before any call
+    values = np.array(values, copy=False)
+    kind = 'UInt' if values.dtype.kind == 'u' else 'Int'
+    array_type = "{}{}Array".format(kind, values.dtype.itemsize * 8)
+    try:
+        array_type = getattr(module, array_type)
+    except AttributeError:
+        raise TypeError("Incompatible dtype for {}".format(values.dtype))
+    return array_type(values, copy=False)
+
+
+def coerce_to_array(values, dtype, mask=None, copy=False):
+    """
+    Coerce the input values to a numpy integer array plus a boolean mask
+
+    Parameters
+    ----------
+    values : 1D list-like
+    dtype : IntegerDtype, ``dtype.type`` is the numpy type to cast to
+    mask : boolean 1D array, optional
+        True marks a missing value; computed with isna(values) if omitted
+    copy : boolean, default False
+        if True, copy the input
+
+    Returns
+    -------
+    tuple of (values, mask)
+    """
+    values = np.array(values, copy=copy)
+    if is_object_dtype(values):
+        inferred_type = infer_dtype(values)
+        if inferred_type not in ['floating', 'integer',
+                                 'mixed-integer', 'mixed-integer-float']:
+            raise TypeError("{} cannot be converted to an IntegerDtype".format(
+                values.dtype))
+    elif not (is_integer_dtype(values) or is_float_dtype(values)):
+        raise TypeError("{} cannot be converted to an IntegerDtype".format(
+            values.dtype))
+
+    if mask is None:
+        mask = isna(values)
+    else:
+        assert len(mask) == len(values)
+
+    if not values.ndim == 1:
+        raise TypeError("values must be a 1D list-like")
+    if not mask.ndim == 1:
+        raise TypeError("mask must be a 1D list-like")
+
+    if mask.any():
+        # masked slots get the placeholder 1 so that NaN never reaches the
+        # integer cast; copy first so the caller's input is left untouched
+        values = values.copy()
+        values[mask] = 1
+        values = values.astype(dtype.type)
+    else:
+        values = values.astype(dtype.type, copy=False)
+    return values, mask
+
+
+def make_comparison_op(op, cls):
+    """Build the comparison dunder method for ``op`` to be set on ``cls``."""
+    def cmp_method(self, other):
+        op_str = op.__name__
+        mask = None
+        if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)):
+            if other.ndim > 0 and len(self) != len(other):
+                raise ValueError('Lengths must match to compare')
+        elif isinstance(other, cls):
+            other, mask = other.data, other.mask
+
+        # numpy will show a DeprecationWarning on invalid elementwise
+        # comparisons, this will raise in the future
+        with warnings.catch_warnings(record=True):
+            with np.errstate(all='ignore'):
+                result = op(self.data, np.asarray(other))
+
+        # missing slots (in either operand) give False, or True for ``ne``
+        if mask is None:
+            mask = self.mask
+        else:
+            mask = self.mask | mask
+
+        result[mask] = True if op_str == 'ne' else False
+        return result
+
+    name = '__{name}__'.format(name=op.__name__)
+    return set_function_name(cmp_method, name, cls)
+
+
+def make_arithmetic_op(op, cls):
+    """Build the arithmetic dunder method for ``op`` to be set on ``cls``."""
+    def integer_arithmetic_method(self, other):
+        mask = None
+        if isinstance(other, (ABCSeries, ABCIndexClass)):
+            other = getattr(other, 'values', other)
+        elif isinstance(other, cls):
+            other, mask = other.data, other.mask
+        elif getattr(other, 'ndim', 0) > 1:
+            raise TypeError("can only perform ops with 1-d structures")
+        elif isinstance(other, IntegerArray):
+            pass
+        elif isinstance(other, np.ndarray):
+            if not other.ndim:
+                other = other.item()
+            elif other.ndim == 1:
+                if not (is_float_dtype(other) or is_integer_dtype(other)):
+                    raise TypeError("can only perform ops with numeric values")
+        else:
+            if not (is_float(other) or is_integer(other)):
+                raise TypeError("can only perform ops with numeric values")
+
+        # nans propagate; copy so that the result never shares, and the inf
+        # masking below never mutates, this array's own mask
+        if mask is None:
+            mask = self.mask.copy()
+        else:
+            mask = self.mask | mask
+
+        with np.errstate(all='ignore'):
+            result = op(self.data, other)
+
+        # infs cannot be stored as integers, so mask them as missing
+        if is_float_dtype(result):
+            mask |= (result == np.inf) | (result == -np.inf)
+
+        return cls(result, mask=mask)
+
+    name = '__{name}__'.format(name=op.__name__)
+    return set_function_name(integer_arithmetic_method, name, cls)
+
+
+class IntegerArray(ExtensionArray):
+    """
+    We represent an IntegerArray with 2 numpy arrays
+    - data: contains a numpy integer array of the appropriate dtype
+    - mask: a boolean array holding a mask on the data, True is missing
+    """
+
+    dtype = None
+
+    def __init__(self, values, mask=None, copy=False):
+        self.data, self.mask = coerce_to_array(
+            values, dtype=self.dtype, mask=mask, copy=copy)
+
+    @classmethod
+    def _from_sequence(cls, scalars, mask=None, copy=False):
+        return cls(scalars, mask=mask, copy=copy)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        return cls(values)
+
+    def __getitem__(self, item):
+        if is_integer(item):
+            if self.mask[item]:
+                return self.dtype.na_value
+            return self.data[item]
+        return type(self)(self.data[item], mask=self.mask[item])
+
+    def _coerce_to_ndarray(self):
+        """ coerce to an object ndarray, missing slots hold ``_na_value`` """
+        # TODO(jreback) make this better
+        data = self.data.astype(object)
+        data[self.mask] = self._na_value
+        return data
+
+    def __array__(self, dtype=None):
+        """
+        the array interface, return my values
+        We return an object array here to preserve our scalar values
+        """
+        return self._coerce_to_ndarray()
+
+    def __iter__(self):
+        """Iterate over elements of the array.
+
+        Missing slots yield ``self.dtype.na_value`` instead of the data.
+        """
+        # This needs to be implemented so that pandas recognizes extension
+        # arrays as list-like. The default implementation makes successive
+        # calls to ``__getitem__``, which may be slower than necessary.
+        na_value = self.dtype.na_value
+        for value, missing in zip(self.data, self.mask):
+            yield na_value if missing else value
+
+    def _formatting_values(self):
+        # type: () -> np.ndarray
+        return self._coerce_to_ndarray()
+
+    def take(self, indexer, allow_fill=False, fill_value=None):
+        from pandas.api.extensions import take
+
+        # we always fill with 1 internally
+        # to avoid upcasting
+        data_fill_value = 1 if isna(fill_value) else fill_value
+        result = take(self.data, indexer, fill_value=data_fill_value,
+                      allow_fill=allow_fill)
+
+        mask = take(self.mask, indexer, fill_value=True,
+                    allow_fill=allow_fill)
+
+        # if we are filling
+        # we only fill where the indexer is null
+        # not existing missing values
+        # TODO(jreback) what if we have a non-na float as a fill value?
+        if allow_fill and notna(fill_value):
+            fill_mask = np.asarray(indexer) == -1
+            result[fill_mask] = fill_value
+            mask = mask ^ fill_mask
+
+        return self._from_sequence(result, mask=mask)
+
+    def copy(self, deep=False):
+        if deep:
+            return type(self)(
+                self.data.copy(), mask=self.mask.copy())
+        return type(self)(self)
+
+    def __setitem__(self, key, value):
+        _is_scalar = is_scalar(value)
+        if _is_scalar:
+            value = [value]
+        value, mask = coerce_to_array(value, dtype=self.dtype)
+
+        if _is_scalar:
+            value = value[0]
+            mask = mask[0]
+
+        self.data[key] = value
+        self.mask[key] = mask
+
+    def __len__(self):
+        return len(self.data)
+
+    def __repr__(self):
+        formatted = self._formatting_values()
+        return '{}({})'.format(
+            self.__class__.__name__,
+            formatted.tolist())
+
+    @property
+    def nbytes(self):
+        return self.data.nbytes + self.mask.nbytes
+
+    def isna(self):
+        return self.mask
+
+    @property
+    def _na_value(self):
+        return np.nan
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        data = np.concatenate([x.data for x in to_concat])
+        mask = np.concatenate([x.mask for x in to_concat])
+        return cls(data, mask=mask)
+
+    def astype(self, dtype, copy=True):
+        """Cast to a NumPy array with 'dtype'.
+
+        Parameters
+        ----------
+        dtype : str or dtype
+            Typecode or data-type to which the array is cast.
+        copy : bool, default True
+            Currently not used: the values are always materialized
+            into a new object array first, and that array is then cast
+            to 'dtype'.
+
+        Returns
+        -------
+        array : ndarray
+            NumPy ndarray with 'dtype' for its dtype.
+        """
+        data = self._coerce_to_ndarray()
+        return data.astype(dtype=dtype, copy=False)
+
+    @property
+    def _ndarray_values(self):
+        # type: () -> np.ndarray
+        """Internal pandas method for lossy conversion to a NumPy ndarray.
+
+        This method is not part of the pandas interface.
+
+        The expectation is that this is cheap to compute, and is primarily
+        used for interacting with our indexers.
+        """
+        return self.data
+
+    def value_counts(self, dropna=True):
+        """
+        Returns a Series containing counts of each distinct value.
+
+        With ``dropna=False`` the mask total is appended under a NaN label.
+
+        Parameters
+        ----------
+        dropna : boolean, default True
+            Don't include counts of NaN.
+
+        Returns
+        -------
+        counts : Series
+
+        See Also
+        --------
+        Series.value_counts
+
+        """
+        from pandas import Index, Series
+
+        # compute counts on the data with no nans
+        data = self.data[~self.mask]
+        value_counts = Index(data).value_counts()
+
+        array = value_counts.values
+        index = value_counts.index
+
+        # if we want nans, count the mask
+        if not dropna:
+            array = np.append(array, [self.mask.sum()])
+            # TODO(extension)
+            # should this be an Index backed by the
+            # Array type?
+            index = index.astype(object).append(Index([np.nan]))
+
+        return Series(array, index=index)
+
+    def _values_for_argsort(self):
+        # type: () -> ndarray
+        """Return values for sorting.
+
+        Returns
+        -------
+        ndarray
+            The transformed values should maintain the ordering between values
+            within the array.
+
+        See Also
+        --------
+        ExtensionArray.argsort
+        """
+        data = self.data.copy()
+        # place missing values below the smallest stored value; skipped when
+        # nothing is missing so that an empty array does not hit ``min()``
+        # NOTE(review): ``min() - 1`` may wrap at the dtype's lower bound
+        if self.mask.any():
+            data[self.mask] = data.min() - 1
+        return data
+
+    @classmethod
+    def _add_comparison_methods_binary(cls):
+        """ add in comparison methods """
+        cls.__eq__ = make_comparison_op(operator.eq, cls)
+        cls.__ne__ = make_comparison_op(operator.ne, cls)
+        cls.__lt__ = make_comparison_op(operator.lt, cls)
+        cls.__gt__ = make_comparison_op(operator.gt, cls)
+        cls.__le__ = make_comparison_op(operator.le, cls)
+        cls.__ge__ = make_comparison_op(operator.ge, cls)
+
+    @classmethod
+    def _add_numeric_methods_binary(cls):
+        """ add in numeric methods """
+        cls.__add__ = make_arithmetic_op(operator.add, cls)
+        cls.__radd__ = make_arithmetic_op(ops.radd, cls)
+        cls.__sub__ = make_arithmetic_op(operator.sub, cls)
+        cls.__rsub__ = make_arithmetic_op(ops.rsub, cls)
+        cls.__mul__ = make_arithmetic_op(operator.mul, cls)
+        cls.__rmul__ = make_arithmetic_op(ops.rmul, cls)
+        cls.__rpow__ = make_arithmetic_op(ops.rpow, cls)
+        cls.__pow__ = make_arithmetic_op(operator.pow, cls)
+        cls.__mod__ = make_arithmetic_op(operator.mod, cls)
+        cls.__floordiv__ = make_arithmetic_op(operator.floordiv, cls)
+        cls.__rfloordiv__ = make_arithmetic_op(ops.rfloordiv, cls)
+        cls.__truediv__ = make_arithmetic_op(operator.truediv, cls)
+        cls.__rtruediv__ = make_arithmetic_op(ops.rtruediv, cls)
+        if not PY3:
+            cls.__div__ = make_arithmetic_op(operator.div, cls)
+            cls.__rdiv__ = make_arithmetic_op(ops.rdiv, cls)
+
+        cls.__divmod__ = make_arithmetic_op(divmod, cls)
+
+
+class UnsignedIntegerArray(IntegerArray):
+    """Common base class of the generated unsigned integer arrays."""
+
+
+module = sys.modules[__name__]  # target for the generated classes
+
+
+# create the Dtype classes (Int8Dtype ... UInt64Dtype) on this module
+types = [(_integer_dtypes, IntegerDtype, _integer_formatter),
+         (_unsigned_dtypes, UnsignedIntegerDtype, _unsigned_formatter)]
+for dtypes, superclass, formatter in types:
+
+    for dtype in dtypes:
+
+        name = formatter(dtype)
+        classname = "{}Dtype".format(name)
+        attributes_dict = {'type': getattr(np, dtype),
+                           'name': name}
+        dtype_type = type(classname, (superclass, ), attributes_dict)
+        setattr(module, classname, dtype_type)
+
+        # register, with the strict string constructor used for lookups
+        registry.register(dtype_type, dtype_type.construct_from_string_strict)
+
+
+# create the Array classes (Int8Array ... UInt64Array) on this module
+types = [(_integer_dtypes, IntegerArray, _integer_formatter),
+         (_unsigned_dtypes, UnsignedIntegerArray, _unsigned_formatter)]
+for dtypes, superclass, formatter in types:
+
+    for dtype in dtypes:
+
+        dtype_type = getattr(module, "{}Dtype".format(formatter(dtype)))
+        classname = "{}Array".format(formatter(dtype))
+        attributes_dict = {'dtype': dtype_type()}
+        array_type = type(classname, (superclass, ), attributes_dict)
+        setattr(module, classname, array_type)
+
+        # add the arithmetic and comparison dunder methods to the new class
+        array_type._add_numeric_methods_binary()
+        array_type._add_comparison_methods_binary()
+
+        # set the Array type on the Dtype
+        dtype_type.array_type = array_type
+
+
+def make_data():
+    """Return 100 sample values: ints with NaN at positions 8 and 97."""
+    missing = [np.nan]
+    head = list(range(8)) + missing
+    body = list(range(10, 98)) + missing
+    return head + body + [99, 100]
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 49e98c16c716e0..ba359c9ef49822 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -156,6 +156,12 @@ def name(self):
         """
         raise AbstractMethodError(self)
 
+    @property
+    def array_type(self):
+        """Return the array type associated with this dtype; this base
+        implementation raises AbstractMethodError."""
+        raise AbstractMethodError(self)
+
     @classmethod
     def construct_from_string(cls, string):
         """Attempt to construct this type from a string.
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index e4ed6d544d42eb..73176887ca0d96 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -647,6 +647,11 @@ def conv(r, dtype):
 def astype_nansafe(arr, dtype, copy=True):
     """ return a view if copy is False, but
         need to be very careful as the result shape could change! """
+
+    # dispatch on extension dtype if needed
+    if is_extension_array_dtype(dtype):
+        return dtype.array_type._from_sequence(arr, copy=copy)
+
     if not isinstance(dtype, np.dtype):
         dtype = pandas_dtype(dtype)
 
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 4d9846b3518145..37d260088c4d47 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1980,6 +1980,10 @@ def pandas_dtype(dtype):
     if result is not None:
         return result
 
+    # un-registered extension types
+    if isinstance(dtype, ExtensionDtype):
+        return dtype
+
     try:
         npdtype = np.dtype(dtype)
     except (TypeError, ValueError):
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 795f8ec54f3d57..8c322f3250a5c6 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -27,7 +27,7 @@ def register(self, dtype, constructor=None):
         ----------
         dtype : PandasExtension Dtype
         """
-        if not issubclass(dtype, PandasExtensionDtype):
+        if not issubclass(dtype, (PandasExtensionDtype, ExtensionDtype)):
             raise ValueError("can only register pandas extension dtypes")
 
         if constructor is None:
@@ -45,14 +45,20 @@ def find(self, dtype):
         -------
         return the first matching dtype, otherwise return None
         """
-        for dtype_type, constructor in self.dtypes.items():
-            if isinstance(dtype, dtype_type):
+        if not isinstance(dtype, compat.string_types):
+            dtype_type = dtype
+            if not isinstance(dtype, type):
+                dtype_type = type(dtype)
+            if issubclass(dtype_type, (PandasExtensionDtype, ExtensionDtype)):
                 return dtype
-            if isinstance(dtype, compat.string_types):
-                try:
-                    return constructor(dtype)
-                except TypeError:
-                    pass
+
+            return None
+
+        for dtype_type, constructor in self.dtypes.items():
+            try:
+                return constructor(dtype)
+            except TypeError:
+                pass
 
         return None
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index fe508dc1bb0bc8..a5e9107b8a660f 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -633,8 +633,9 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
             return self.make_block(Categorical(self.values, dtype=dtype))
 
         # astype processing
-        dtype = np.dtype(dtype)
-        if self.dtype == dtype:
+        if not is_extension_array_dtype(dtype):
+            dtype = np.dtype(dtype)
+        if is_dtype_equal(self.dtype, dtype):
             if copy:
                 return self.copy()
             return self
@@ -662,7 +663,13 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
 
                 # _astype_nansafe works fine with 1-d only
                 values = astype_nansafe(values.ravel(), dtype, copy=True)
-                values = values.reshape(self.shape)
+
+                # TODO(extension)
+                # should we make this attribute?
+                try:
+                    values = values.reshape(self.shape)
+                except AttributeError:
+                    pass
 
             newb = make_block(values, placement=self.mgr_locs,
                               klass=klass)
@@ -3170,6 +3177,10 @@ def get_block_type(values, dtype=None):
         cls = TimeDeltaBlock
     elif issubclass(vtype, np.complexfloating):
         cls = ComplexBlock
+    elif is_categorical(values):
+        cls = CategoricalBlock
+    elif is_extension_array_dtype(values):
+        cls = ExtensionBlock
     elif issubclass(vtype, np.datetime64):
         assert not is_datetimetz(values)
         cls = DatetimeBlock
@@ -3179,10 +3190,6 @@ def get_block_type(values, dtype=None):
         cls = IntBlock
     elif dtype == np.bool_:
         cls = BoolBlock
-    elif is_categorical(values):
-        cls = CategoricalBlock
-    elif is_extension_array_dtype(values):
-        cls = ExtensionBlock
     else:
         cls = ObjectBlock
     return cls
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 31c489e2f8941d..cb5ee8388c2c48 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -638,7 +638,8 @@ def fill_zeros(result, x, y, name, fill):
             # if we have a fill of inf, then sign it correctly
             # (GH 6178 and PR 9308)
             if np.isinf(fill):
-                signs = np.sign(y if name.startswith(('r', '__r')) else x)
+                signs = y if name.startswith(('r', '__r')) else x
+                signs = np.sign(signs.astype('float', copy=False))
                 negative_inf_mask = (signs.ravel() < 0) & mask
                 np.putmask(result, negative_inf_mask, -fill)
 
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index e14f82906cd065..5d4aa2a3d3759d 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -27,7 +27,7 @@
     is_integer_dtype, is_categorical_dtype,
     is_object_dtype, is_timedelta64_dtype,
     is_datetime64_dtype, is_datetime64tz_dtype,
-    is_bool_dtype,
+    is_bool_dtype, is_extension_array_dtype,
     is_list_like,
     is_scalar,
     _ensure_object)
@@ -1003,8 +1003,18 @@ def _arith_method_SERIES(cls, op, special):
                         if op is divmod else _construct_result)
 
     def na_op(x, y):
-        import pandas.core.computation.expressions as expressions
+        # handle extension array ops
+        # TODO(extension)
+        # the ops *between* non-same-type extension arrays are not
+        # very well defined
+        if (is_extension_array_dtype(x) or is_extension_array_dtype(y)):
+            if (op_name.startswith('__r') and not
+                    is_extension_array_dtype(y) and not
+                    is_scalar(y)):
+                y = x.__class__._from_sequence(y)
+            return op(x, y)
 
+        import pandas.core.computation.expressions as expressions
         try:
             result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
         except TypeError:
@@ -1025,6 +1035,7 @@ def na_op(x, y):
         return result
 
     def safe_na_op(lvalues, rvalues):
+        # all others
         try:
             with np.errstate(all='ignore'):
                 return na_op(lvalues, rvalues)
@@ -1035,14 +1046,21 @@ def safe_na_op(lvalues, rvalues):
             raise
 
     def wrapper(left, right):
-
         if isinstance(right, ABCDataFrame):
             return NotImplemented
 
         left, right = _align_method_SERIES(left, right)
         res_name = get_op_result_name(left, right)
 
-        if is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
+        if is_categorical_dtype(left):
+            raise TypeError("{typ} cannot perform the operation "
+                            "{op}".format(typ=type(left).__name__, op=str_rep))
+
+        elif (is_extension_array_dtype(left) or
+                is_extension_array_dtype(right)):
+            pass
+
+        elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
             result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
             return construct_result(left, result,
                                     index=left.index, name=res_name,
@@ -1054,10 +1072,6 @@ def wrapper(left, right):
                                     index=left.index, name=res_name,
                                     dtype=result.dtype)
 
-        elif is_categorical_dtype(left):
-            raise TypeError("{typ} cannot perform the operation "
-                            "{op}".format(typ=type(left).__name__, op=str_rep))
-
         lvalues = left.values
         rvalues = right
         if isinstance(rvalues, ABCSeries):
@@ -1136,6 +1150,14 @@ def na_op(x, y):
             # The `not is_scalar(y)` check excludes the string "category"
             return op(y, x)
 
+        # handle extension array ops
+        # TODO(extension)
+        # the ops *between* non-same-type extension arrays are not
+        # very well defined
+        elif (is_extension_array_dtype(x) or
+                is_extension_array_dtype(y)):
+            return op(x, y)
+
         elif is_object_dtype(x.dtype):
             result = _comp_method_OBJECT_ARRAY(op, x, y)
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 850f22e24010a9..d27d5ab6afe8c4 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4055,12 +4055,9 @@ def _try_cast(arr, take_fast_path):
                 subarr = Categorical(arr, dtype.categories,
                                      ordered=dtype.ordered)
             elif is_extension_array_dtype(dtype):
-                # We don't allow casting to third party dtypes, since we don't
-                # know what array belongs to which type.
-                msg = ("Cannot cast data to extension dtype '{}'. "
-                       "Pass the extension array directly.".format(dtype))
-                raise ValueError(msg)
-
+                # create an extension array from its dtype
+                array_type = dtype.array_type
+                subarr = array_type(subarr, copy=copy)
 
             elif dtype is not None and raise_cast_failure:
                 raise
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 12201f62946aca..adb4bf3f47572e 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -514,7 +514,6 @@ def _to_str_columns(self):
         Render a DataFrame to a list of columns (as lists of strings).
         """
         frame = self.tr_frame
-
         # may include levels names also
 
         str_index = self._get_formatted_index(frame)
diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
index 9da985625c4ee3..1f42de67375282 100644
--- a/pandas/tests/extension/base/__init__.py
+++ b/pandas/tests/extension/base/__init__.py
@@ -45,6 +45,7 @@ class TestMyDtype(BaseDtypeTests):
 from .dtype import BaseDtypeTests  # noqa
 from .getitem import BaseGetitemTests  # noqa
 from .groupby import BaseGroupbyTests  # noqa
+from .ops import BaseOpsTests  # noqa
 from .interface import BaseInterfaceTests  # noqa
 from .methods import BaseMethodsTests  # noqa
 from .missing import BaseMissingTests  # noqa
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index c5436aa731d50e..0ad3196277c34f 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -19,7 +19,8 @@ def test_value_counts(self, all_data, dropna):
             other = all_data
 
         result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
-        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
+        expected = pd.Series(other).value_counts(
+            dropna=dropna).sort_index()
 
         self.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
index 32cf29818e0694..ceb1ac4fcddac0 100644
--- a/pandas/tests/extension/base/missing.py
+++ b/pandas/tests/extension/base/missing.py
@@ -18,6 +18,11 @@ def test_isna(self, data_missing):
         expected = pd.Series(expected)
         self.assert_series_equal(result, expected)
 
+    def test_dropna_array(self, data_missing):
+        result = data_missing.dropna()
+        expected = data_missing[[1]]
+        self.assert_extension_array_equal(result, expected)
+
     def test_dropna_series(self, data_missing):
         ser = pd.Series(data_missing)
         result = ser.dropna()
diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
new file mode 100644
index 00000000000000..3742f342e43463
--- /dev/null
+++ b/pandas/tests/extension/base/ops.py
@@ -0,0 +1,6 @@
+from .base import BaseExtensionTests
+
+
+class BaseOpsTests(BaseExtensionTests):
+    """Various Series and DataFrame ops methods."""
+    pass
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index fe920a47ab7409..ff739c97f2785d 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -26,6 +26,14 @@ def test_concat(self, data, in_frame):
         assert dtype == data.dtype
         assert isinstance(result._data.blocks[0], ExtensionBlock)
 
+    def test_append(self, data):
+
+        wrapped = pd.Series(data)
+        result = wrapped.append(wrapped)
+        expected = pd.concat([wrapped, wrapped])
+
+        self.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize('in_frame', [True, False])
     def test_concat_all_na_block(self, data_missing, in_frame):
         valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1])
@@ -84,6 +92,7 @@ def test_concat_columns(self, data, na_value):
         expected = pd.DataFrame({
             'A': data._from_sequence(list(data[:3]) + [na_value]),
             'B': [np.nan, 1, 2, 3]})
+
         result = pd.concat([df1, df2], axis=1)
         self.assert_frame_equal(result, expected)
         result = pd.concat([df1['A'], df2['B']], axis=1)
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
index 530a4e7a22a7a3..c4928d026ca70f 100644
--- a/pandas/tests/extension/category/test_categorical.py
+++ b/pandas/tests/extension/category/test_categorical.py
@@ -55,6 +55,10 @@ class TestDtype(base.BaseDtypeTests):
     pass
 
 
+class TestOps(base.BaseOpsTests):
+    pass
+
+
 class TestInterface(base.BaseInterfaceTests):
     @pytest.mark.skip(reason="Memory usage doesn't match")
     def test_memory_usage(self):
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index e9431bd0c233cc..0213837a6f3284 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -27,7 +27,7 @@ def construct_from_string(cls, string):
 class DecimalArray(ExtensionArray):
     dtype = DecimalDtype()
 
-    def __init__(self, values):
+    def __init__(self, values, copy=False):
         assert all(isinstance(v, decimal.Decimal) for v in values)
         values = np.asarray(values, dtype=object)
 
@@ -40,7 +40,7 @@ def __init__(self, values):
         # self._values = self.values = self.data
 
     @classmethod
-    def _from_sequence(cls, scalars):
+    def _from_sequence(cls, scalars, copy=False):
         return cls(scalars)
 
     @classmethod
@@ -101,5 +101,8 @@ def _concat_same_type(cls, to_concat):
         return cls(np.concatenate([x._data for x in to_concat]))
 
 
+DecimalDtype.array_type = DecimalArray
+
+
 def make_data():
     return [decimal.Decimal(random.random()) for _ in range(100)]
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 1f8cf0264f62f0..86b902c5309c1b 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -99,10 +99,18 @@ class TestInterface(BaseDecimal, base.BaseInterfaceTests):
     pass
 
 
-class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
+class TestOps(BaseDecimal, base.BaseOpsTests):
     pass
 
 
+class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
+
+    @pytest.mark.xfail(reason="not implemented constructor from dtype")
+    def test_from_dtype(self, data):
+        # construct from our dtype & string dtype
+        pass
+
+
 class TestReshaping(BaseDecimal, base.BaseReshapingTests):
     pass
 
@@ -147,6 +155,10 @@ class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
     pass
 
 
+# TODO(extension)
+@pytest.mark.xfail(reason=(
+    "raising AssertionError as this is not implemented, "
+    "though easy enough to do"))
 def test_series_constructor_coerce_data_to_extension_dtype_raises():
     xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the "
            "extension array directly.")
diff --git a/pandas/tests/extension/integer/__init__.py b/pandas/tests/extension/integer/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py
new file mode 100644
index 00000000000000..39526f6e18b5a5
--- /dev/null
+++ b/pandas/tests/extension/integer/test_integer.py
@@ -0,0 +1,390 @@
+import numpy as np
+import pandas as pd
+import pandas.util.testing as tm
+import pytest
+
+from pandas.tests.extension import base
+
+from pandas.core.arrays import (
+    to_integer_array,
+    Int8Array, Int16Array, Int32Array, Int64Array,
+    UInt8Array, UInt16Array, UInt32Array, UInt64Array)
+from pandas.core.arrays.integer import (
+    Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
+    UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype,
+    IntegerArray, make_data)
+
+
+@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
+                        UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype])
+def dtype(request):
+    return request.param()
+
+
+@pytest.fixture(params=[Int8Array, Int16Array, Int32Array, Int64Array,
+                        UInt8Array, UInt16Array, UInt32Array, UInt64Array])
+def arrays(request):
+    return request.param
+
+
+@pytest.fixture
+def data(arrays):
+    return arrays(make_data())
+
+
+@pytest.fixture
+def data_missing(arrays):
+    return arrays([np.nan, 1])
+
+
+@pytest.fixture
+def data_for_sorting(arrays):
+    return arrays([1, 2, 0])
+
+
+@pytest.fixture
+def data_missing_for_sorting(arrays):
+    return arrays([1, np.nan, 0])
+
+
+@pytest.fixture
+def na_cmp():
+    # we are np.nan
+    return lambda x, y: np.isnan(x) and np.isnan(y)
+
+
+@pytest.fixture
+def na_value():
+    return np.nan
+
+
+@pytest.fixture
+def data_for_grouping(arrays):
+    b = 1
+    a = 0
+    c = 2
+    na = np.nan
+    return arrays([b, b, na, na, a, a, b, c])
+
+
+def test_dtypes(dtype):
+    # smoke tests on auto dtype construction
+
+    if dtype.is_signed_integer:
+        assert np.dtype(dtype.type).kind == 'i'
+    else:
+        assert np.dtype(dtype.type).kind == 'u'
+    assert dtype.name is not None
+
+
+class BaseInteger(object):
+
+    def assert_series_equal(self, left, right, *args, **kwargs):
+
+        left_na = left.isna()
+        right_na = right.isna()
+
+        tm.assert_series_equal(left_na, right_na)
+        return tm.assert_series_equal(left[~left_na],
+                                      right[~right_na],
+                                      *args, **kwargs)
+
+    def assert_frame_equal(self, left, right, *args, **kwargs):
+        # TODO(EA): select_dtypes
+        tm.assert_index_equal(
+            left.columns, right.columns,
+            exact=kwargs.get('check_column_type', 'equiv'),
+            check_names=kwargs.get('check_names', True),
+            check_exact=kwargs.get('check_exact', False),
+            check_categorical=kwargs.get('check_categorical', True),
+            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))
+
+        integers = (left.dtypes == 'integer').index
+
+        for col in integers:
+            self.assert_series_equal(left[col], right[col],
+                                     *args, **kwargs)
+
+        left = left.drop(columns=integers)
+        right = right.drop(columns=integers)
+        tm.assert_frame_equal(left, right, *args, **kwargs)
+
+
+class TestDtype(BaseInteger, base.BaseDtypeTests):
+
+    @pytest.mark.skip(reason="using multiple dtypes")
+    def test_is_dtype_unboxes_dtype(self):
+        # we have multiple dtypes, so skip
+        pass
+
+
+class TestOps(BaseInteger, base.BaseOpsTests):
+
+    def compare(self, s, op, other):
+
+        result = getattr(s, op)(other)
+
+        # compute expected
+        mask = s.isna()
+
+        # other array is an Integer
+        if isinstance(other, IntegerArray):
+            omask = getattr(other, 'mask', None)
+            mask = getattr(other, 'data', other)
+            if omask is not None:
+                mask |= omask
+
+        # to compare properly, we convert the expected
+        # to float, mask to nans and convert infs
+        # if we have uints then we process as uints
+        # then convert to float
+        # and we ultimately want to create an IntArray
+        # for comparisons
+        rs = pd.Series(s.values.data)
+        expected = getattr(rs, op)(other)
+
+        # truediv can make infs
+        if 'truediv' in op:
+            fill_value = np.nan
+        else:
+            fill_value = 0
+
+        try:
+            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
+            expected = expected.astype(s.dtype)
+
+        except ValueError:
+
+            expected = expected.astype(float)
+            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
+            expected = expected.astype(s.dtype)
+
+        expected[mask] = np.nan
+        self.assert_series_equal(result, expected)
+
+    def test_arith_scalar(self, data, all_arithmetic_operators):
+        # scalar
+        op = all_arithmetic_operators
+        s = pd.Series(data)
+        self.compare(s, op, 1)
+
+    def test_arith_array(self, data, all_arithmetic_operators):
+        # ndarray & other series
+        op = all_arithmetic_operators
+        s = pd.Series(data)
+        self.compare(s, op, np.ones(len(s), dtype=s.dtype.type))
+
+    def test_arith_integer_array(self, data, all_arithmetic_operators):
+        # we operate with a rhs of an integer array
+
+        op = all_arithmetic_operators
+        s = pd.Series(data)
+        rhs = pd.Series([1] * len(data), dtype=data.dtype)
+        rhs.iloc[-1] = np.nan
+
+        self.compare(s, op, rhs)
+
+    def test_compare_scalar(self, data, all_compare_operators):
+        op = all_compare_operators
+
+        # array
+        result = getattr(data, op)(0)
+        expected = getattr(data.data, op)(0)
+
+        # fill the nan locations
+        expected[data.mask] = True if op == '__ne__' else False
+
+        tm.assert_numpy_array_equal(result, expected)
+
+        # series
+        s = pd.Series(data)
+        result = getattr(s, op)(0)
+
+        expected = pd.Series(data.data)
+        expected = getattr(expected, op)(0)
+
+        # fill the nan locations
+        expected[data.mask] = True if op == '__ne__' else False
+
+        tm.assert_series_equal(result, expected)
+
+    def test_error(self, data, all_arithmetic_operators):
+        # invalid ops
+
+        op = all_arithmetic_operators
+        s = pd.Series(data)
+        ops = getattr(s, op)
+        opa = getattr(data, op)
+
+        # invalid scalars
+        with pytest.raises(TypeError):
+            ops('foo')
+        with pytest.raises(TypeError):
+            ops(pd.Timestamp('20180101'))
+
+        # invalid array-likes
+        with pytest.raises(TypeError):
+            ops(pd.Series('foo', index=s.index))
+
+        if op != '__rpow__':
+            # TODO(extension)
+            # rpow with a datetimelike coerces the integer array incorrectly
+            with pytest.raises(TypeError):
+                ops(pd.Series(pd.date_range('20180101', periods=len(s))))
+
+        # 2d
+        with pytest.raises(TypeError):
+            opa(pd.DataFrame({'A': s}))
+        with pytest.raises(TypeError):
+            opa(np.arange(len(s)).reshape(-1, len(s)))
+
+
+class TestInterface(BaseInteger, base.BaseInterfaceTests):
+    pass
+
+
+class TestConstructors(BaseInteger, base.BaseConstructorsTests):
+
+    def test_from_dtype_from_float(self, data):
+        # construct from our dtype & string dtype
+        dtype = data.dtype
+
+        # from float
+        expected = pd.Series(data)
+        result = pd.Series(np.array(data).astype('float'), dtype=str(dtype))
+        self.assert_series_equal(result, expected)
+
+        # from int / list
+        expected = pd.Series(data)
+        result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
+        self.assert_series_equal(result, expected)
+
+        # from int / array
+        expected = pd.Series(data).dropna().reset_index(drop=True)
+        dropped = np.array(data.dropna()).astype(np.dtype((dtype.type)))
+        result = pd.Series(dropped, dtype=str(dtype))
+        self.assert_series_equal(result, expected)
+
+
+class TestReshaping(BaseInteger, base.BaseReshapingTests):
+
+    def test_concat_mixed_dtypes(self, data):
+        # https://github.com/pandas-dev/pandas/issues/20762
+        df1 = pd.DataFrame({'A': data[:3]})
+        df2 = pd.DataFrame({"A": [1, 2, 3]})
+        df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category')
+        df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])})
+        dfs = [df1, df2, df3, df4]
+
+        # dataframes
+        result = pd.concat(dfs)
+        expected = pd.concat([x.astype(object) for x in dfs])
+        self.assert_frame_equal(result, expected)
+
+        # series
+        result = pd.concat([x['A'] for x in dfs])
+        expected = pd.concat([x['A'].astype(object) for x in dfs])
+        self.assert_series_equal(result, expected)
+
+        result = pd.concat([df1, df2])
+        expected = pd.concat([df1.astype('object'), df2.astype('object')])
+        self.assert_frame_equal(result, expected)
+
+        # concat of an Integer and Int coerces to object dtype
+        # TODO(jreback) once integrated this would
+        # be a result of Integer
+        result = pd.concat([df1['A'], df2['A']])
+        expected = pd.concat([df1['A'].astype('object'),
+                              df2['A'].astype('object')])
+        self.assert_series_equal(result, expected)
+
+
+class TestGetitem(BaseInteger, base.BaseGetitemTests):
+    pass
+
+
+class TestMissing(BaseInteger, base.BaseMissingTests):
+    pass
+
+
+class TestMethods(BaseInteger, base.BaseMethodsTests):
+
+    @pytest.mark.xfail(reason="need a Index type with ExtensionArrays")
+    @pytest.mark.parametrize('dropna', [True, False])
+    def test_value_counts(self, all_data, dropna):
+        all_data = all_data[:10]
+        if dropna:
+            other = np.array(all_data[~all_data.isna()])
+        else:
+            other = all_data
+
+        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
+        expected = pd.Series(other).value_counts(
+            dropna=dropna).sort_index()
+
+        self.assert_series_equal(result, expected)
+
+
+class TestCasting(BaseInteger, base.BaseCastingTests):
+    pass
+
+
+class TestGroupby(BaseInteger, base.BaseGroupbyTests):
+    pass
+
+
+def test_frame_repr(data_missing):
+
+    df = pd.DataFrame({'A': data_missing})
+    result = repr(df)
+    expected = '     A\n0  NaN\n1    1'
+    assert result == expected
+
+
+def test_conversions(data_missing):
+
+    # astype to object series
+    df = pd.DataFrame({'A': data_missing})
+    result = df['A'].astype('object')
+    expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A')
+    tm.assert_series_equal(result, expected)
+
+    # convert to object ndarray
+    # we assert that we are exactly equal
+    # including type conversions of scalars
+    result = df['A'].astype('object').values
+    expected = np.array([np.nan, 1], dtype=object)
+    tm.assert_numpy_array_equal(result, expected)
+
+    for r, e in zip(result, expected):
+        if pd.isnull(r):
+            assert pd.isnull(e)
+        else:
+            assert r == e
+            assert type(r) == type(e)
+
+
+@pytest.mark.parametrize(
+    'values',
+    [
+        ['foo', 'bar'],
+        'foo',
+        1,
+        1.0,
+        pd.date_range('20130101', periods=2),
+        np.array(['foo'])])
+def test_to_integer_array_error(values):
+    # error in converting existing arrays to IntegerArrays
+    with pytest.raises(TypeError):
+        to_integer_array(values)
+
+
+@pytest.mark.parametrize(
+    'values, expected',
+    [
+        (np.array([1], dtype='int64'), Int64Array([1])),
+        (np.array([1, np.nan]), Int64Array([1, np.nan]))])
+def test_to_integer_array(values, expected):
+    # convert existing arrays to IntegerArrays
+    result = to_integer_array(values)
+    tm.assert_extension_array_equal(result, expected)
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 88bb66f38b35c4..a81849c6bb3221 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -44,7 +44,7 @@ def construct_from_string(cls, string):
 class JSONArray(ExtensionArray):
     dtype = JSONDtype()
 
-    def __init__(self, values):
+    def __init__(self, values, copy=False):
         for val in values:
             if not isinstance(val, self.dtype.type):
                 raise TypeError
@@ -58,7 +58,7 @@ def __init__(self, values):
         # self._values = self.values = self.data
 
     @classmethod
-    def _from_sequence(cls, scalars):
+    def _from_sequence(cls, scalars, copy=False):
         return cls(scalars)
 
     @classmethod
@@ -170,6 +170,9 @@ def _values_for_argsort(self):
         return np.array(frozen, dtype=object)[1:]
 
 
+JSONDtype.array_type = JSONArray
+
+
 def make_data():
     # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
     return [collections.UserDict([
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index b7ac8033f3f6dc..fd1010ff45e962 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -129,10 +129,18 @@ def test_custom_asserts(self):
             self.assert_frame_equal(a.to_frame(), b.to_frame())
 
 
-class TestConstructors(BaseJSON, base.BaseConstructorsTests):
+class TestOps(BaseJSON, base.BaseOpsTests):
     pass
 
 
+class TestConstructors(BaseJSON, base.BaseConstructorsTests):
+
+    @pytest.mark.xfail(reason="not implemented constructor from dtype")
+    def test_from_dtype(self, data):
+        # construct from our dtype & string dtype
+        pass
+
+
 class TestReshaping(BaseJSON, base.BaseReshapingTests):
     pass