From ed56aa39b05434f0650f7cc9631cb84604e044f3 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 4 Oct 2018 15:07:32 -0500
Subject: [PATCH 1/8] API: ExtensionDtype Equality and Hashability

Closes https://github.com/pandas-dev/pandas/issues/22476
---
 doc/source/whatsnew/v0.24.0.txt         |  9 +++++
 pandas/core/dtypes/base.py              | 45 +++++++++++++++++++++----
 pandas/core/dtypes/dtypes.py            |  9 +++--
 pandas/tests/extension/base/dtype.py    |  7 ++++
 pandas/tests/extension/decimal/array.py |  6 +---
 pandas/tests/extension/json/array.py    |  1 +
 6 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index c9874b4dd03d6..437fd985820e1 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -492,6 +492,15 @@ Previous Behavior:
 ExtensionType Changes
 ^^^^^^^^^^^^^^^^^^^^^
 
+**:class:`pandas.api.extensions.ExtensionDtype` Equality and Hashability**
+
+Pandas now requires that extension dtypes be hashable. The base class implements
+a default ``__eq__`` and ``__hash__``. If you have a parametrized dtype, you should
+update the ``ExtensionDtype._metadata`` tuple to match the signature of your
+``__init__`` method. See :class:`pandas.api.extensions.ExtensionDtype` for more.
+
+**Other changes**
+
 - ``ExtensionArray`` has gained the abstract methods ``.dropna()`` (:issue:`21185`)
 - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
   the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index b0fa55e346613..ac4d6d1590f38 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -22,14 +22,17 @@ class _DtypeOpsMixin(object):
     # of the NA value, not the physical NA vaalue for storage.
     # e.g. for JSONArray, this is an empty dictionary.
     na_value = np.nan
+    _metadata = ()
 
     def __eq__(self, other):
         """Check whether 'other' is equal to self.
 
-        By default, 'other' is considered equal if
+        By default, 'other' is considered equal if either
 
         * it's a string matching 'self.name'.
-        * it's an instance of this type.
+        * it's an instance of this type and all of the
+          the attributes in ``self._metadata`` are equal between
+          `self` and `other`.
 
         Parameters
         ----------
@@ -40,11 +43,19 @@ def __eq__(self, other):
         bool
         """
         if isinstance(other, compat.string_types):
-            return other == self.name
-        elif isinstance(other, type(self)):
-            return True
-        else:
-            return False
+            try:
+                other = self.construct_from_string(other)
+            except TypeError:
+                return False
+        if isinstance(other, type(self)):
+            return all(
+                getattr(self, attr) == getattr(other, attr)
+                for attr in self._metadata
+            )
+        return False
+
+    def __hash__(self):
+        return hash(tuple(getattr(self, attr) for attr in self._metadata))
 
     def __ne__(self, other):
         return not self.__eq__(other)
@@ -161,6 +172,26 @@ class ExtensionDtype(_DtypeOpsMixin):
     The `na_value` class attribute can be used to set the default NA value
     for this type. :attr:`numpy.nan` is used by default.
 
+    ExtensionDtypes are required to be hashable. The base class provides
+    a default implementation, which relies on the ``_metadata`` class
+    attribute. ``_metadata`` should be a tuple containing the strings
+    that define your data type. For example, with ``PeriodDtype`` that's
+    the ``freq`` attribute.
+
+    **If you have a parametrized dtype you should set the ``_metadata``
+    class property**.
+
+    Ideally, the attributes in ``_metadata`` will match the
+    parameters to your ``ExtensionDtype.__init__`` (if any). If any of
+    the attributes in ``_metadata`` don't implement the standard
+    ``__eq__`` or ``__hash__``, the default implementations here will not
+    work.
+
+    .. versionchanged:: 0.24.0
+
+       Added ``_metadata``, ``__hash__``, and changed the default definition
+       of ``__eq__``.
+
     This class does not inherit from 'abc.ABCMeta' for performance reasons.
     Methods and properties required by the interface raise
     ``pandas.errors.AbstractMethodError`` and no ``register`` method is
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index beda9bc02f4d5..611cae28877c3 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -101,7 +101,6 @@ class PandasExtensionDtype(_DtypeOpsMixin):
     base = None
     isbuiltin = 0
     isnative = 0
-    _metadata = []
     _cache = {}
 
     def __unicode__(self):
@@ -209,7 +208,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
     kind = 'O'
     str = '|O08'
     base = np.dtype('O')
-    _metadata = ['categories', 'ordered']
+    _metadata = ('categories', 'ordered')
     _cache = {}
 
     def __init__(self, categories=None, ordered=None):
@@ -485,7 +484,7 @@ class DatetimeTZDtype(PandasExtensionDtype):
     str = '|M8[ns]'
     num = 101
     base = np.dtype('M8[ns]')
-    _metadata = ['unit', 'tz']
+    _metadata = ('unit', 'tz')
     _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
     _cache = {}
 
@@ -589,7 +588,7 @@ class PeriodDtype(PandasExtensionDtype):
     str = '|O08'
     base = np.dtype('O')
     num = 102
-    _metadata = ['freq']
+    _metadata = ('freq',)
     _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
     _cache = {}
 
@@ -709,7 +708,7 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype):
     str = '|O08'
     base = np.dtype('O')
     num = 103
-    _metadata = ['subtype']
+    _metadata = ('subtype',)
     _match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]")
     _cache = {}
 
diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py
index 8d1f1cadcc23f..d5cf9571e3622 100644
--- a/pandas/tests/extension/base/dtype.py
+++ b/pandas/tests/extension/base/dtype.py
@@ -49,6 +49,10 @@ def test_eq_with_str(self, dtype):
     def test_eq_with_numpy_object(self, dtype):
         assert dtype != np.dtype('object')
 
+    def test_eq_with_self(self, dtype):
+        assert dtype == dtype
+        assert dtype != object()
+
     def test_array_type(self, data, dtype):
         assert dtype.construct_array_type() is type(data)
 
@@ -81,3 +85,6 @@ def test_check_dtype(self, data):
                              index=list('ABCD'))
         result = df.dtypes.apply(str) == str(dtype)
         self.assert_series_equal(result, expected)
+
+    def test_hashable(self, dtype):
+        hash(dtype)  # no error
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index 79e1a692f744a..a1ee3a4fefef2 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -15,15 +15,11 @@ class DecimalDtype(ExtensionDtype):
     type = decimal.Decimal
     name = 'decimal'
     na_value = decimal.Decimal('NaN')
+    _metadata = ('context',)
 
     def __init__(self, context=None):
         self.context = context or decimal.getcontext()
 
-    def __eq__(self, other):
-        if isinstance(other, type(self)):
-            return self.context == other.context
-        return super(DecimalDtype, self).__eq__(other)
-
     def __repr__(self):
         return 'DecimalDtype(context={})'.format(self.context)
 
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 87876d84bef99..976511941042d 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -27,6 +27,7 @@
 class JSONDtype(ExtensionDtype):
     type = compat.Mapping
     name = 'json'
+
     try:
         na_value = collections.UserDict()
     except AttributeError:

From 463c0bd6f4a4011205f599a8f2323d559a8f7b2e Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 4 Oct 2018 15:46:01 -0500
Subject: [PATCH 2/8] issue note

---
 doc/source/whatsnew/v0.24.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 437fd985820e1..a1467cbca963a 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -497,7 +497,7 @@ ExtensionType Changes
 Pandas now requires that extension dtypes be hashable. The base class implements
 a default ``__eq__`` and ``__hash__``. If you have a parametrized dtype, you should
 update the ``ExtensionDtype._metadata`` tuple to match the signature of your
-``__init__`` method. See :class:`pandas.api.extensions.ExtensionDtype` for more.
+``__init__`` method. See :class:`pandas.api.extensions.ExtensionDtype` for more (:issue:`22476`).
 
 **Other changes**
 

From 2a1660c77e8ad0c28485c6d81227ac9d6b6de4f2 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 4 Oct 2018 14:13:51 -0500
Subject: [PATCH 3/8] BUG: concat different EAs

---
 doc/source/whatsnew/v0.24.0.txt     |  1 +
 pandas/core/internals/managers.py   |  3 +--
 pandas/tests/reshape/test_concat.py | 13 +++++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index a1467cbca963a..1f42cc3621c0d 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -514,6 +514,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185:`).
 - Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
 - Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`)
+- Bug in concatenation an Series with two different extension dtypes not casting to object dtype (:issue:`22994`)
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
 
 .. _whatsnew_0240.api.incompatibilities:
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 2f29f1ae2509f..428b12def7962 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1634,8 +1634,7 @@ def concat(self, to_concat, new_axis):
         # check if all series are of the same block type:
         if len(non_empties) > 0:
             blocks = [obj.blocks[0] for obj in non_empties]
-
-            if all(type(b) is type(blocks[0]) for b in blocks[1:]):  # noqa
+            if len({b.dtype for b in blocks}) == 1:
                 new_block = blocks[0].concat_same_type(blocks)
             else:
                 values = [x.values for x in blocks]
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 2aaa04d571e69..841e33fbe9524 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1,6 +1,7 @@
 from warnings import catch_warnings, simplefilter
 from itertools import combinations
 from collections import deque
+from decimal import Decimal
 
 import datetime as dt
 import dateutil
@@ -19,6 +20,7 @@
 from pandas.util import testing as tm
 from pandas.util.testing import (assert_frame_equal,
                                  makeCustomDataframe as mkdf)
+from pandas.tests.extension.decimal import to_decimal
 
 import pytest
 
@@ -2361,6 +2363,17 @@ def test_concat_datetime_timezone(self):
                                 index=idx1.append(idx1))
         tm.assert_frame_equal(result, expected)
 
+    def test_concat_different_extension_dtypes_upcasts(self):
+        a = pd.Series(pd.core.arrays.integer_array([1, 2]))
+        b = pd.Series(to_decimal([1, 2]))
+
+        result = pd.concat([a, b], ignore_index=True)
+        expected = pd.Series([
+            1, 2,
+            Decimal(1), Decimal(2)
+        ], dtype=object)
+        tm.assert_series_equal(result, expected)
+
 
 @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
 @pytest.mark.parametrize('dt', np.sctypes['float'])

From 869e2bfdf3a7ce80de7093387542f0d0e8557b08 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 14 Oct 2018 16:52:14 -0500
Subject: [PATCH 4/8] Allow multiple fill values

---
 pandas/core/dtypes/concat.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index ac824708245d2..2b1778e5bcb2e 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -560,11 +560,6 @@ def _concat_sparse(to_concat, axis=0, typs=None):
 
     fill_values = [x.fill_value for x in to_concat
                    if isinstance(x, SparseArray)]
-
-    if len(set(fill_values)) > 1:
-        raise ValueError("Cannot concatenate SparseArrays with different "
-                         "fill values")
-
     fill_value = fill_values[0]
 
     # TODO: Fix join unit generation so we aren't passed this.

From b6ccab469c3e82db20c839a556d67cfbe1b27f43 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 15 Oct 2018 06:37:47 -0500
Subject: [PATCH 5/8] skips

---
 pandas/tests/extension/decimal/test_decimal.py | 10 +++++++---
 pandas/tests/reshape/test_concat.py            |  3 ++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index f84d24295b049..be1c61166e4b1 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -100,7 +100,9 @@ def test_hashable(self, dtype):
 
 
 class TestInterface(BaseDecimal, base.BaseInterfaceTests):
-    pass
+
+    pytestmark = pytest.mark.skipif(compat.PY2,
+                                    reason="Unhashble dtype in Py2.")
 
 
 class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
@@ -112,7 +114,8 @@ def test_from_dtype(self, data):
 
 
 class TestReshaping(BaseDecimal, base.BaseReshapingTests):
-    pass
+    pytestmark = pytest.mark.skipif(compat.PY2,
+                                    reason="Unhashble dtype in Py2.")
 
 
 class TestGetitem(BaseDecimal, base.BaseGetitemTests):
@@ -174,7 +177,8 @@ class TestCasting(BaseDecimal, base.BaseCastingTests):
 
 
 class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
-    pass
+    pytestmark = pytest.mark.skipif(compat.PY2,
+                                    reason="Unhashble dtype in Py2.")
 
 
 class TestSetitem(BaseDecimal, base.BaseSetitemTests):
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 841e33fbe9524..e2b44852d218f 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -15,7 +15,7 @@
                     read_csv, isna, Series, date_range,
                     Index, Panel, MultiIndex, Timestamp,
                     DatetimeIndex, Categorical)
-from pandas.compat import Iterable
+from pandas.compat import Iterable, PY2
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.util import testing as tm
 from pandas.util.testing import (assert_frame_equal,
@@ -2363,6 +2363,7 @@ def test_concat_datetime_timezone(self):
                                 index=idx1.append(idx1))
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.skipif(PY2, reason="Unhashable Decimal dtype")
     def test_concat_different_extension_dtypes_upcasts(self):
         a = pd.Series(pd.core.arrays.integer_array([1, 2]))
         b = pd.Series(to_decimal([1, 2]))

From 9507a0ce9ea813d4d14d8859285f9f8f368abed4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 15 Oct 2018 07:00:15 -0500
Subject: [PATCH 6/8] fixup [ci skip]

---
 doc/source/whatsnew/v0.24.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 0c3137b2400bd..3d7e7686b2db6 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -602,7 +602,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185:`).
 - Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
 - Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`)
-- Bug in concatenation an Series with two different extension dtypes not casting to object dtype (:issue:`22994`)
+- Bug when concatenating multiple ``Series`` with different extension dtypes not casting to object dtype (:issue:`22994`)
 - Series backed by an ``ExtensionArray`` now work with :func:`util.hash_pandas_object` (:issue:`23066`)
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).

From 5502bb8233c71f56e4e92554596172bc2942912e Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 15 Oct 2018 07:00:48 -0500
Subject: [PATCH 7/8] trigger CI


From e17d3976fc9d6f37822c4f78b0933de4a4964b9a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 15 Oct 2018 10:42:18 -0500
Subject: [PATCH 8/8] lint

---
 pandas/tests/reshape/test_concat.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index e2b44852d218f..d39c9fafe5749 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -9,13 +9,12 @@
 from numpy.random import randn
 
 from datetime import datetime
-from pandas.compat import StringIO, iteritems, PY2
+from pandas.compat import Iterable, StringIO, iteritems, PY2
 import pandas as pd
 from pandas import (DataFrame, concat,
                     read_csv, isna, Series, date_range,
                     Index, Panel, MultiIndex, Timestamp,
                     DatetimeIndex, Categorical)
-from pandas.compat import Iterable, PY2
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.util import testing as tm
 from pandas.util.testing import (assert_frame_equal,