pandas-dev · jorisvandenbossche · Nov 8, 2018 · Oct 22, 2018 · Oct 23, 2018 · Oct 24, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -724,6 +724,8 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).
 - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
+- :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`).
+
 
 .. _whatsnew_0240.api.incompatibilities:
 

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -36,6 +36,7 @@
     is_list_like,
     is_re,
     is_re_compilable,
+    is_sparse,
     pandas_dtype)
 from pandas.core.dtypes.cast import (
     maybe_downcast_to_dtype,
@@ -633,7 +634,9 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
             return self
 
         if klass is None:
-            if dtype == np.object_:
+            # sparse is "special" and preserves sparsity.
+            # We're changing this in GH-23125
+            if dtype == np.object_ and is_sparse(values):
                 klass = ObjectBlock
             elif is_extension_array_dtype(dtype):
                 klass = ExtensionBlock

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -470,8 +470,15 @@ def factorize(index):
         if is_extension_array_dtype(dtype):
             arr = dtype.construct_array_type()
             new_values = arr._concat_same_type([
-                col for _, col in frame.iteritems()
+                col._values for _, col in frame.iteritems()
             ])
+            # final take to get the order correct.
+            # idx is an indexer like
+            # [c0r0, c1r0, c2r0, ...,
+            #  c0r1, c1r1, c2r1, ...]
+            idx = np.arange(N * K).reshape(K, N).T.ravel()
+            new_values = new_values.take(idx)
+
         else:
             # homogeneous, non-EA
             new_values = frame.values.ravel()

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
@@ -170,3 +170,11 @@ def test_merge(self, data, na_value):
                  [data[0], data[0], data[1], data[2], na_value],
                  dtype=data.dtype)})
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
+
+    def test_stack(self, data):
+        df = pd.DataFrame({"A": data[:5], "B": data[:5]})
+        result = df.stack()
+        assert result.dtype == df.A.dtype
+        result = result.astype(object)
+        expected = df.astype(object).stack()
+        self.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
@@ -872,6 +872,17 @@ def test_stack_preserve_categorical_dtype(self, ordered, labels):
 
         tm.assert_series_equal(result, expected)
 
+    def test_stack_preserve_categorical_dtype_values(self):
+        # GH-23077
+        cat = pd.Categorical(['a', 'a', 'b', 'c'])
+        df = pd.DataFrame({"A": cat, "B": cat})
+        result = df.stack()
+        index = pd.MultiIndex.from_product([[0, 1, 2, 3], ['A', 'B']])
+        expected = pd.Series(pd.Categorical(['a', 'a', 'a', 'a',
+                                             'b', 'b', 'c', 'c']),
+                             index=index)
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("level", [0, 'baz'])
     def test_unstack_swaplevel_sortlevel(self, level):
         # GH 20994

diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
@@ -736,6 +736,16 @@ def test_astype_bool(self):
         assert res['A'].dtype == SparseDtype(np.bool)
         assert res['B'].dtype == SparseDtype(np.bool)
 
+    def test_astype_object(self):
+        # This may change in GH-23125
+        df = pd.DataFrame({"A": SparseArray([0, 1]),
+                           "B": SparseArray([0, 1])})
+        result = df.astype(object)
+        dtype = SparseDtype(object, 0)
+        expected = pd.DataFrame({"A": SparseArray([0, 1], dtype=dtype),
+                                 "B": SparseArray([0, 1], dtype=dtype)})
+        tm.assert_frame_equal(result, expected)
+
     def test_fillna(self, float_frame_fill0, float_frame_fill0_dense):
         df = float_frame_fill0.reindex(lrange(5))
         dense = float_frame_fill0_dense.reindex(lrange(5))