Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: improve 2D array access / transpose() for masked dtypes #52083

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ def __from_arrow__(
return BooleanArray._concat_same_type(results)


_boolean_dtype = BooleanDtype()


def coerce_to_array(
values, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
Expand Down Expand Up @@ -299,7 +302,7 @@ def __init__(
"values should be boolean numpy array. Use "
"the 'pd.array' function instead"
)
self._dtype = BooleanDtype()
self._dtype = _boolean_dtype
super().__init__(values, mask, copy=copy)

@property
Expand Down
41 changes: 36 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@
needs_i8_conversion,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.dtypes import (
BaseMaskedDtype,
ExtensionDtype,
)
from pandas.core.dtypes.missing import (
isna,
notna,
Expand Down Expand Up @@ -2505,6 +2508,7 @@ def _from_arrays(
index,
dtype: Dtype | None = None,
verify_integrity: bool = True,
is_1d_ea_only: bool = False,
) -> Self:
"""
Create DataFrame from a list of arrays corresponding to the columns.
Expand Down Expand Up @@ -2544,6 +2548,7 @@ def _from_arrays(
dtype=dtype,
verify_integrity=verify_integrity,
typ=manager,
is_1d_ea_only=is_1d_ea_only,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

arrays_to_mgr already has a consolidate keyword. could we use that instead?

)
return cls(mgr)

Expand Down Expand Up @@ -3610,11 +3615,21 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
# We have EAs with the same dtype. We can preserve that dtype in transpose.
dtype = dtypes[0]
arr_type = dtype.construct_array_type()
values = self.values
if isinstance(dtype, BaseMaskedDtype):
data, mask = self._mgr.as_array_masked()
new_values = [arr_type(data[i], mask[i]) for i in range(self.shape[0])]
else:
values = self.values
new_values = [
arr_type._from_sequence(row, dtype=dtype) for row in values
]
Comment on lines +3618 to +3625
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is the main change (bringing the large part of the speed-up for transpose)


new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values]
result = type(self)._from_arrays(
new_values, index=self.columns, columns=self.index
new_values,
index=self.columns,
columns=self.index,
verify_integrity=False,
is_1d_ea_only=True,
)

else:
Expand Down Expand Up @@ -10935,7 +10950,23 @@ def _get_data() -> DataFrame:
df = _get_data()
if axis is None:
return func(df.values)
elif axis == 1:

# if len(df._mgr) > 0:
# common_dtype = find_common_type(list(df._mgr.get_dtypes()))
# is_masked_ea = isinstance(common_dtype, BaseMaskedDtype)
# is_np = isinstance(common_dtype, np.dtype)
# else:
# common_dtype = None

# if axis == 1 and common_dtype and is_masked_ea:
# data, mask = self._mgr.as_array_masked()
# ea2d = common_dtype.construct_array_type()(data, mask)
# result = ea2d._reduce(name, axis=axis, skipna=skipna, **kwds)
# labels = self._get_agg_axis(axis)
# result = self._constructor_sliced(result, index=labels, copy=False)
# return result
Comment on lines +10954 to +10967
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rhshadrach this relates to #51923, and making use of the as_array_masked added in this PR could be a potential different way of tackling it.

Manually running one of the benchmarks you mentioned in #51955 (the bottom row of - 8.94±0s 24.3±0.3ms 0.00 stat_ops.FrameOps.time_op('mean', 'Int64', 1)), with the above uncommented, I get:

In [1]: values = np.random.randn(100000, 4)
   ...: df = pd.DataFrame(values.astype(int)).astype("Int64")

In [2]: %timeit df.mean(axis=1)
5.97 s ± 434 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)   # <-- main
1.74 ms ± 126 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)   # <-- PR

Now, this is probably a bit too optimistic, because the as_array_masked is currently assuming all the same dtypes, but with the use of find_common_type here, it would need to handle some extra cases as well. But in general it should also give a way to speed-up this specific case (while using the actual masked reduction implementations, which already support 2D with axis=1)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now, this is probably a bit too optimistic, because the as_array_masked is currently assuming all the same dtypes, but with the use of find_common_type here, it would need to handle some extra cases as well.

As long as we're not doing inference, the transpose is guaranteed to be a single dtype. This is because two different dtypes in the transpose could only arise from two different dtypes in the rows of the original. So we can determine the common dtype, cast to a single dtype, and then transpose.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing a cast beforehand might indeed be the easiest, then as_array_masked doesn't need to handle the case of multiple dtypes itself.


if axis == 1:
if len(df.index) == 0:
# Taking a transpose would result in no columns, losing the dtype.
# In the empty case, reducing along axis 0 or 1 gives the same
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def arrays_to_mgr(
verify_integrity: bool = True,
typ: str | None = None,
consolidate: bool = True,
is_1d_ea_only: bool = False,
) -> Manager:
"""
Segregate Series based on type and coerce into matrices.
Expand All @@ -127,7 +128,8 @@ def arrays_to_mgr(

else:
index = ensure_index(index)
arrays = [extract_array(x, extract_numpy=True) for x in arrays]
if not is_1d_ea_only:
arrays = [extract_array(x, extract_numpy=True) for x in arrays]
# with _from_arrays, the passed arrays should never be Series objects
refs = [None] * len(arrays)

Expand All @@ -152,7 +154,7 @@ def arrays_to_mgr(

if typ == "block":
return create_block_manager_from_column_arrays(
arrays, axes, consolidate=consolidate, refs=refs
arrays, axes, consolidate, refs=refs, is_1d_ea_only=is_1d_ea_only
)
elif typ == "array":
return ArrayManager(arrays, [index, columns])
Expand Down
47 changes: 45 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1747,6 +1747,40 @@ def as_array(

return arr.transpose()

def as_array_masked(self) -> tuple[np.ndarray, np.ndarray]:
    """
    Convert the blockmanager data into a pair of 2D numpy arrays.

    All blocks are assumed to hold masked ExtensionArrays sharing a single
    dtype; the numpy dtype of the first block is used for the data array.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        ``(data, mask)``, both transposed relative to the manager's shape;
        ``mask`` has boolean dtype.

    Raises
    ------
    AssertionError
        If some manager rows are not covered by any block.
    """
    # TODO we already established we only have a single dtype, but this
    # could be generalized to be a mix of all masked dtypes
    # NOTE(review): assumes at least one block exists and every block's
    # values expose _data/_mask (BaseMaskedArray) -- confirm callers only
    # invoke this for all-masked-dtype frames.
    dtype = self.blocks[0].dtype.numpy_dtype

    result_data = np.empty(self.shape, dtype=dtype)
    result_mask = np.empty(self.shape, dtype="bool")

    # Track which manager rows have been filled by some block.
    itemmask = np.zeros(self.shape[0], dtype=bool)

    for blk in self.blocks:
        rl = blk.mgr_locs
        arr = blk.values
        result_data[rl.indexer] = arr._data
        result_mask[rl.indexer] = arr._mask
        itemmask[rl.indexer] = True

    if not itemmask.all():
        raise AssertionError("Some items were not contained in blocks")

    return result_data.transpose(), result_mask.transpose()

def _interleave(
self,
dtype: np.dtype | None = None,
Expand Down Expand Up @@ -2130,6 +2164,7 @@ def create_block_manager_from_column_arrays(
axes: list[Index],
consolidate: bool,
refs: list,
is_1d_ea_only: bool = False,
) -> BlockManager:
# Assertions disabled for performance (caller is responsible for verifying)
# assert isinstance(axes, list)
Expand All @@ -2143,7 +2178,7 @@ def create_block_manager_from_column_arrays(
# verify_integrity=False below.

try:
blocks = _form_blocks(arrays, consolidate, refs)
blocks = _form_blocks(arrays, consolidate, refs, is_1d_ea_only)
mgr = BlockManager(blocks, axes, verify_integrity=False)
except ValueError as e:
raise_construction_error(len(arrays), arrays[0].shape, axes, e)
Expand Down Expand Up @@ -2197,9 +2232,17 @@ def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]:
return sep, isinstance(dtype, np.dtype), dtype


def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list[Block]:
def _form_blocks(
arrays: list[ArrayLike], consolidate: bool, refs: list, is_1d_ea_only: bool
) -> list[Block]:
tuples = list(enumerate(arrays))

if is_1d_ea_only:
block_type = get_block_type(arrays[0].dtype)
return [
block_type(arr, placement=BlockPlacement(i), ndim=2) for i, arr in tuples
]

if not consolidate:
nbs = _tuples_to_blocks_no_consolidate(tuples, refs)
return nbs
Expand Down