Skip to content

Commit

Permalink
BUG: 2D ndarray of dtype 'object' is always copied upon construction (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
irgolic committed Jan 20, 2021
1 parent edbd450 commit 560f894
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 30 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
- Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`)
- Bug in :class:`DataFrame` constructor unnecessarily copying 2D object arrays (:issue:`39263`)

Timedelta
^^^^^^^^^
Expand Down
32 changes: 2 additions & 30 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
is_integer_dtype,
is_list_like,
is_named_tuple,
is_object_dtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
Expand All @@ -59,7 +58,7 @@
)
from pandas.core.internals.managers import (
create_block_manager_from_arrays,
create_block_manager_from_blocks,
create_block_manager_from_array
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -232,34 +231,7 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
)
values = values.T

# if we don't have a dtype specified, then try to convert objects
# on the entire block; this is to convert if we have datetimelike's
# embedded in an object type
if dtype is None and is_object_dtype(values.dtype):

if values.ndim == 2 and values.shape[0] != 1:
# transpose and separate blocks

dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
for n in range(len(dvals_list)):
if isinstance(dvals_list[n], np.ndarray):
dvals_list[n] = dvals_list[n].reshape(1, -1)

from pandas.core.internals.blocks import make_block

# TODO: What about re-joining object columns?
block_values = [
make_block(dvals_list[n], placement=[n], ndim=2)
for n in range(len(dvals_list))
]

else:
datelike_vals = maybe_infer_to_datetimelike(values)
block_values = [datelike_vals]
else:
block_values = [values]

return create_block_manager_from_blocks(block_values, [columns, index])
return create_block_manager_from_array(values, [columns, index], dtype)


def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
Expand Down
30 changes: 30 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
find_common_type,
infer_dtype_from_scalar,
maybe_promote,
maybe_infer_to_datetimelike
)
from pandas.core.dtypes.common import (
DT64NS_DTYPE,
Expand Down Expand Up @@ -1679,6 +1680,30 @@ def create_block_manager_from_arrays(
raise construction_error(len(arrays), arrays[0].shape, axes, e)


def create_block_manager_from_array(
    array, axes: List[Index], dtype: Optional[Dtype] = None
) -> BlockManager:
    """
    Build a BlockManager from a single array.

    Parameters
    ----------
    array : array-like
        Values to wrap. A PandasArray is converted with ``to_numpy`` first.
        Iterating ``array`` yields the entries that are individually checked
        for datetimelike content.
    axes : list of Index
        Axes of the resulting manager; ``len(axes[0])`` determines the
        placement of the single block.
    dtype : dtype, optional
        Forwarded to ``make_block``. When given, datetimelike inference is
        skipped entirely because its result would not be used.

    Returns
    -------
    BlockManager
        The consolidated manager.

    Raises
    ------
    Exception produced by ``construction_error``
        When a ``ValueError`` occurs while building the blocks or the manager.
    """
    assert isinstance(axes, list)
    assert all(isinstance(x, Index) for x in axes)

    # ensure we don't have any PandasArrays when we call get_block_type
    # Note: just calling extract_array breaks tests that patch PandasArray._typ.
    if isinstance(array, ABCPandasArray):
        array = array.to_numpy()

    # The inferred entries only influence the outcome when no dtype was
    # requested, so avoid the per-entry inference pass otherwise.
    inferred = None
    if dtype is None:
        inferred = [maybe_infer_to_datetimelike(entry) for entry in array]

    try:
        if inferred is None or all(
            is_dtype_equal(entry.dtype, array.dtype) for entry in inferred
        ):
            # Nothing was converted (or a dtype was requested): wrap the
            # array as a single block instead of splitting it per entry.
            blocks = [make_block(array, slice(0, len(axes[0])), dtype=dtype)]
        else:
            # At least one entry changed dtype during inference, so build
            # blocks from the individually inferred entries.
            blocks = _form_blocks(inferred, axes[0], axes)
        mgr = BlockManager(blocks, axes)
        mgr._consolidate_inplace()
        return mgr
    except ValueError as e:
        raise construction_error(array.shape[0], array.shape[1:], axes, e)


def construction_error(tot_items, block_shape, axes, e=None):
""" raise a helpful message about our construction """
passed = tuple(map(int, [tot_items] + list(block_shape)))
Expand Down Expand Up @@ -1706,6 +1731,11 @@ def construction_error(tot_items, block_shape, axes, e=None):
def _form_blocks(arrays, names: Index, axes) -> List[Block]:
# put "leftover" items in float bucket, where else?
# generalize?

if len(arrays) != len(names):
raise ValueError(f"Number of arrays ({len(arrays)}) "
f"does not match index length ({len(names)})")

items_dict: DefaultDict[str, List] = defaultdict(list)
extra_locs = []

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2267,6 +2267,14 @@ def test_nested_dict_construction(self):
)
tm.assert_frame_equal(result, expected)

def test_object_array_does_not_copy(self):
    # GH#39263 object-dtype ndarrays should be wrapped without copying
    flat = np.array(["a", "b"], dtype="object")
    grid = np.array([["a", "b"], ["c", "d"]], dtype="object")

    # 1D input: the frame's values must still be backed by the input buffer
    frame_from_flat = DataFrame(flat)
    assert np.shares_memory(frame_from_flat.values, flat)

    # 2D input: same expectation for a multi-column object array
    frame_from_grid = DataFrame(grid)
    assert np.shares_memory(frame_from_grid.values, grid)

def test_from_tzaware_object_array(self):
# GH#26825 2D object array of tzaware timestamps should not raise
dti = date_range("2016-04-05 04:30", periods=3, tz="UTC")
Expand Down

0 comments on commit 560f894

Please sign in to comment.