Skip to content

Commit

Permalink
BUG: pd.array with non-nano
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Apr 23, 2023
1 parent c58fa84 commit adea1d3
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 37 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ Conversion
- Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`)
- Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`)
- Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`)
- Bug in :func:`array` when given a ``datetime64`` or ``timedelta64`` dtype with unit of "s", "us", or "ms" returning :class:`PandasArray` instead of :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`??`)
-

Strings
Expand Down
41 changes: 25 additions & 16 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from numpy import ma

from pandas._libs import lib
from pandas._libs.tslibs.period import Period
from pandas._libs.tslibs import (
Period,
get_unit_from_dtype,
is_supported_unit,
)
from pandas._typing import (
AnyArrayLike,
ArrayLike,
Expand All @@ -28,10 +32,7 @@
T,
)

from pandas.core.dtypes.base import (
ExtensionDtype,
_registry as registry,
)
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import (
construct_1d_arraylike_from_scalar,
construct_1d_object_array_from_listlike,
Expand All @@ -42,12 +43,10 @@
maybe_promote,
)
from pandas.core.dtypes.common import (
is_datetime64_ns_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_list_like,
is_object_dtype,
is_timedelta64_ns_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.generic import (
Expand Down Expand Up @@ -310,8 +309,8 @@ def array(
data = extract_array(data, extract_numpy=True)

# normalize to a dtype object; pandas_dtype raises TypeError for dtypes it does not understand.
if isinstance(dtype, str):
dtype = registry.find(dtype) or dtype
if dtype is not None:
dtype = pandas_dtype(dtype)

if isinstance(data, ExtensionArray) and (
dtype is None or is_dtype_equal(dtype, data.dtype)
Expand All @@ -321,8 +320,8 @@ def array(
return data.copy()
return data

if is_extension_array_dtype(dtype):
cls = cast(ExtensionDtype, dtype).construct_array_type()
if isinstance(dtype, ExtensionDtype):
cls = dtype.construct_array_type()
return cls._from_sequence(data, dtype=dtype, copy=copy)

if dtype is None:
Expand Down Expand Up @@ -365,12 +364,22 @@ def array(
return BooleanArray._from_sequence(data, copy=copy)

# Pandas overrides NumPy for
# 1. datetime64[ns]
# 2. timedelta64[ns]
# 1. datetime64[ns,us,ms,s]
# 2. timedelta64[ns,us,ms,s]
# so that a DatetimeArray or TimedeltaArray is returned.
if is_datetime64_ns_dtype(dtype):
if (
lib.is_np_dtype(dtype, "M")
# error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
# "Optional[dtype[Any]]"; expected "dtype[Any]"
and is_supported_unit(get_unit_from_dtype(dtype)) # type: ignore[arg-type]
):
return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
elif is_timedelta64_ns_dtype(dtype):
if (
lib.is_np_dtype(dtype, "m")
# error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
# "Optional[dtype[Any]]"; expected "dtype[Any]"
and is_supported_unit(get_unit_from_dtype(dtype)) # type: ignore[arg-type]
):
return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

return PandasArray._from_sequence(data, dtype=dtype, copy=copy)
Expand Down
31 changes: 10 additions & 21 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import pytest
import pytz

from pandas.core.dtypes.base import _registry as registry

import pandas as pd
import pandas._testing as tm
from pandas.api.extensions import register_extension_dtype
Expand Down Expand Up @@ -80,6 +78,11 @@
np.dtype("datetime64[ns]"),
DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")),
),
(
[1, 2],
np.dtype("datetime64[s]"),
DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[s]")),
),
(
np.array([1, 2], dtype="datetime64[ns]"),
None,
Expand Down Expand Up @@ -119,6 +122,11 @@
np.dtype("timedelta64[ns]"),
TimedeltaArray._from_sequence(["1H", "2H"]),
),
(
np.array([1, 2], dtype="m8[s]"),
np.dtype("timedelta64[s]"),
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[s]")),
),
(
pd.TimedeltaIndex(["1H", "2H"]),
None,
Expand Down Expand Up @@ -404,25 +412,6 @@ def test_array_unboxes(index_or_series):
tm.assert_equal(result, expected)


@pytest.fixture
def registry_without_decimal():
"""Fixture yielding 'registry' with no DecimalDtype entries"""
idx = registry.dtypes.index(DecimalDtype)
registry.dtypes.pop(idx)
yield
registry.dtypes.append(DecimalDtype)


def test_array_not_registered(registry_without_decimal):
# check we aren't on it
assert registry.find("decimal") is None
data = [decimal.Decimal("1"), decimal.Decimal("2")]

result = pd.array(data, dtype=DecimalDtype)
expected = DecimalArray._from_sequence(data)
tm.assert_equal(result, expected)


def test_array_to_numpy_na():
# GH#40638
arr = pd.array([pd.NA, 1], dtype="string")
Expand Down

0 comments on commit adea1d3

Please sign in to comment.