Skip to content

Commit

Permalink
BUG: fixed Series.dt methods in ArrowDtype class that were returning …
Browse files Browse the repository at this point in the history
…incorrect time values. (#57355)
  • Loading branch information
St0rmie committed Jun 15, 2024
1 parent dd87dd3 commit 61379f7
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 17 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ Other
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values for timedelta components (e.g. ``seconds``, ``microseconds``). (:issue:`57355`)
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
Expand Down
35 changes: 18 additions & 17 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

from pandas._libs import lib
from pandas._libs.tslibs import (
NaT,
Timedelta,
Timestamp,
timezones,
Expand Down Expand Up @@ -2612,17 +2611,19 @@ def _str_wrap(self, width: int, **kwargs) -> Self:
@property
def _dt_days(self) -> Self:
return type(self)(
pa.array(self._to_timedeltaarray().days, from_pandas=True, type=pa.int32())
pa.array(
self._to_timedeltaarray().components.days,
from_pandas=True,
type=pa.int32(),
)
)

@property
def _dt_hours(self) -> Self:
return type(self)(
pa.array(
[
td.components.hours if td is not NaT else None
for td in self._to_timedeltaarray()
],
self._to_timedeltaarray().components.hours,
from_pandas=True,
type=pa.int32(),
)
)
Expand All @@ -2631,10 +2632,8 @@ def _dt_hours(self) -> Self:
def _dt_minutes(self) -> Self:
return type(self)(
pa.array(
[
td.components.minutes if td is not NaT else None
for td in self._to_timedeltaarray()
],
self._to_timedeltaarray().components.minutes,
from_pandas=True,
type=pa.int32(),
)
)
Expand All @@ -2643,18 +2642,18 @@ def _dt_minutes(self) -> Self:
def _dt_seconds(self) -> Self:
return type(self)(
pa.array(
self._to_timedeltaarray().seconds, from_pandas=True, type=pa.int32()
self._to_timedeltaarray().components.seconds,
from_pandas=True,
type=pa.int32(),
)
)

@property
def _dt_milliseconds(self) -> Self:
return type(self)(
pa.array(
[
td.components.milliseconds if td is not NaT else None
for td in self._to_timedeltaarray()
],
self._to_timedeltaarray().components.milliseconds,
from_pandas=True,
type=pa.int32(),
)
)
Expand All @@ -2663,7 +2662,7 @@ def _dt_milliseconds(self) -> Self:
def _dt_microseconds(self) -> Self:
return type(self)(
pa.array(
self._to_timedeltaarray().microseconds,
self._to_timedeltaarray().components.microseconds,
from_pandas=True,
type=pa.int32(),
)
Expand All @@ -2673,7 +2672,9 @@ def _dt_microseconds(self) -> Self:
def _dt_nanoseconds(self) -> Self:
return type(self)(
pa.array(
self._to_timedeltaarray().nanoseconds, from_pandas=True, type=pa.int32()
self._to_timedeltaarray().components.nanoseconds,
from_pandas=True,
type=pa.int32(),
)
)

Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2905,6 +2905,31 @@ def test_dt_components():
tm.assert_frame_equal(result, expected)


def test_dt_components_large_values():
    """Check ``Series.dt.components`` on an Arrow duration Series.

    Uses a timedelta of more than a year plus a missing value, and expects
    one int32[pyarrow] column per component with an all-null second row.
    """
    # GH 57355
    component_names = [
        "days",
        "hours",
        "minutes",
        "seconds",
        "milliseconds",
        "microseconds",
        "nanoseconds",
    ]
    large_td = pd.Timedelta("365 days 23:59:59.999000")
    ser = pd.Series([large_td, None], dtype=ArrowDtype(pa.duration("ns")))

    # Second row is the missing value: every component must be null.
    null_row = [None] * 7
    expected = pd.DataFrame(
        [[365, 23, 59, 59, 999, 0, 0], null_row],
        columns=component_names,
        dtype="int32[pyarrow]",
    )

    result = ser.dt.components
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("skipna", [True, False])
def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
# GH51624
Expand Down

0 comments on commit 61379f7

Please sign in to comment.