Skip to content

Commit

Permalink
Backport PR #53213 on branch 2.0.x (FIX preserve dtype with datetime …
Browse files Browse the repository at this point in the history
…columns of different resolution when merging) (#53228)

FIX preserve dtype with datetime columns of different resolution when merging (#53213)

(cherry picked from commit 935244a)

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
  • Loading branch information
phofl and glemaitre committed May 15, 2023
1 parent cc47ec2 commit 7a28ced
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Bug fixes
- Bug in :func:`api.interchange.from_dataframe` was raising ``IndexError`` on empty categorical data (:issue:`53077`)
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`)
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
- Bug in :func:`merge` when merging on datetime columns with different resolutions (:issue:`53200`)
- Bug in :meth:`DataFrame.convert_dtypes` ignoring ``convert_*`` keywords set to ``False`` when ``dtype_backend="pyarrow"`` (:issue:`52872`)
- Bug in :meth:`Series.describe` treating pyarrow-backed timestamps and timedeltas as categorical data (:issue:`53001`)
- Bug in :meth:`Series.rename` not making a lazy copy when Copy-on-Write is enabled when a scalar is passed to it (:issue:`52450`)
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1401,6 +1401,12 @@ def _maybe_coerce_merge_keys(self) -> None:
rk.dtype, DatetimeTZDtype
):
raise ValueError(msg)
elif (
isinstance(lk.dtype, DatetimeTZDtype)
and isinstance(rk.dtype, DatetimeTZDtype)
) or (lk.dtype.kind == "M" and rk.dtype.kind == "M"):
# allows datetime with different resolutions
continue

elif lk_is_object and rk_is_object:
continue
Expand Down Expand Up @@ -2355,7 +2361,7 @@ def _factorize_keys(
if isinstance(lk.dtype, DatetimeTZDtype) and isinstance(rk.dtype, DatetimeTZDtype):
# Extract the ndarray (UTC-localized) values
# Note: we dont need the dtypes to match, as these can still be compared
# TODO(non-nano): need to make sure resolutions match
lk, rk = cast("DatetimeArray", lk)._ensure_matching_resos(rk)
lk = cast("DatetimeArray", lk)._ndarray
rk = cast("DatetimeArray", rk)._ndarray

Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import numpy as np
import pytest
import pytz

from pandas.core.dtypes.common import (
is_categorical_dtype,
Expand Down Expand Up @@ -2750,3 +2751,26 @@ def test_merge_arrow_and_numpy_dtypes(dtype):
result = df2.merge(df)
expected = df2.copy()
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")])
def test_merge_datetime_different_resolution(tzinfo):
    """Check that merging on datetime keys with different units works.

    Regression test for https://github.com/pandas-dev/pandas/issues/53200.
    The left key is built with ``unit="ns"`` and the right key is the same
    column converted with ``as_unit("s")``; the case is run for both a naive
    and a tz-aware timestamp.
    """
    stamp = pd.Timestamp(2023, 5, 12, tzinfo=tzinfo, unit="ns")
    left = DataFrame({"t": [stamp], "a": [1]})

    # Same rows as ``left``, but with the key column converted to seconds.
    right = left.copy()
    right["t"] = right["t"].dt.as_unit("s")

    result = left.merge(right, on="t")

    # Both frames carry an "a" column, so the merge suffixes them _x / _y.
    expected = DataFrame(
        {
            "t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo)],
            "a_x": [1],
            "a_y": [1],
        }
    )
    tm.assert_frame_equal(result, expected)

0 comments on commit 7a28ced

Please sign in to comment.