Skip to content

Commit

Permalink
Backport PR #53233 on branch 2.0.x (BUG: preserve dtype for right/outer merge of datetime with different resolutions) (#53275)
Browse files Browse the repository at this point in the history

Backport PR #53233: BUG: preserve dtype for right/outer merge of datetime with different resolutions

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
  • Loading branch information
meeseeksmachine and jorisvandenbossche committed May 17, 2023
1 parent 340346c commit a23c15c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 18 deletions.
8 changes: 8 additions & 0 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,14 @@ def _maybe_add_join_keys(
else:
key_col = Index(lvals).where(~mask_left, rvals)
result_dtype = find_common_type([lvals.dtype, rvals.dtype])
if (
lvals.dtype.kind == "M"
and rvals.dtype.kind == "M"
and result_dtype.kind == "O"
):
# TODO(non-nano) Workaround for common_type not dealing
# with different resolutions
result_dtype = key_col.dtype

if result._is_label_reference(name):
result[name] = Series(
Expand Down
39 changes: 21 additions & 18 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import numpy as np
import pytest
import pytz

from pandas.core.dtypes.common import (
is_categorical_dtype,
Expand Down Expand Up @@ -2753,24 +2752,28 @@ def test_merge_arrow_and_numpy_dtypes(dtype):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")])
def test_merge_datetime_different_resolution(tzinfo):
@pytest.mark.parametrize("how", ["inner", "left", "outer", "right"])
@pytest.mark.parametrize("tz", [None, "America/Chicago"])
def test_merge_datetime_different_resolution(tz, how):
# https://github.com/pandas-dev/pandas/issues/53200
df1 = DataFrame(
{
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo, unit="ns")],
"a": [1],
}
)
df2 = df1.copy()
vals = [
pd.Timestamp(2023, 5, 12, tz=tz),
pd.Timestamp(2023, 5, 13, tz=tz),
pd.Timestamp(2023, 5, 14, tz=tz),
]
df1 = DataFrame({"t": vals[:2], "a": [1.0, 2.0]})
df1["t"] = df1["t"].dt.as_unit("ns")
df2 = DataFrame({"t": vals[1:], "b": [1.0, 2.0]})
df2["t"] = df2["t"].dt.as_unit("s")

expected = DataFrame(
{
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo)],
"a_x": [1],
"a_y": [1],
}
)
result = df1.merge(df2, on="t")
expected = DataFrame({"t": vals, "a": [1.0, 2.0, np.nan], "b": [np.nan, 1.0, 2.0]})
expected["t"] = expected["t"].dt.as_unit("ns")
if how == "inner":
expected = expected.iloc[[1]].reset_index(drop=True)
elif how == "left":
expected = expected.iloc[[0, 1]]
elif how == "right":
expected = expected.iloc[[1, 2]].reset_index(drop=True)

result = df1.merge(df2, on="t", how=how)
tm.assert_frame_equal(result, expected)

0 comments on commit a23c15c

Please sign in to comment.