Skip to content

Commit

Permalink
BUG: preserve dtype for right/outer merge of datetime with different resolutions (#53233)
Browse files: browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed May 17, 2023
1 parent 5c8c16e commit 93f9ae0
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 18 deletions.
8 changes: 8 additions & 0 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,14 @@ def _maybe_add_join_keys(
else:
key_col = Index(lvals).where(~mask_left, rvals)
result_dtype = find_common_type([lvals.dtype, rvals.dtype])
if (
lvals.dtype.kind == "M"
and rvals.dtype.kind == "M"
and result_dtype.kind == "O"
):
# TODO(non-nano) Workaround for find_common_type not dealing
# with datetime dtypes of different resolutions (it falls back
# to object dtype), so reuse the dtype inferred for key_col
result_dtype = key_col.dtype

if result._is_label_reference(name):
result[name] = result._constructor_sliced(
Expand Down
39 changes: 21 additions & 18 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import numpy as np
import pytest
import pytz

from pandas.core.dtypes.common import is_object_dtype
from pandas.core.dtypes.dtypes import CategoricalDtype
Expand Down Expand Up @@ -2776,26 +2775,30 @@ def test_merge_arrow_and_numpy_dtypes(dtype):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")])
def test_merge_datetime_different_resolution(tzinfo):
@pytest.mark.parametrize("how", ["inner", "left", "outer", "right"])
@pytest.mark.parametrize("tz", [None, "America/Chicago"])
def test_merge_datetime_different_resolution(tz, how):
# https://github.com/pandas-dev/pandas/issues/53200
df1 = DataFrame(
{
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo, unit="ns")],
"a": [1],
}
)
df2 = df1.copy()
vals = [
pd.Timestamp(2023, 5, 12, tz=tz),
pd.Timestamp(2023, 5, 13, tz=tz),
pd.Timestamp(2023, 5, 14, tz=tz),
]
df1 = DataFrame({"t": vals[:2], "a": [1.0, 2.0]})
df1["t"] = df1["t"].dt.as_unit("ns")
df2 = DataFrame({"t": vals[1:], "b": [1.0, 2.0]})
df2["t"] = df2["t"].dt.as_unit("s")

expected = DataFrame(
{
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo)],
"a_x": [1],
"a_y": [1],
}
)
result = df1.merge(df2, on="t")
expected = DataFrame({"t": vals, "a": [1.0, 2.0, np.nan], "b": [np.nan, 1.0, 2.0]})
expected["t"] = expected["t"].dt.as_unit("ns")
if how == "inner":
expected = expected.iloc[[1]].reset_index(drop=True)
elif how == "left":
expected = expected.iloc[[0, 1]]
elif how == "right":
expected = expected.iloc[[1, 2]].reset_index(drop=True)

result = df1.merge(df2, on="t", how=how)
tm.assert_frame_equal(result, expected)


Expand Down

0 comments on commit 93f9ae0

Please sign in to comment.