diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py
index b1cba7ee31eac..2588f59268334 100644
--- a/pandas/tests/reshape/concat/test_datetimes.py
+++ b/pandas/tests/reshape/concat/test_datetimes.py
@@ -278,6 +278,32 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
         result = concat([first, second])
         tm.assert_frame_equal(result, expected)
 
+    def test_concat_ns_and_s_preserves_datetime64(self):
+        # GH 53307
+        # ensure concatenating a datetime64[ns] column and a copy cast to M8[s]
+        # yields a datetime64 dtype (finest unit should be ns)
+        df = pd.DataFrame(
+            {"ints": range(2), "dates": pd.date_range("2000", periods=2, freq="min")}
+        )
+        df2 = df.copy()
+        df2["dates"] = df2["dates"].astype("M8[s]")
+
+        combined = pd.concat([df, df2], ignore_index=True)
+
+        # dtype is a datetime64 type
+        assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype)
+
+        # unit should be the finest (ns) when mixing ns and s
+        unit = np.datetime_data(combined["dates"].dtype)[0]
+        assert unit == "ns"
+
+        # values preserved (compare as ns)
+        exp = pd.to_datetime(list(df["dates"]) + list(df2["dates"]))
+        tm.assert_series_equal(
+            combined["dates"].astype("datetime64[ns]").reset_index(drop=True),
+            pd.Series(exp.astype("datetime64[ns]"), name="dates").reset_index(drop=True),
+        )
+
 
 class TestTimezoneConcat:
     def test_concat_tz_series(self):
@@ -591,3 +617,52 @@ def test_concat_float_datetime64():
 
     result = concat([df_time, df_float.iloc[:0]])
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "unit,unit2",
+    [(u1, u2) for u1 in ("ns", "us", "ms", "s") for u2 in ("ns", "us", "ms", "s")],
+)
+def test_concat_mixed_units_preserve_datetime_and_unit(unit, unit2):
+    # GH 53307
+    # for each pair of units, concatenating columns of those units should
+    # result in a datetime64 dtype with the finest unit
+    df = pd.DataFrame({"dates": pd.to_datetime(["2000-01-01", "2000-01-02"])})
+    # cast copies to requested unit
+    df1 = df.copy()
+    df1["dates"] = df1["dates"].astype(f"M8[{unit}]")
+    df2 = df.copy()
+    df2["dates"] = df2["dates"].astype(f"M8[{unit2}]")
+
+    exp_unit = tm.get_finest_unit(unit, unit2)
+
+    # test both concat orders
+    for a, b in ((df1, df2), (df2, df1)):
+        combined = pd.concat([a, b], ignore_index=True)
+
+        assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype)
+
+        res_unit = np.datetime_data(combined["dates"].dtype)[0]
+        assert res_unit == exp_unit
+
+
+@pytest.mark.parametrize(
+    "unit,unit2",
+    [(u1, u2) for u1 in ("ns", "us", "ms", "s") for u2 in ("ns", "us", "ms", "s")],
+)
+def test_concat_mixed_units_with_all_nat(unit, unit2):
+    # GH 53307
+    # mixing non-empty datetime column and an all-NaT column typed to unit2
+    df = pd.DataFrame({"dates": pd.to_datetime(["2000-01-01"])})
+    df1 = df.copy()
+    df1["dates"] = df1["dates"].astype(f"M8[{unit}]")
+
+    ser_nat = pd.Series([pd.NaT], dtype=f"datetime64[{unit2}]")
+    df2 = pd.DataFrame({"dates": ser_nat})
+
+    exp_unit = tm.get_finest_unit(unit, unit2)
+
+    combined = pd.concat([df1, df2], ignore_index=True)
+    assert pd.api.types.is_datetime64_any_dtype(combined["dates"].dtype)
+    res_unit = np.datetime_data(combined["dates"].dtype)[0]
+    assert res_unit == exp_unit