diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index f10b9b83558..4996c5b0d11 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -98,6 +98,8 @@
     "scale_factor",
 ]
 
+_ORDERED_PANDAS_TIME_RESOLUTIONS: list[PDDatetimeUnitOptions] = ["s", "ms", "us", "ns"]
+
 
 def _is_standard_calendar(calendar: str) -> bool:
     return calendar.lower() in _STANDARD_CALENDARS
@@ -293,6 +295,21 @@ def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp:
     return date
 
 
+def _cast_timestamp_to_coarsest_resolution(timestamp: pd.Timestamp) -> pd.Timestamp:
+    # Cast timestamp to the coarsest resolution that can be used without
+    # changing its value. If provided a string, the pandas.Timestamp
+    # constructor used to automatically infer this from the resolution of the
+    # string, but this behavior was changed in pandas-dev/pandas#62801. This
+    # function allows us to approximately restore the old behavior in a way
+    # that is perhaps more consistent with how we infer the resolution of the
+    # data values themselves.
+    for unit in _ORDERED_PANDAS_TIME_RESOLUTIONS:
+        coarsest_timestamp = timestamp.as_unit(unit)
+        if coarsest_timestamp == timestamp:
+            return coarsest_timestamp
+    return timestamp
+
+
 def _unpack_time_unit_and_ref_date(
     units: str,
 ) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]:
@@ -301,6 +318,7 @@ def _unpack_time_unit_and_ref_date(
     time_unit, _ref_date = _unpack_netcdf_time_units(units)
     time_unit = _netcdf_to_numpy_timeunit(time_unit)
     ref_date = pd.Timestamp(_ref_date)
+    ref_date = _cast_timestamp_to_coarsest_resolution(ref_date)
     ref_date = _maybe_strip_tz_from_timestamp(ref_date)
     return time_unit, ref_date
 
@@ -442,8 +460,8 @@ def _check_higher_resolution(
     time_unit: PDDatetimeUnitOptions,
 ) -> tuple[np.ndarray, PDDatetimeUnitOptions]:
     """Iterate until fitting resolution found."""
-    res: list[PDDatetimeUnitOptions] = ["s", "ms", "us", "ns"]
-    new_units = res[res.index(time_unit) :]
+    index = _ORDERED_PANDAS_TIME_RESOLUTIONS.index(time_unit)
+    new_units = _ORDERED_PANDAS_TIME_RESOLUTIONS[index:]
     for new_time_unit in new_units:
         if not ((np.unique(flat_num_dates % 1) > 0).any() and new_time_unit != "ns"):
             break
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 187a2f7002c..f66dd4da711 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1086,7 +1086,7 @@ def test_numpy_same_methods(self):
         [
             (np.datetime64("2000-01-01"), "s"),
             (
-                pd.Timestamp("2000-01-01T00"),
+                pd.Timestamp("2000-01-01T00").as_unit("s"),
                 "s" if has_pandas_3 else "ns",
             ),
             (
@@ -1128,7 +1128,7 @@ def test_0d_str(self):
         assert v.values == "foo".encode("ascii")
 
     def test_0d_datetime(self):
-        v = Variable([], pd.Timestamp("2000-01-01"))
+        v = Variable([], pd.Timestamp("2000-01-01").as_unit("s"))
         expected_unit = "s" if has_pandas_3 else "ns"
         assert v.dtype == np.dtype(f"datetime64[{expected_unit}]")
         assert v.values == np.datetime64("2000-01-01", expected_unit)  # type: ignore[call-overload]