# Constants for CFTimeIndex.__repr__
# Width of one formatted date entry, e.g. "2000-01-01 00:00:00".
CFTIME_REPR_LENGTH = 19
# Indexes longer than this are abbreviated with an ellipsis row.
ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS = 100
# Number of items shown before and after the ellipsis row.
REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END = 10


def format_row(times, indent=0, separator=", ", row_end=",\n"):
    """Format a single row from format_times.

    Parameters
    ----------
    times : iterable
        Values of the row; each one is converted with ``str``.
    indent : int
        Number of spaces placed in front of the row.
    separator : str
        Text placed between two consecutive values.
    row_end : str
        Text appended after the last value of the row.

    Returns
    -------
    str
    """
    return " " * indent + separator.join(map(str, times)) + row_end


def format_times(
    index,
    max_width,
    offset,
    separator=", ",
    first_row_offset=0,
    intermediate_row_end=",\n",
    last_row_end="",
):
    """Format values of cftimeindex as pd.Index.

    Parameters
    ----------
    index : sequence
        Values to format; must support ``len`` and slicing.
    max_width : int
        Total line width available, including the ``offset`` columns.
    offset : int
        Indentation of every row except the first.
    separator : str
        Text placed between two consecutive values.
    first_row_offset : int
        Indentation of the first row (``0`` when the caller writes its own
        prefix in front of the returned string).
    intermediate_row_end : str
        Text appended to every row except the last.
    last_row_end : str
        Text appended to the last row.

    Returns
    -------
    str
        The rows joined into one string; empty for an empty ``index``.
    """
    # Every row is preceded by ``offset`` columns (indentation, or the
    # caller's own prefix on the first row), so only the remaining width is
    # available for values. Always show at least one value per row.
    usable_width = max_width - offset
    n_per_row = max(usable_width // (CFTIME_REPR_LENGTH + len(separator)), 1)

    row_starts = range(0, len(index), n_per_row)
    last_row = len(row_starts) - 1

    rows = [
        format_row(
            index[start : start + n_per_row],
            indent=first_row_offset if row == 0 else offset,
            separator=separator,
            row_end=last_row_end if row == last_row else intermediate_row_end,
        )
        for row, start in enumerate(row_starts)
    ]
    return "".join(rows)


def format_attrs(index, separator=", "):
    """Format attributes of CFTimeIndex for __repr__.

    Reads ``index.dtype``, ``len(index)`` and ``index.calendar`` and returns
    them as ``key=value`` pairs joined by ``separator``.
    """
    attrs = {
        "dtype": f"'{index.dtype}'",
        "length": f"{len(index)}",
        "calendar": f"'{index.calendar}'",
    }
    return separator.join(f"{key}={value}" for key, value in attrs.items())


class CFTimeIndex(pd.Index):
    """Custom Index for working with CF calendars and dates"""

    # NOTE(review): excerpt. Only ``__repr__`` is visible in this region of
    # the source; the other members of the class are not reproduced here.

    def __repr__(self):
        """
        Return a string representation for this object.

        The values are wrapped to ``OPTIONS["display_width"]``. Indexes with
        more than ``ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS`` items show only the
        first and last ``REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END`` items around an
        ellipsis row. ``dtype``, ``length`` and ``calendar`` are appended.
        """
        klass_name = type(self).__name__
        display_width = OPTIONS["display_width"]
        # Columns occupied by the "<ClassName>([" prefix of the first row.
        offset = len(klass_name) + 2

        if len(self) <= ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS:
            datastr = format_times(
                self.values, display_width, offset=offset, first_row_offset=0
            )
        else:
            front_str = format_times(
                self.values[:REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END],
                display_width,
                offset=offset,
                first_row_offset=0,
                last_row_end=",",
            )
            end_str = format_times(
                self.values[-REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END:],
                display_width,
                offset=offset,
                first_row_offset=offset,
            )
            datastr = "\n".join([front_str, f"{' ' * offset}...", end_str])

        attrs_str = format_attrs(self)
        # One-liner only if it fits into display_width.
        full_repr_str = f"{klass_name}([{datastr}], {attrs_str})"
        if len(full_repr_str) <= display_width:
            return full_repr_str

        attrs_indent = " " * (offset - 1)
        # If the attributes do not fit on one line, put one per line.
        if len(attrs_str) >= display_width - offset:
            attrs_str = format_attrs(self, separator=f",\n{attrs_indent}")
        return f"{klass_name}([{datastr}],\n{attrs_indent}{attrs_str})"
# ---- xarray/coding/cftimeindex.py ------------------------------------------
class CFTimeIndex(pd.Index):
    # NOTE(review): excerpt. Only the ``calendar`` property is visible in this
    # region of the source; the other members are not reproduced here.

    @property
    def calendar(self):
        """The calendar used by the datetimes in the index."""
        # ``infer_calendar_name`` is already imported at module level from
        # ``.times``, so no function-scope import is needed.
        return infer_calendar_name(self)


# ---- xarray/tests/test_cftimeindex.py --------------------------------------
# (calendar passed to ``cftime_range``, calendar name expected on the index)
_CALENDAR_NAME_CASES = [
    ("noleap", "noleap"),
    ("365_day", "noleap"),
    ("360_day", "360_day"),
    ("julian", "julian"),
    ("gregorian", "gregorian"),
    ("proleptic_gregorian", "proleptic_gregorian"),
]


@requires_cftime
@pytest.mark.parametrize(("calendar", "expected"), _CALENDAR_NAME_CASES)
def test_cftimeindex_calendar_property(calendar, expected):
    """Test that cftimeindex.calendar returns the expected calendar name."""
    index = xr.cftime_range(start="2000", periods=3, calendar=calendar)
    assert index.calendar == expected


@requires_cftime
@pytest.mark.parametrize(("calendar", "expected"), _CALENDAR_NAME_CASES)
def test_cftimeindex_calendar_repr(calendar, expected):
    """Test that cftimeindex has calendar property in repr."""
    index = xr.cftime_range(start="2000", periods=3, calendar=calendar)
    repr_str = repr(index)
    assert f" calendar='{expected}'" in repr_str
    assert "2000-01-01 00:00:00, 2000-01-02 00:00:00" in repr_str


@requires_cftime
@pytest.mark.parametrize("periods", [2, 40])
def test_cftimeindex_periods_repr(periods):
    """Test that cftimeindex shows its length in repr."""
    index = xr.cftime_range(start="2000", periods=periods)
    repr_str = repr(index)
    assert f" length={periods}" in repr_str


@requires_cftime
@pytest.mark.parametrize(
    "periods,expected",
    [
        (
            2,
            """\
CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00],
            dtype='object', length=2, calendar='gregorian')""",
        ),
        (
            4,
            """\
CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00, 2000-01-03 00:00:00,
             2000-01-04 00:00:00],
            dtype='object', length=4, calendar='gregorian')""",
        ),
        (
            101,
            """\
CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00, 2000-01-03 00:00:00,
             2000-01-04 00:00:00, 2000-01-05 00:00:00, 2000-01-06 00:00:00,
             2000-01-07 00:00:00, 2000-01-08 00:00:00, 2000-01-09 00:00:00,
             2000-01-10 00:00:00,
             ...
             2000-04-01 00:00:00, 2000-04-02 00:00:00, 2000-04-03 00:00:00,
             2000-04-04 00:00:00, 2000-04-05 00:00:00, 2000-04-06 00:00:00,
             2000-04-07 00:00:00, 2000-04-08 00:00:00, 2000-04-09 00:00:00,
             2000-04-10 00:00:00],
            dtype='object', length=101, calendar='gregorian')""",
        ),
    ],
)
def test_cftimeindex_repr_formatting(periods, expected):
    """Test that cftimeindex.__repr__ is formatted similar to pd.Index.__repr__."""
    index = xr.cftime_range(start="2000", periods=periods)
    expected = dedent(expected)
    assert expected == repr(index)


@requires_cftime
@pytest.mark.parametrize("display_width", [40, 80, 100])
@pytest.mark.parametrize("periods", [2, 3, 4, 100, 101])
def test_cftimeindex_repr_formatting_width(periods, display_width):
    """Test that cftimeindex is sensitive to OPTIONS['display_width']."""
    index = xr.cftime_range(start="2000", periods=periods)
    len_intro_str = len("CFTimeIndex(")
    with xr.set_options(display_width=display_width):
        repr_str = repr(index)
        for i, line in enumerate(repr_str.split("\n")):
            # check that lines not longer than OPTIONS['display_width']
            assert len(line) <= display_width, f"{len(line)} {line} {display_width}"
            if i > 0:
                # check for initial spaces
                assert line[:len_intro_str] == " " * len_intro_str


@requires_cftime
@pytest.mark.parametrize("periods", [22, 50, 100])
def test_cftimeindex_repr_101_shorter(periods):
    """Test that the abbreviated repr of 101 items is shorter than a full repr."""
    index_101 = xr.cftime_range(start="2000", periods=101)
    index_periods = xr.cftime_range(start="2000", periods=periods)
    index_101_repr_str = repr(index_101)
    index_periods_repr_str = repr(index_periods)
    assert len(index_101_repr_str) < len(index_periods_repr_str)