Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CFTimeIndex calendar in repr #4092

Merged
merged 37 commits into from
Jul 23, 2020
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
0bd61d9
add property
May 25, 2020
28cbf84
test repr skip
May 25, 2020
c5d36cc
repr
Jun 3, 2020
8d9ebe0
linting
Jun 3, 2020
c1cac9f
remove unnecessary
Jun 3, 2020
d09092a
remove unnecessary
Jun 3, 2020
59dc912
add quotation marks to calendar
Jun 4, 2020
1fccb53
add length to wrapper
Jun 4, 2020
a52e997
linting
Jun 4, 2020
6d022ea
coords.to_index() if CFTimeIndex
Jun 6, 2020
bf8c5a0
to_index() iff CFTimeIndex
Jun 6, 2020
1e809d0
revert linting
Jun 6, 2020
1bfdc7c
revert linting
Jun 6, 2020
d58cb7d
revert linting
Jun 6, 2020
a0d00ab
to_index in short_data_repr_html
Jun 9, 2020
019d309
refine test and rm prints
Jun 9, 2020
269e967
fix to pass all tests
Jun 9, 2020
68a37c6
revert linting changes
Jun 11, 2020
0907cef
revert to_index()
Jun 12, 2020
a9c048f
require cftime for added test
Jun 12, 2020
fcb48fd
implement format_array_flat repr without commata and multiple lines
Jun 14, 2020
83839ec
reproduce pd.Index repr for CFTimeIndex repr
Jun 15, 2020
e3c8c01
reproduce pd.Index repr for CFTimeIndex repr
Jun 15, 2020
69d000f
sensitive to display_width
Jul 7, 2020
80ca891
rewritte format_cftimeindex_array from template of format_array_flat
Jul 7, 2020
3080d81
bugfix
Jul 7, 2020
b77a2ee
new approach
Jul 15, 2020
1e67f3f
Merge branch 'master' into AS_CFTimeIndex_repr_calendar
aaronspring Jul 15, 2020
d8d54ce
docstring
Jul 15, 2020
4b62406
Merge branch 'AS_CFTimeIndex_repr_calendar' of https://github.com/aar…
Jul 15, 2020
249ae24
attrs spaces fix
Jul 15, 2020
ecef05a
rm pandas test, refactor format_attrs and repr test dedent
Jul 18, 2020
7c31e3a
rm f lint
Jul 18, 2020
683f00c
Pass index to format_attrs instead of attrs dict
spencerkclark Jul 19, 2020
2707409
Update whats-new.rst
spencerkclark Jul 19, 2020
b7552b3
Add docstring for new calendar property
spencerkclark Jul 19, 2020
e8d85db
Update doc/whats-new.rst
spencerkclark Jul 19, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ Breaking changes

New Features
~~~~~~~~~~~~
- Build ``CFTimeIndex.__repr__`` explicitly, following the layout of the
``pandas.Index`` repr. Add ``calendar`` as a new property for ``CFTimeIndex``
and show ``calendar`` and ``length`` in ``CFTimeIndex.__repr__``
(:issue:`2416`, :pull:`4092`).
By `Aaron Spring <https://github.com/aaronspring>`_.


Bug fixes
Expand Down Expand Up @@ -173,7 +177,6 @@ Enhancements
(:pull:`3905`)
By `Maximilian Roos <https://github.com/max-sixty>`_


Bug fixes
~~~~~~~~~
- Fix errors combining attrs in :py:func:`open_mfdataset` (:issue:`4009`, :pull:`4173`)
Expand Down
56 changes: 56 additions & 0 deletions xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
from xarray.core.utils import is_scalar

from ..core.common import _contains_cftime_datetimes
from ..core.formatting import format_times
from ..core.options import OPTIONS
from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name


Expand Down Expand Up @@ -259,6 +261,54 @@ def __new__(cls, data, name=None):
result._cache = {}
return result

def __repr__(self):
    """Return a ``pandas.Index``-style string representation of this index.

    The values are laid out by ``format_times`` using
    ``OPTIONS["display_width"]``.  Indexes with more than 100 entries show
    only their first and last 10 values, separated by a ``...`` row.  The
    ``dtype``, ``length`` and ``calendar`` attributes follow the values,
    on the same line when the whole repr fits into the display width and
    on a new line otherwise.
    """
    cls_name = type(self).__name__
    width = OPTIONS["display_width"]
    # Continuation rows are indented by the length of "<ClassName>([".
    indent = len(cls_name) + 2
    max_items = 100
    n_edge_items = 10

    if len(self) > max_items:
        # Long index: only render the leading and trailing values.
        head = format_times(
            self.values[:n_edge_items],
            width,
            offset=indent,
            first_row_offset=0,
            last_row_end=",",
        )
        tail = format_times(
            self.values[-n_edge_items:],
            width,
            offset=indent,
            first_row_offset=indent,
        )
        ellipsis_row = " " * indent + "..."
        body = f"{head}\n{ellipsis_row}\n{tail}"
    else:
        body = format_times(self.values, width, offset=indent, first_row_offset=0)

    fields = (
        ("dtype", f"'{self.dtype}'"),
        ("length", str(len(self))),
        ("calendar", f"'{self.calendar}'"),
    )
    attr_text = ", ".join(f"{key}={val}" for key, val in fields)

    # Use a one-line repr only if it fits into the display width.
    one_line = f"{cls_name}([{body}], {attr_text})"
    if len(one_line) <= width:
        return one_line

    # If the attributes do not fit on one row, put one attribute per row.
    # The blank that follows each comma is kept, so the rows end up
    # indented by ``indent - 1`` columns like the first attribute row.
    if len(attr_text) >= width - indent:
        attr_text = attr_text.replace(",", ",\n" + " " * (indent - 2))
    return f"{cls_name}([{body}],\n{' ' * (indent - 1)}{attr_text})"

def _partial_date_slice(self, resolution, parsed):
"""Adapted from
pandas.tseries.index.DatetimeIndex._partial_date_slice
Expand Down Expand Up @@ -582,6 +632,12 @@ def asi8(self):
dtype=np.int64,
)

@property
def calendar(self):
    """Name of the calendar of the datetimes in this index.

    The name is the one returned by ``infer_calendar_name`` for the
    index, e.g. ``"noleap"`` or ``"gregorian"``.
    """
    # ``infer_calendar_name`` is already imported at module level from
    # ``.times``, so no function-scope import is needed here.
    return infer_calendar_name(self)

def _round_via_method(self, freq, method):
"""Round dates using a specified method."""
from .cftime_offsets import CFTIME_TICKS, to_offset
Expand Down
31 changes: 31 additions & 0 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,37 @@ def format_array_flat(array, max_width: int):
return pprint_str


def format_row(times, indent=0, separator=", ", row_end=",\n"):
    """Render one row of values: indentation, joined values, row ending.

    Parameters
    ----------
    times : iterable
        Values of the row; each one is converted with ``str``.
    indent : int
        Number of blanks placed in front of the row.
    separator : str
        Text placed between two neighbouring values.
    row_end : str
        Text appended after the last value of the row.

    Returns
    -------
    str
    """
    padding = " " * indent
    joined = separator.join(str(value) for value in times)
    return padding + joined + row_end


def format_times(
    index,
    max_width,
    offset,
    separator=", ",
    first_row_offset=0,
    intermediate_row_end=",\n",
    last_row_end="",
):
    """Lay out the values of a CFTimeIndex in rows, similar to ``pd.Index``.

    Parameters
    ----------
    index : sequence
        Values to format; must support ``len`` and slicing.
    max_width : int
        Width used to decide how many values are placed on one row.
    offset : int
        Indentation (in blanks) of every row but the first.
    separator : str
        Text placed between two values of a row.
    first_row_offset : int
        Indentation (in blanks) of the first row.
    intermediate_row_end : str
        Text appended to every row but the last.
    last_row_end : str
        Text appended to the last row.

    Returns
    -------
    str
        The concatenated rows; an empty string for an empty ``index``.
    """
    # Width of one rendered value, e.g. "2000-01-01 00:00:00".
    CFTIME_REPR_LENGTH = 19
    per_row = max(max_width // (CFTIME_REPR_LENGTH + len(separator)), 1)

    # One start position per row; the last row may hold fewer values.
    row_starts = range(0, len(index), per_row)
    last_row = len(row_starts) - 1

    rows = [
        format_row(
            index[start : start + per_row],
            indent=first_row_offset if row == 0 else offset,
            separator=separator,
            row_end=last_row_end if row == last_row else intermediate_row_end,
        )
        for row, start in enumerate(row_starts)
    ]
    return "".join(rows)


_KNOWN_TYPE_REPRS = {np.ndarray: "np.ndarray"}
with contextlib.suppress(ImportError):
import sparse
Expand Down
130 changes: 130 additions & 0 deletions xarray/tests/test_cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
assert_all_valid_date_type,
parse_iso8601,
)
from xarray.core.options import OPTIONS
from xarray.tests import assert_array_equal, assert_identical

from . import raises_regex, requires_cftime, requires_cftime_1_1_0
Expand Down Expand Up @@ -884,6 +885,135 @@ def test_cftimeindex_shift_invalid_freq():
index.shift(1, 1)


@requires_cftime
@pytest.mark.parametrize(
    ("calendar", "expected"),
    [
        ("noleap", "noleap"),
        ("365_day", "noleap"),
        ("360_day", "360_day"),
        ("julian", "julian"),
        ("gregorian", "gregorian"),
        ("proleptic_gregorian", "proleptic_gregorian"),
    ],
)
def test_cftimeindex_calendar_property(calendar, expected):
    """Test that ``CFTimeIndex.calendar`` reports the expected calendar name."""
    # "365_day" is expected to be reported under the name "noleap".
    assert (
        xr.cftime_range(start="2000", periods=3, calendar=calendar).calendar
        == expected
    )


@requires_cftime
@pytest.mark.parametrize(
    ("calendar", "expected"),
    [
        ("noleap", "noleap"),
        ("365_day", "noleap"),
        ("360_day", "360_day"),
        ("julian", "julian"),
        ("gregorian", "gregorian"),
        ("proleptic_gregorian", "proleptic_gregorian"),
    ],
)
def test_cftimeindex_calendar_repr(calendar, expected):
    """Test that the repr of a CFTimeIndex shows its calendar and its times."""
    text = repr(xr.cftime_range(start="2000", periods=3, calendar=calendar))
    # the calendar attribute is rendered with quotation marks
    assert f" calendar='{expected}'" in text
    # neighbouring times are separated by ", "
    assert "2000-01-01 00:00:00, 2000-01-02 00:00:00" in text


@requires_cftime
@pytest.mark.parametrize("periods", [2, 40])
def test_cftimeindex_periods_repr(periods):
    """Test that the repr of a CFTimeIndex shows its length."""
    text = repr(xr.cftime_range(start="2000", periods=periods))
    assert f" length={periods}" in text


@requires_cftime
@pytest.mark.parametrize("periods", [2, 3, 4, 100, 101])
def test_cftimeindex_repr_formatting(periods):
    """Test that CFTimeIndex.__repr__ is laid out like pd.Index.__repr__."""
    text = repr(xr.cftime_range(start="2000", periods=periods))
    width = OPTIONS["display_width"]

    # neighbouring times are separated by ", "
    assert "2000-01-01 00:00:00, 2000-01-02 00:00:00" in text

    if len(text) <= width:
        # short enough: everything is on one line
        assert "\n" not in text
    elif periods * 19 < width:
        # all times fit on the first line, the attributes are wrapped
        assert "\n" in text
    else:
        rows = text.split("\n")
        first_time_col = rows[0].find("2000")
        # times of the first two rows start in the same column
        assert first_time_col == rows[1].find("2000")
        # the attribute row is indented one column less than the times
        assert rows[-1].find("dtype") + 1 == first_time_col

    # long indexes are abbreviated with separation dots
    if periods > 100:
        assert "..." in text


@requires_cftime
@pytest.mark.parametrize("display_width", [40, 80, 100])
@pytest.mark.parametrize("periods", [2, 3, 4, 100, 101])
def test_cftimeindex_repr_formatting_width(periods, display_width):
    """Test that the CFTimeIndex repr honours OPTIONS['display_width']."""
    index = xr.cftime_range(start="2000", periods=periods)
    # continuation rows must be indented at least as far as "CFTimeIndex("
    intro_padding = " " * len("CFTimeIndex(")
    with xr.set_options(display_width=display_width):
        rows = repr(index).split("\n")
        for row_number, row in enumerate(rows):
            # no row may be longer than OPTIONS['display_width']
            assert len(row) <= display_width, f"{len(row)} {row} {display_width}"
            if row_number > 0:
                # every row but the first starts with the padding
                assert row.startswith(intro_padding)


@requires_cftime
@pytest.mark.parametrize("periods", [22, 50, 100])
def test_cftimeindex_repr_101_shorter(periods):
    """Test that the repr of 101 items is shorter than that of fewer items."""
    repr_of_101 = repr(xr.cftime_range(start="2000", periods=101))
    repr_of_fewer = repr(xr.cftime_range(start="2000", periods=periods))
    assert len(repr_of_101) < len(repr_of_fewer)


@requires_cftime
@pytest.mark.parametrize("periods", [3, 4, 100, 101])
def test_cftimeindex_repr_compare_pandasIndex(periods):
    """Test CFTimeIndex.__repr__ against pandas.Index.__repr__.

    The pandas repr is adjusted (class name, indentation and the trailing
    ``length``/``calendar`` attributes) so that both strings are expected
    to be equal.
    """
    cfindex = xr.cftime_range(start="2000", periods=periods)
    pdindex = pd.Index(cfindex)
    cfindex_repr_str = repr(cfindex)
    pdindex_repr_str = repr(pdindex)

    pdindex_repr_str = pdindex_repr_str.replace("Index", "CFTimeIndex")
    # "CFTimeIndex([" is six characters longer than "Index(["
    pdindex_repr_str = pdindex_repr_str.replace(f"\n{' '*7}", f"\n{' '*13}")
    if periods <= 3:
        # pd.Index doesn't worry about display_width, so undo the line
        # wrapping of the CFTimeIndex repr
        cfindex_repr_str = cfindex_repr_str.replace("\n", "").replace(" " * 12, " ")
    else:
        # indent the attribute row similarly
        pdindex_repr_str = pdindex_repr_str.replace("dtype", f"{' '*6}dtype")

    # Add the length attribute for up to 100 periods.
    # NOTE(review): presumably pandas already prints ``length`` itself for
    # longer (abbreviated) indexes - confirm.
    lengthstr = f"length={periods}, " if periods <= 100 else ""
    pdindex_repr_str = pdindex_repr_str.replace(
        ")", f", {lengthstr}calendar='gregorian')"
    )

    # Pass the message itself: ``print(...)`` returns None, which would leave
    # the AssertionError without a message.
    assert (
        pdindex_repr_str == cfindex_repr_str
    ), f"pandas:\n{pdindex_repr_str}\n vs.\ncftime: \n{cfindex_repr_str}"


@requires_cftime
def test_parse_array_of_cftime_strings():
from cftime import DatetimeNoLeap
Expand Down