Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: TimedeltaIndex.__repr__ with non-nano and round values #55405

Merged
merged 9 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ Datetimelike

Timedelta
^^^^^^^^^
-
- Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` with timedelta64 values of non-nanosecond resolution whose entries are all multiples of 24 hours, which failed to use the compact representation used in the nanosecond case (:issue:`55405`)
-

Timezones
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2167,11 +2167,11 @@ def _repr_categories(self) -> list[str]:
)
if len(self.categories) > max_categories:
num = max_categories // 2
head = format_array(self.categories[:num])
tail = format_array(self.categories[-num:])
head = format_array(self.categories[:num]._values)
tail = format_array(self.categories[-num:]._values)
category_strs = head + ["..."] + tail
else:
category_strs = format_array(self.categories)
category_strs = format_array(self.categories._values)

# Strip all leading spaces, which format_array adds for columns...
category_strs = [x.strip() for x in category_strs]
Expand Down
25 changes: 25 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
iNaT,
ints_to_pydatetime,
ints_to_pytimedelta,
periods_per_day,
to_offset,
)
from pandas._libs.tslibs.fields import (
Expand Down Expand Up @@ -2312,6 +2313,30 @@ def interpolate(
return self
return type(self)._simple_new(out_data, dtype=self.dtype)

# --------------------------------------------------------------
# Unsorted

@property
def _is_dates_only(self) -> bool:
    """
    Whether every non-NaT entry is a round time at midnight.

    Such arrays (with no timezone) are given a more compact __repr__ than
    other cases. For TimedeltaArray this amounts to checking that the
    entries are multiples of 24H.
    """
    if not lib.is_np_dtype(self.dtype):
        # i.e. we have a timezone
        return False

    i8values = self.asi8
    per_day = periods_per_day(get_unit_from_dtype(self.dtype))

    # Entries equal to iNaT are left out of consideration; flag the
    # remaining entries that are not an exact multiple of one day.
    off_boundary = np.logical_and(i8values != iNaT, i8values % per_day != 0)

    # TODO: can we reuse is_date_array_normalized? would need a skipna kwd
    # (first attempt at this was less performant than this implementation)
    return off_boundary.sum() == 0


# -------------------------------------------------------------------
# Shared Constructor Helpers
Expand Down
21 changes: 0 additions & 21 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,12 @@
get_resolution,
get_supported_reso,
get_unit_from_dtype,
iNaT,
ints_to_pydatetime,
is_date_array_normalized,
is_supported_unit,
is_unitless,
normalize_i8_timestamps,
npy_unit_to_abbrev,
periods_per_day,
timezones,
to_offset,
tz_convert_from_utc,
Expand Down Expand Up @@ -745,25 +743,6 @@ def _format_native_types(
self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso
)

@property
def _is_dates_only(self) -> bool:
    """
    Whether every non-NaT entry is a round time at midnight.

    Such arrays (with no timezone) are given a more compact __repr__ than
    other cases.
    """
    if self.tz is not None:
        return False

    i8values = self.asi8

    # tz is None (checked above), so the dtype can be treated as np.dtype
    np_dtype = cast(np.dtype, self.dtype)
    per_day = periods_per_day(get_unit_from_dtype(np_dtype))

    # Entries equal to iNaT are left out of consideration; flag the
    # remaining entries that are not an exact multiple of one day.
    off_midnight = np.logical_and(i8values != iNaT, i8values % per_day != 0)

    # TODO: can we reuse is_date_array_normalized? would need a skipna kwd
    return off_midnight.sum() == 0

# -----------------------------------------------------------------
# Comparison Methods

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,7 @@ def timedelta_range(
**Specify a unit**

>>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s")
TimedeltaIndex(['1 days 00:00:00', '100001 days 00:00:00',
'200001 days 00:00:00'],
TimedeltaIndex(['1 days', '100001 days', '200001 days'],
dtype='timedelta64[s]', freq='100000D')
"""
if freq is None and com.any_none(periods, start, end):
Expand Down
12 changes: 1 addition & 11 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
NaT,
Timedelta,
Timestamp,
iNaT,
)
from pandas._libs.tslibs.nattype import NaTType

Expand Down Expand Up @@ -103,7 +102,6 @@
SequenceNotStr,
StorageOptions,
WriteBuffer,
npt,
)

from pandas import (
Expand Down Expand Up @@ -1775,15 +1773,7 @@ def get_format_timedelta64(

If box, then show the return in quotes
"""
values_int = values.view(np.int64)
values_int = cast("npt.NDArray[np.int64]", values_int)

consider_values = values_int != iNaT

one_day_nanos = 86400 * 10**9
not_midnight = values_int % one_day_nanos != 0
both = np.logical_and(consider_values, not_midnight)
even_days = both.sum() == 0
even_days = values._is_dates_only

if even_days:
format = None
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/timedeltas/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@


class TestTimedeltaIndexRendering:
def test_repr_round_days_non_nano(self):
    """
    Check that a second-resolution TimedeltaIndex of whole days, and a
    Series built from it, render with the compact "1 days" form.
    """
    # GH#55405
    # we should get "1 days", not "1 days 00:00:00" with non-nano
    tdi = TimedeltaIndex(["1 days"], freq="D").as_unit("s")
    result = repr(tdi)
    expected = "TimedeltaIndex(['1 days'], dtype='timedelta64[s]', freq='D')"
    assert result == expected

    # Series repr separates the index label from the timedelta value with
    # three spaces; a single space here would never match.
    result2 = repr(Series(tdi))
    expected2 = "0   1 days\ndtype: timedelta64[s]"
    assert result2 == expected2

@pytest.mark.parametrize("method", ["__repr__", "__str__"])
def test_representation(self, method):
idx1 = TimedeltaIndex([], freq="D")
Expand Down
32 changes: 16 additions & 16 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3186,7 +3186,7 @@ def test_all(self):

class TestTimedelta64Formatter:
def test_days(self):
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(x, box=True).get_result()
assert result[0].strip() == "'0 days'"
assert result[1].strip() == "'1 days'"
Expand All @@ -3202,48 +3202,48 @@ def test_days(self):
assert result[0].strip() == "1 days"

def test_days_neg(self):
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(-x, box=True).get_result()
assert result[0].strip() == "'0 days'"
assert result[1].strip() == "'-1 days'"

def test_subdays(self):
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values
result = fmt._Timedelta64Formatter(y, box=True).get_result()
assert result[0].strip() == "'0 days 00:00:00'"
assert result[1].strip() == "'0 days 00:00:01'"

def test_subdays_neg(self):
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values
result = fmt._Timedelta64Formatter(-y, box=True).get_result()
assert result[0].strip() == "'0 days 00:00:00'"
assert result[1].strip() == "'-1 days +23:59:59'"

def test_zero(self):
x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")
x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(x, box=True).get_result()
assert result[0].strip() == "'0 days'"

x = pd.to_timedelta(list(range(1)), unit="D")
x = pd.to_timedelta(list(range(1)), unit="D")._values
result = fmt._Timedelta64Formatter(x, box=True).get_result()
assert result[0].strip() == "'0 days'"


class Test_Datetime64Formatter:
def test_mixed(self):
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "2013-01-01 00:00:00"
assert result[1].strip() == "2013-01-01 12:00:00"

def test_dates(self):
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "2013-01-01"
assert result[1].strip() == "2013-01-02"

def test_date_nanos(self):
x = Series([Timestamp(200)])
x = Series([Timestamp(200)])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "1970-01-01 00:00:00.000000200"

Expand All @@ -3252,41 +3252,41 @@ def test_dates_display(self):
# make sure that we consistently display date formatting
x = Series(date_range("20130101 09:00:00", periods=5, freq="D"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-05 09:00:00"

x = Series(date_range("20130101 09:00:00", periods=5, freq="s"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:04"

x = Series(date_range("20130101 09:00:00", periods=5, freq="ms"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.004"

x = Series(date_range("20130101 09:00:00", periods=5, freq="us"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.000004"

x = Series(date_range("20130101 09:00:00", periods=5, freq="ns"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x).get_result()
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000000000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.000000004"

def test_datetime64formatter_yearmonth(self):
x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])
x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])._values

def format_func(x):
return x.strftime("%Y-%m")
Expand All @@ -3298,7 +3298,7 @@ def format_func(x):
def test_datetime64formatter_hoursecond(self):
x = Series(
pd.to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")
)
)._values

def format_func(x):
return x.strftime("%H:%M")
Expand Down