Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: RangeIndex.format performance #35712

Merged
merged 22 commits into from
Aug 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397

.. ipython:: python

df.describe()
df.describe()

``__str__`` methods now call ``__repr__`` rather than vice versa
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
5 changes: 3 additions & 2 deletions doc/source/whatsnew/v1.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ including other versions of pandas.
Fixed regressions
~~~~~~~~~~~~~~~~~
- Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`)
- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
simonjayhawkins marked this conversation as resolved.
Show resolved Hide resolved
-
-


.. ---------------------------------------------------------------------------

Expand All @@ -26,7 +27,7 @@ Bug fixes
~~~~~~~~~
- Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`)
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
-
- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
-

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,9 @@ def format(

return self._format_with_header(header, na_rep=na_rep)

def _format_with_header(self, header, na_rep="NaN") -> List[str_t]:
def _format_with_header(
self, header: List[str_t], na_rep: str_t = "NaN"
) -> List[str_t]:
from pandas.io.formats.format import format_array

values = self._values
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def _format_attrs(self):
attrs.append(("length", len(self)))
return attrs

def _format_with_header(self, header, na_rep="NaN") -> List[str]:
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
from pandas.io.formats.printing import pprint_thing

result = [
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,15 +354,20 @@ def format(
"""
header = []
if name:
fmt_name = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
header.append(fmt_name)
header.append(
ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
if self.name is not None
else ""
)

if formatter is not None:
return header + list(self.map(formatter))

return self._format_with_header(header, na_rep=na_rep, date_format=date_format)

def _format_with_header(self, header, na_rep="NaT", date_format=None) -> List[str]:
def _format_with_header(
self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None
) -> List[str]:
return header + list(
self._format_native_types(na_rep=na_rep, date_format=date_format)
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
# Rendering Methods
# __repr__ associated methods are based on MultiIndex

def _format_with_header(self, header, na_rep="NaN") -> List[str]:
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
return header + list(self._format_native_types(na_rep=na_rep))

def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs):
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import timedelta
import operator
from sys import getsizeof
from typing import Any
from typing import Any, List
import warnings

import numpy as np
Expand Down Expand Up @@ -187,6 +187,15 @@ def _format_data(self, name=None):
# we are formatting thru the attributes
return None

def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
if not len(self._range):
return header
first_val_str = str(self._range[0])
jreback marked this conversation as resolved.
Show resolved Hide resolved
last_val_str = str(self._range[-1])
max_length = max(len(first_val_str), len(last_val_str))

return header + [f"{x:<{max_length}}" for x in self._range]

# --------------------------------------------------------------------
_deprecation_message = (
"RangeIndex.{} is deprecated and will be "
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import gc
from typing import Optional, Type
from typing import Type

import numpy as np
import pytest
Expand Down Expand Up @@ -33,7 +33,7 @@
class Base:
""" base class for index sub-class tests """

_holder: Optional[Type[Index]] = None
_holder: Type[Index]
_compat_props = ["shape", "ndim", "size", "nbytes"]

def create_index(self) -> Index:
Expand Down Expand Up @@ -686,6 +686,12 @@ def test_format(self):
expected = [str(x) for x in idx]
assert idx.format() == expected

def test_format_empty(self):
# GH35712
# Checks format() on an index built from an empty list: no values means an
# empty result, and with name=True the only entry is the header, which is
# expected to be the empty string.
empty_idx = self._holder([])
assert empty_idx.format() == []
assert empty_idx.format(name=True) == [""]

def test_hasnans_isnans(self, index):
# GH 11343, added tests for hasnans / isnans
if isinstance(index, MultiIndex):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,12 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
with pytest.raises(KeyError, match=msg):
df.loc[key]

def test_format_empty(self):
# GH35712
# Same empty-index check as the shared version, but the holder is called
# with an explicit freq="A".
# NOTE(review): presumably needed because a frequency cannot be inferred
# from an empty list; confirm against the PeriodIndex constructor.
empty_idx = self._holder([], freq="A")
assert empty_idx.format() == []
assert empty_idx.format(name=True) == [""]


def test_maybe_convert_timedelta():
pi = PeriodIndex(["2000", "2001"], freq="D")
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/ranges/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,14 @@ def test_cache(self):
pass
assert idx._cache == {}

idx.format()
assert idx._cache == {}

df = pd.DataFrame({"a": range(10)}, index=idx)

str(df)
assert idx._cache == {}

df.loc[50]
assert idx._cache == {}

Expand Down Expand Up @@ -515,3 +521,9 @@ def test_engineless_lookup(self):
idx.get_loc("a")

assert "_engine" not in idx._cache

def test_format_empty(self):
# GH35712
# Empty-index check where the holder is called with the integer 0 rather
# than an empty list.
# NOTE(review): presumably _holder is RangeIndex here, so this is a
# zero-length range; confirm against the test class.
empty_idx = self._holder(0)
assert empty_idx.format() == []
assert empty_idx.format(name=True) == [""]