From 54dcde16648fcaafbfe29df9080318e23541352a Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 13 Aug 2020 19:22:57 +0100 Subject: [PATCH 01/18] PERF: RangeIndex.format perf regression --- pandas/core/indexes/range.py | 7 ++++++- pandas/tests/indexes/ranges/test_range.py | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c65c3d5ff3d9c..0d49fa15ce352 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1,7 +1,7 @@ from datetime import timedelta import operator from sys import getsizeof -from typing import Any +from typing import Any, List import warnings import numpy as np @@ -187,6 +187,11 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None + def _format_with_header(self, header, na_rep="NaN") -> List[str]: + start_str, stop_str = str(self._range.start), str(self._range.stop) + max_length = max(len(start_str), len(stop_str)) + return header + [f"{x:<{max_length}}" for x in self._range] + # -------------------------------------------------------------------- _deprecation_message = ( "RangeIndex.{} is deprecated and will be " diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index c4c242746e92c..fbb4f3e506205 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -171,8 +171,14 @@ def test_cache(self): pass assert idx._cache == {} + idx.format() + assert idx._cache == {} + df = pd.DataFrame({"a": range(10)}, index=idx) + str(df) + assert idx._cache == {} + df.loc[50] assert idx._cache == {} From e36403d830f6a9eca2241371ce34b1dcff561ec3 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 14 Aug 2020 08:04:12 +0100 Subject: [PATCH 02/18] fix errors --- pandas/core/indexes/range.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 
0d49fa15ce352..2360027eecfaa 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -188,8 +188,10 @@ def _format_data(self, name=None): return None def _format_with_header(self, header, na_rep="NaN") -> List[str]: - start_str, stop_str = str(self._range.start), str(self._range.stop) - max_length = max(len(start_str), len(stop_str)) + first_val_str = str(self._range[0]) + last_val_str = str(self._range[-1]) + max_length = max(len(first_val_str), len(last_val_str)) + return header + [f"{x:<{max_length}}" for x in self._range] # -------------------------------------------------------------------- From fef0b2c20ea5a6ea1440aab5d6786d40bb8f09bf Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 14 Aug 2020 22:59:19 +0100 Subject: [PATCH 03/18] TYP: add types to params --- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 4 +++- pandas/core/indexes/interval.py | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 623ce68201492..327c85e6c2d14 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -924,7 +924,9 @@ def format( return self._format_with_header(header, na_rep=na_rep) - def _format_with_header(self, header, na_rep="NaN") -> List[str_t]: + def _format_with_header( + self, header: List[str_t], na_rep: str_t = "NaN" + ) -> List[str_t]: from pandas.io.formats.format import format_array values = self._values diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 4990e6a8e20e9..cbb30763797d1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -347,7 +347,7 @@ def _format_attrs(self): attrs.append(("length", len(self))) return attrs - def _format_with_header(self, header, na_rep="NaN") -> List[str]: + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: from pandas.io.formats.printing import 
pprint_thing result = [ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 6d9d75a69e91d..c455e4831200e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -362,7 +362,9 @@ def format( return self._format_with_header(header, na_rep=na_rep, date_format=date_format) - def _format_with_header(self, header, na_rep="NaT", date_format=None) -> List[str]: + def _format_with_header( + self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None + ) -> List[str]: return header + list( self._format_native_types(na_rep=na_rep, date_format=date_format) ) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e8d0a44324cc5..9281f8017761d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -948,7 +948,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): # Rendering Methods # __repr__ associated methods are based on MultiIndex - def _format_with_header(self, header, na_rep="NaN") -> List[str]: + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: return header + list(self._format_native_types(na_rep=na_rep)) def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs): From 5998b5e40bd8bdc1735bf0806c2bc5ec723fbd0a Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 19 Aug 2020 21:05:17 +0100 Subject: [PATCH 04/18] add more types --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 2360027eecfaa..a3ac6da06209d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -187,7 +187,7 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None - def _format_with_header(self, header, na_rep="NaN") -> List[str]: + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: 
first_val_str = str(self._range[0]) last_val_str = str(self._range[-1]) max_length = max(len(first_val_str), len(last_val_str)) From 6ffdb82a0a4c7ce8bf3106cccfc87067ae91cefb Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 Aug 2020 20:00:59 +0100 Subject: [PATCH 05/18] fix indention in directive --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3cd920158f774..0f0f009307c75 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -540,7 +540,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397 .. ipython:: python - df.describe() + df.describe() ``__str__`` methods now call ``__repr__`` rather than vice versa ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From f5759e05f231de0038e7ef796403eaf78e977d3c Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 Aug 2020 20:32:05 +0100 Subject: [PATCH 06/18] fix zero length RangeIndex + add tests --- pandas/core/indexes/range.py | 2 ++ pandas/tests/indexes/common.py | 8 ++++++-- pandas/tests/indexes/period/test_period.py | 4 ++++ pandas/tests/indexes/ranges/test_range.py | 4 ++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a3ac6da06209d..1ab6302144538 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -188,6 +188,8 @@ def _format_data(self, name=None): return None def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: + if len(self._range) == 0: + return [] first_val_str = str(self._range[0]) last_val_str = str(self._range[-1]) max_length = max(len(first_val_str), len(last_val_str)) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 98f7c0eadb4bb..2586cb1432c4f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1,5 +1,5 @@ 
import gc -from typing import Optional, Type +from typing import Type import numpy as np import pytest @@ -33,7 +33,7 @@ class Base: """ base class for index sub-class tests """ - _holder: Optional[Type[Index]] = None + _holder: Type[Index] _compat_props = ["shape", "ndim", "size", "nbytes"] def create_index(self) -> Index: @@ -681,6 +681,10 @@ def test_format(self): expected = [str(x) for x in idx] assert idx.format() == expected + def test_format_empty(self): + # GH35712 + assert self._holder([]).format() == [] + def test_hasnans_isnans(self, index): # GH 11343, added tests for hasnans / isnans if isinstance(index, MultiIndex): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 15a88ab3819ce..0a29cfd5d06a6 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -536,6 +536,10 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): with pytest.raises(KeyError, match=msg): df.loc[key] + def test_format_empty(self): + # GH35712 + assert self._holder([], freq="A").format() == [] + def test_maybe_convert_timedelta(): pi = PeriodIndex(["2000", "2001"], freq="D") diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index fbb4f3e506205..dcc6922811340 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -521,3 +521,7 @@ def test_engineless_lookup(self): idx.get_loc("a") assert "_engine" not in idx._cache + + def test_format_empty(self): + # GH35712 + assert self._holder(0).format() == [] From e1476a965a3acd9c08c74a6dc0f45f0d3a0861b4 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 22 Aug 2020 10:59:34 +0100 Subject: [PATCH 07/18] add whatsnew --- doc/source/whatsnew/v1.1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index c1b73c60be92b..4e40419bbb9c4 100644 
--- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -15,7 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - .. --------------------------------------------------------------------------- From 041d4dff279097ab605b67aed329bca8e9ff62c5 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 24 Aug 2020 18:14:24 +0100 Subject: [PATCH 08/18] fix name=True bug --- doc/source/whatsnew/v1.1.2.rst | 2 +- pandas/core/indexes/datetimelike.py | 7 +++++-- pandas/core/indexes/range.py | 2 +- pandas/tests/indexes/common.py | 4 +++- pandas/tests/indexes/period/test_period.py | 4 +++- pandas/tests/indexes/ranges/test_range.py | 4 +++- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 4e40419bbb9c4..279f11fff8a77 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -26,7 +26,7 @@ Bug fixes ~~~~~~~~~ - Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`) - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) -- +- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c455e4831200e..4fbfe6f6ae7a9 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -354,8 +354,11 @@ def format( """ header = [] if name: - fmt_name = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) - header.append(fmt_name) + header.append( + ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) + if self.name is not None + else "" + ) if formatter is not None: return header + list(self.map(formatter)) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 1ab6302144538..dd8a4b76db64d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -189,7 +189,7 @@ def _format_data(self, name=None): def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: if len(self._range) == 0: - return [] + return header first_val_str = str(self._range[0]) last_val_str = str(self._range[-1]) max_length = max(len(first_val_str), len(last_val_str)) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 2586cb1432c4f..4a7622479b074 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -683,7 +683,9 @@ def test_format(self): def test_format_empty(self): # GH35712 - assert self._holder([]).format() == [] + empty_idx = self._holder([]) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] def test_hasnans_isnans(self, index): # GH 11343, added tests for hasnans / isnans diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 0a29cfd5d06a6..085d41aaa5b76 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -538,7 +538,9 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): 
def test_format_empty(self): # GH35712 - assert self._holder([], freq="A").format() == [] + empty_idx = self._holder([], freq="A") + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] def test_maybe_convert_timedelta(): diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index dcc6922811340..172cd4a106ac1 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -524,4 +524,6 @@ def test_engineless_lookup(self): def test_format_empty(self): # GH35712 - assert self._holder(0).format() == [] + empty_idx = self._holder(0) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] From b402d47371b07e475b3229414484202ec4c7188f Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 13 Aug 2020 19:22:57 +0100 Subject: [PATCH 09/18] PERF: RangeIndex.format perf regression --- pandas/core/indexes/range.py | 7 ++++++- pandas/tests/indexes/ranges/test_range.py | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c5572a9de7fa5..e14720ca7fb30 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1,7 +1,7 @@ from datetime import timedelta import operator from sys import getsizeof -from typing import Any +from typing import Any, List import warnings import numpy as np @@ -187,6 +187,11 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None + def _format_with_header(self, header, na_rep="NaN") -> List[str]: + start_str, stop_str = str(self._range.start), str(self._range.stop) + max_length = max(len(start_str), len(stop_str)) + return header + [f"{x:<{max_length}}" for x in self._range] + # -------------------------------------------------------------------- _deprecation_message = ( "RangeIndex.{} is deprecated and will be " diff --git a/pandas/tests/indexes/ranges/test_range.py 
b/pandas/tests/indexes/ranges/test_range.py index c4c242746e92c..fbb4f3e506205 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -171,8 +171,14 @@ def test_cache(self): pass assert idx._cache == {} + idx.format() + assert idx._cache == {} + df = pd.DataFrame({"a": range(10)}, index=idx) + str(df) + assert idx._cache == {} + df.loc[50] assert idx._cache == {} From af1e4008803382439de1f255efc133dc376d5b5b Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 14 Aug 2020 08:04:12 +0100 Subject: [PATCH 10/18] fix errors --- pandas/core/indexes/range.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e14720ca7fb30..af483d5c59758 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -188,8 +188,10 @@ def _format_data(self, name=None): return None def _format_with_header(self, header, na_rep="NaN") -> List[str]: - start_str, stop_str = str(self._range.start), str(self._range.stop) - max_length = max(len(start_str), len(stop_str)) + first_val_str = str(self._range[0]) + last_val_str = str(self._range[-1]) + max_length = max(len(first_val_str), len(last_val_str)) + return header + [f"{x:<{max_length}}" for x in self._range] # -------------------------------------------------------------------- From d6b3b35e71bb32dd690faa55c7952f2a8ca8b57a Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 14 Aug 2020 22:59:19 +0100 Subject: [PATCH 11/18] TYP: add types to params --- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 4 +++- pandas/core/indexes/interval.py | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ceb109fdf6d7a..b1e5d5627e3f6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -933,7 +933,9 @@ def format( return self._format_with_header(header, 
na_rep=na_rep) - def _format_with_header(self, header, na_rep="NaN") -> List[str_t]: + def _format_with_header( + self, header: List[str_t], na_rep: str_t = "NaN" + ) -> List[str_t]: from pandas.io.formats.format import format_array values = self._values diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 4990e6a8e20e9..cbb30763797d1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -347,7 +347,7 @@ def _format_attrs(self): attrs.append(("length", len(self))) return attrs - def _format_with_header(self, header, na_rep="NaN") -> List[str]: + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: from pandas.io.formats.printing import pprint_thing result = [ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9d00f50a65a06..037f182f765ff 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -362,7 +362,9 @@ def format( return self._format_with_header(header, na_rep=na_rep, date_format=date_format) - def _format_with_header(self, header, na_rep="NaT", date_format=None) -> List[str]: + def _format_with_header( + self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None + ) -> List[str]: return header + list( self._format_native_types(na_rep=na_rep, date_format=date_format) ) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e8d0a44324cc5..9281f8017761d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -948,7 +948,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): # Rendering Methods # __repr__ associated methods are based on MultiIndex - def _format_with_header(self, header, na_rep="NaN") -> List[str]: + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: return header + list(self._format_native_types(na_rep=na_rep)) def 
_format_native_types(self, na_rep="NaN", quoting=None, **kwargs): From d129776daae33825e5edd45e2383019c0c6c8b4c Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 19 Aug 2020 21:05:17 +0100 Subject: [PATCH 12/18] add more types --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index af483d5c59758..e8330985991c0 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -187,7 +187,7 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None - def _format_with_header(self, header, na_rep="NaN") -> List[str]: + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: first_val_str = str(self._range[0]) last_val_str = str(self._range[-1]) max_length = max(len(first_val_str), len(last_val_str)) From 4c697af1eb90362851a5d368354ab68d6618d3e0 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 Aug 2020 20:00:59 +0100 Subject: [PATCH 13/18] fix indention in directive --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3cd920158f774..0f0f009307c75 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -540,7 +540,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397 .. 
ipython:: python - df.describe() + df.describe() ``__str__`` methods now call ``__repr__`` rather than vice versa ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From b3f47c94e2d9eae16feccc80145a09fc73150855 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 Aug 2020 20:32:05 +0100 Subject: [PATCH 14/18] fix zero length RangeIndex + add tests --- pandas/core/indexes/range.py | 2 ++ pandas/tests/indexes/common.py | 8 ++++++-- pandas/tests/indexes/period/test_period.py | 4 ++++ pandas/tests/indexes/ranges/test_range.py | 4 ++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e8330985991c0..3e9b7a594b855 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -188,6 +188,8 @@ def _format_data(self, name=None): return None def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: + if len(self._range) == 0: + return [] first_val_str = str(self._range[0]) last_val_str = str(self._range[-1]) max_length = max(len(first_val_str), len(last_val_str)) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e4d0b46f7c716..7fe66c298620c 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1,5 +1,5 @@ import gc -from typing import Optional, Type +from typing import Type import numpy as np import pytest @@ -33,7 +33,7 @@ class Base: """ base class for index sub-class tests """ - _holder: Optional[Type[Index]] = None + _holder: Type[Index] _compat_props = ["shape", "ndim", "size", "nbytes"] def create_index(self) -> Index: @@ -686,6 +686,10 @@ def test_format(self): expected = [str(x) for x in idx] assert idx.format() == expected + def test_format_empty(self): + # GH35712 + assert self._holder([]).format() == [] + def test_hasnans_isnans(self, index): # GH 11343, added tests for hasnans / isnans if isinstance(index, MultiIndex): diff --git 
a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 15a88ab3819ce..0a29cfd5d06a6 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -536,6 +536,10 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): with pytest.raises(KeyError, match=msg): df.loc[key] + def test_format_empty(self): + # GH35712 + assert self._holder([], freq="A").format() == [] + def test_maybe_convert_timedelta(): pi = PeriodIndex(["2000", "2001"], freq="D") diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index fbb4f3e506205..dcc6922811340 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -521,3 +521,7 @@ def test_engineless_lookup(self): idx.get_loc("a") assert "_engine" not in idx._cache + + def test_format_empty(self): + # GH35712 + assert self._holder(0).format() == [] From 1e813db150835812a2f5e8a2cdbebe838781436b Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 22 Aug 2020 10:59:34 +0100 Subject: [PATCH 15/18] add whatsnew --- doc/source/whatsnew/v1.1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index af61354470a71..ed5979fd65f23 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -15,7 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) -- +- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - .. 
--------------------------------------------------------------------------- From db932e505ec933b7bf5df5f4ae0605d5d328b8cd Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 24 Aug 2020 18:14:24 +0100 Subject: [PATCH 16/18] fix name=True bug --- doc/source/whatsnew/v1.1.2.rst | 2 +- pandas/core/indexes/datetimelike.py | 7 +++++-- pandas/core/indexes/range.py | 2 +- pandas/tests/indexes/common.py | 4 +++- pandas/tests/indexes/period/test_period.py | 4 +++- pandas/tests/indexes/ranges/test_range.py | 4 +++- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index ed5979fd65f23..1b13e90f1f644 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -26,7 +26,7 @@ Bug fixes ~~~~~~~~~ - Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`) - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) -- +- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 037f182f765ff..0e8d7c1b866b8 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -354,8 +354,11 @@ def format( """ header = [] if name: - fmt_name = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) - header.append(fmt_name) + header.append( + ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) + if self.name is not None + else "" + ) if formatter is not None: return header + list(self.map(formatter)) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 3e9b7a594b855..a4c3a74de9634 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -189,7 +189,7 @@ def _format_data(self, name=None): def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: if len(self._range) == 0: - return [] + return header first_val_str = str(self._range[0]) last_val_str = str(self._range[-1]) max_length = max(len(first_val_str), len(last_val_str)) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 7fe66c298620c..e95e7267f17ec 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -688,7 +688,9 @@ def test_format(self): def test_format_empty(self): # GH35712 - assert self._holder([]).format() == [] + empty_idx = self._holder([]) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] def test_hasnans_isnans(self, index): # GH 11343, added tests for hasnans / isnans diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 0a29cfd5d06a6..085d41aaa5b76 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -538,7 +538,9 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): 
def test_format_empty(self): # GH35712 - assert self._holder([], freq="A").format() == [] + empty_idx = self._holder([], freq="A") + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] def test_maybe_convert_timedelta(): diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index dcc6922811340..172cd4a106ac1 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -524,4 +524,6 @@ def test_engineless_lookup(self): def test_format_empty(self): # GH35712 - assert self._holder(0).format() == [] + empty_idx = self._holder(0) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] From d88202a2997776b33d3d658a4c3e049e340a8d18 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 25 Aug 2020 16:46:45 +0100 Subject: [PATCH 17/18] small change --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a4c3a74de9634..b85e2d3947cb1 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -188,7 +188,7 @@ def _format_data(self, name=None): return None def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: - if len(self._range) == 0: + if not len(self._range): return header first_val_str = str(self._range[0]) last_val_str = str(self._range[-1]) From b90c8fed0fd8343b42bc8cfa3bd4c501c851bc24 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 26 Aug 2020 10:43:53 +0100 Subject: [PATCH 18/18] remove dup whatsnew entry --- doc/source/whatsnew/v1.1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 49a7318fb8422..7739a483e3d38 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -14,11 +14,11 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ -- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - + .. --------------------------------------------------------------------------- .. _whatsnew_112.bug_fixes: