From 37638cc9a820ce7981350e2ceefcea9317a9e2f8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Nov 2018 14:29:25 -0600 Subject: [PATCH] wip --- pandas/core/arrays/base.py | 6 +-- pandas/core/internals/blocks.py | 4 +- pandas/io/formats/format.py | 60 +++++++++++++----------------- pandas/tests/arrays/test_period.py | 20 +++++----- 4 files changed, 40 insertions(+), 50 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 238dbdd5576b2..e15eb74119578 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -680,8 +680,8 @@ def __repr__(self): length=len(self), dtype=self.dtype) - def _formatter(self, boxed=False): - # type: (bool) -> Callable[Any] + def _formatter(self, formatter): + # type: (ExtensionArrayFormatter) -> Callable[Any] """Formatting function for scalar values. This is used in the default '__repr__'. The formatting function @@ -693,7 +693,7 @@ def _formatter(self, boxed=False): Whether the formatter is to be used by pandas inside a Series or DataFrame repr. 
""" - return str + return formatter.formatter or str def _formatting_values(self): # type: () -> np.ndarray diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b9c13196aea1d..267b769ce7a65 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1963,9 +1963,7 @@ def formatting_values(self): warnings.warn(msg, FutureWarning, stacklevel=10) return self.values._formatting_values() - # the future implementation (and current, if not overrode) - formatter = self.values._formatter(boxed=True) - return np.array([formatter(x) for x in self.values], dtype=object) + return self.values def concat_same_type(self, to_concat, placement=None): """ diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6f64605bcf175..22d91a47c4082 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -16,11 +16,12 @@ from pandas.compat import StringIO, lzip, map, u, zip from pandas.core.dtypes.common import ( - is_categorical_dtype, is_datetime64_dtype, is_datetimetz, is_float, - is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype, - is_list_like, is_numeric_dtype, is_period_arraylike, is_scalar, - is_timedelta64_dtype) -from pandas.core.dtypes.generic import ABCMultiIndex, ABCSparseArray + is_categorical_dtype, is_datetime64_dtype, is_datetimetz, + is_extension_array_dtype, is_float, is_float_dtype, is_integer, + is_integer_dtype, is_list_like, is_numeric_dtype, is_period_arraylike, + is_scalar, is_timedelta64_dtype) +from pandas.core.dtypes.generic import ( + ABCIndex, ABCMultiIndex, ABCSeries, ABCSparseArray) from pandas.core.dtypes.missing import isna, notna from pandas import compat @@ -849,22 +850,20 @@ def _get_column_name_list(self): def format_array(values, formatter, float_format=None, na_rep='NaN', digits=None, space=None, justify='right', decimal='.'): - if is_categorical_dtype(values): - fmt_klass = CategoricalArrayFormatter - elif is_interval_dtype(values): - 
fmt_klass = IntervalArrayFormatter + if is_period_arraylike(values): + fmt_klass = PeriodArrayFormatter + elif is_datetime64_dtype(values.dtype): + fmt_klass = Datetime64Formatter + elif is_timedelta64_dtype(values.dtype): + fmt_klass = Timedelta64Formatter + elif is_extension_array_dtype(values.dtype): + fmt_klass = ExtensionArrayFormatter elif is_float_dtype(values.dtype): fmt_klass = FloatArrayFormatter - elif is_period_arraylike(values): - fmt_klass = PeriodArrayFormatter elif is_integer_dtype(values.dtype): fmt_klass = IntArrayFormatter elif is_datetimetz(values): fmt_klass = Datetime64TZFormatter - elif is_datetime64_dtype(values.dtype): - fmt_klass = Datetime64Formatter - elif is_timedelta64_dtype(values.dtype): - fmt_klass = Timedelta64Formatter else: fmt_klass = GenericArrayFormatter @@ -1126,14 +1125,18 @@ def _format_strings(self): return fmt_values.tolist() -class IntervalArrayFormatter(GenericArrayFormatter): - - def __init__(self, values, *args, **kwargs): - GenericArrayFormatter.__init__(self, values, *args, **kwargs) - +class ExtensionArrayFormatter(GenericArrayFormatter): def _format_strings(self): - formatter = self.formatter or str - fmt_values = np.array([formatter(x) for x in self.values]) + values = self.values + if isinstance(values, (ABCIndex, ABCSeries)): + values = values._values + + formatter = values._formatter(self) + fmt_values = format_array(np.asarray(values), + formatter, + float_format=self.float_format, + na_rep=self.na_rep, digits=self.digits, + space=self.space, justify=self.justify) return fmt_values @@ -1152,19 +1155,6 @@ def _format_strings(self): return fmt_values -class CategoricalArrayFormatter(GenericArrayFormatter): - - def __init__(self, values, *args, **kwargs): - GenericArrayFormatter.__init__(self, values, *args, **kwargs) - - def _format_strings(self): - fmt_values = format_array(self.values.get_values(), self.formatter, - float_format=self.float_format, - na_rep=self.na_rep, digits=self.digits, -
space=self.space, justify=self.justify) - return fmt_values - - def format_percentiles(percentiles): """ Outputs rounded and formatted percentiles. diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index cc9fef90d959f..245d932bb139e 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -215,14 +215,16 @@ def test_repr_large(): arr = period_array(['2000', '2001'] * 500, freq='D') result = str(arr) expected = ( - '\n' - '[2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01, 2000-01-01, ' - '2001-01-01,\n' # continuation - ' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01,\n' - ' ...\n' - ' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01, 2000-01-01, ' - '2001-01-01,\n' # continuation - ' 2000-01-01, 2001-01-01, 2000-01-01, 2001-01-01]\n' - 'Length: 1000, dtype: period[D]' + "\n" + "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " + "'2000-01-01',\n" + " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " + "'2001-01-01',\n" + " ...\n" + " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " + "'2000-01-01',\n" + " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " + "'2001-01-01']\n" + "Length: 1000, dtype: period[D]" ) assert result == expected