From 59c73c666b3590895d58bcb91a3732820da3c7bb Mon Sep 17 00:00:00 2001 From: Marten van Kerkwijk Date: Mon, 20 Feb 2023 21:50:20 -0500 Subject: [PATCH] ENH: Use threshold also inside SubArrayFormat. Previously, for structured arrays that contain a lot of elements, those are always all shown, independent of the threshold print option. This PR ensures that threshold and edgeitems are respected. Note that multidimensional items are typeset without line breaks, to avoid breaking up the structure. Hence, this does not solve the issue that structured array elements can easily overfill a line on the console. --- numpy/core/arrayprint.py | 34 ++++++++++++++++++------- numpy/core/tests/test_arrayprint.py | 39 ++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index dcfb6e6a834e..62cd527073a6 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -1096,7 +1096,7 @@ def format_float_scientific(x, precision=None, unique=True, trim='k', identify the value may be printed and rounded unbiased. -- versionadded:: 1.21.0 - + Returns ------- rep : string @@ -1181,7 +1181,7 @@ def format_float_positional(x, precision=None, unique=True, Minimum number of digits to print. Only has an effect if `unique=True` in which case additional digits past those necessary to uniquely identify the value may be printed, rounding the last additional digit. - + -- versionadded:: 1.21.0 Returns @@ -1339,13 +1339,29 @@ def _format_non_nat(self, x): class SubArrayFormat: - def __init__(self, format_function): + def __init__(self, format_function, **options): self.format_function = format_function + self.threshold = options['threshold'] + self.edge_items = options['edgeitems'] + + def __call__(self, a): + self.summary_insert = "..." if a.size > self.threshold else "" + return self.format_array(a) + + def format_array(self, a): + if np.ndim(a) == 0: + return self.format_function(a) + + if self.summary_insert and a.shape[0] > 2*self.edge_items: + formatted = ( + [self.format_array(a_) for a_ in a[:self.edge_items]] + + [self.summary_insert] + + [self.format_array(a_) for a_ in a[-self.edge_items:]] + ) + else: + formatted = [self.format_array(a_) for a_ in a] - def __call__(self, arr): - if arr.ndim <= 1: - return "[" + ", ".join(self.format_function(a) for a in arr) + "]" - return "[" + ", ".join(self.__call__(a) for a in arr) + "]" + return "[" + ", ".join(formatted) + "]" class StructuredVoidFormat: @@ -1369,7 +1385,7 @@ def from_data(cls, data, **options): for field_name in data.dtype.names: format_function = _get_format_function(data[field_name], **options) if data.dtype[field_name].shape != (): - format_function = SubArrayFormat(format_function) + format_function = SubArrayFormat(format_function, **options) format_functions.append(format_function) return cls(format_functions) @@ -1428,7 +1444,7 @@ def dtype_is_implied(dtype): # not just void types can be structured, and names are not part of the repr if dtype.names is not None: return False - + # should care about endianness *unless size is 1* (e.g., int8, bool) if not dtype.isnative: return False diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py index 372cb8d415f4..b92c8ae8c85b 100644 --- a/numpy/core/tests/test_arrayprint.py +++ b/numpy/core/tests/test_arrayprint.py @@ -353,6 +353,33 @@ def test_summarize_2d(self): ' [ 501, 502, 503, ..., 999, 1000, 1001]])' assert_equal(repr(A), reprA) + def test_summarize_structure(self): + A = (np.arange(2002, dtype="i8", (2, 1001))]) + strB = "[([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)]" + assert_equal(str(B), strB) + + reprB = ( + "array([([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)],\n" + " dtype=[('i', '>i8', (2, 1001))])" + ) + assert_equal(repr(B), reprB) + + C = (np.arange(22, dtype="