From c3aae34a0a646cab06c878b56bcad6397e9af239 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 23 Jul 2020 13:45:29 +0200 Subject: [PATCH 1/7] Support multiple tags in extract* sanity functions * Support also passing of multiple conversion functions for each tag to be extracted. --- reframe/utility/sanity.py | 78 ++++++++++++++++++++++-------- unittests/test_sanity_functions.py | 43 ++++++++++++++++ 2 files changed, 101 insertions(+), 20 deletions(-) diff --git a/reframe/utility/sanity.py b/reframe/utility/sanity.py index 0f8ec8d82b..bc0015988b 100644 --- a/reframe/utility/sanity.py +++ b/reframe/utility/sanity.py @@ -1,9 +1,10 @@ -# Copyright 2016-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# Copyrigh 2016-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich) # ReFrame Project Developers. See the top-level LICENSE file for details. # # SPDX-License-Identifier: BSD-3-Clause import builtins +import collections import glob as pyglob import itertools import re @@ -530,28 +531,65 @@ def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'): the extracted values. ''' for m in finditer(patt, filename, encoding): - try: - val = m.group(tag) - except (IndexError, KeyError): - raise SanityError( - "no such group in pattern `%s': %s" % (patt, tag)) - - try: - yield conv(val) if callable(conv) else val - except ValueError: - fn_name = '' + if isinstance(tag, collections.Iterable) and not isinstance(tag, str): + val = [] + for t in tag: + try: + val.append(m.group(t)) + except (IndexError, KeyError): + raise SanityError( + "no such group in pattern `%s': %s" % (patt, t)) + else: try: - # Assume conv is standard function - fn_name = conv.__name__ - except AttributeError: + val = m.group(tag) + except (IndexError, KeyError): + raise SanityError( + "no such group in pattern `%s': %s" % (patt, tag)) + + if isinstance(val, list): + converted_vals = [] + if not isinstance(conv, collections.Iterable): + conv = [conv] * len(val) + + # Here we use the last conversion function for the remaining + # tags which don't have a corresponding one + for v, c in itertools.zip_longest(val, conv, fillvalue=conv[-1]): try: - # Assume conv is callable object - fn_name = conv.__class__.__name__ + converted_vals.append(c(v) if callable(c) else v) + except ValueError: + fn_name = '' + try: + # Assume conv is standard function + fn_name = c.__name__ + except AttributeError: + try: + # Assume conv is callable object + fn_name = c.__class__.__name__ + except AttributeError: + pass + + raise SanityError( + "could not convert value `%s' using `%s()'" % + (v, fn_name)) + + yield(tuple(converted_vals)) + else: + try: + yield conv(val) if callable(conv) else val + except ValueError: + fn_name = '' + try: + # Assume conv is standard function + fn_name = conv.__name__ except AttributeError: - pass - - raise SanityError("could not convert value `%s' using `%s()'" % - (val, fn_name)) + try: + # Assume conv is callable object + fn_name = conv.__class__.__name__ + except AttributeError: + pass + + raise SanityError("could not convert value `%s' using `%s()'" % + (val, fn_name)) @deferrable diff --git a/unittests/test_sanity_functions.py b/unittests/test_sanity_functions.py index 76e3d65bfc..f20ce105aa 100644 --- a/unittests/test_sanity_functions.py +++ b/unittests/test_sanity_functions.py @@ -597,6 +597,9 @@ def setUp(self): fp.write('Step: 1\n') fp.write('Step: 2\n') fp.write('Step: 3\n') + fp.write('Number: 1 2\n') + fp.write('Number: 2 4\n') + fp.write('Number: 3 6\n') def tearDown(self): os.remove(self.tempfile) @@ -650,6 +653,46 @@ def test_extractall(self): for expected, v in enumerate(res, start=1): assert expected == v + def test_extractall_multiple_tags(self): + # Check multiple numeric groups + res = sn.evaluate(sn.extractall( + r'Number: (\d+) (\d+)', self.tempfile, (1, 2))) + for expected, v in enumerate(res, start=1): + assert str(expected) == v[0] + assert str(2 * expected) == v[1] + + # Check multiple named groups + res = sn.evaluate(sn.extractall( + r'Number: (?P\d+) (?P\d+)', self.tempfile, + ('no1', 'no2'))) + for expected, v in enumerate(res, start=1): + assert str(expected) == v[0] + assert str(2 * expected) == v[1] + + # Check single convert function + res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', + self.tempfile, ('no1', 'no2'), int)) + for expected, v in enumerate(res, start=1): + assert expected == v[0] + assert 2 * expected == v[1] + + # Check multiple convert functions + res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', + self.tempfile, ('no1', 'no2'), + (int, float))) + for expected, v in enumerate(res, start=1): + assert expected == v[0] + assert 2 * expected == v[1] + assert isinstance(v[1], float) + + # Check fewer convert functions than tags + res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', + self.tempfile, ('no1', 'no2'), + [int])) + for expected, v in enumerate(res, start=1): + assert expected == v[0] + assert 2 * expected == v[1] + def test_extractall_encoding(self): res = sn.evaluate( sn.extractall('Odyssey', self.utf16_file, encoding='utf-16') From ef23a08d97a6ca97274da4ba97486ee96f10dc85 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 23 Jul 2020 13:50:20 +0200 Subject: [PATCH 2/7] Fix copyright changed by mistake --- reframe/utility/sanity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/utility/sanity.py b/reframe/utility/sanity.py index bc0015988b..6c824d01c6 100644 --- a/reframe/utility/sanity.py +++ b/reframe/utility/sanity.py @@ -1,4 +1,4 @@ -# Copyrigh 2016-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich) +# Copyright 2016-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich) # ReFrame Project Developers. See the top-level LICENSE file for details. # # SPDX-License-Identifier: BSD-3-Clause From d9d1e16f55511036ef8859b16fa49e36d6a38d98 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Thu, 23 Jul 2020 15:52:27 +0200 Subject: [PATCH 3/7] Address PR comments and provide documentation --- reframe/utility/sanity.py | 149 ++++++++++++++++------------- unittests/test_sanity_functions.py | 18 +++- 2 files changed, 101 insertions(+), 66 deletions(-) diff --git a/reframe/utility/sanity.py b/reframe/utility/sanity.py index 6c824d01c6..c3ff344c06 100644 --- a/reframe/utility/sanity.py +++ b/reframe/utility/sanity.py @@ -521,6 +521,68 @@ def findall(patt, filename, encoding='utf-8'): return list(evaluate(x) for x in finditer(patt, filename, encoding)) +def _callable_name(fn): + fn_name = '' + try: + # Assume fn is standard function + fn_name = fn.__name__ + except AttributeError: + try: + # Assume fn is callable object + fn_name = fn.__class__.__name__ + except AttributeError: + pass + + return fn_name + + +def _extractiter_tag(patt, filename, tag, conv, encoding): + if isinstance(conv, collections.Iterable): + conv = conv[0] + + for m in finditer(patt, filename, encoding): + try: + val = m.group(tag) + except (IndexError, KeyError): + raise SanityError(f'no such group in pattern {patt!r}: {tag}') + + try: + yield conv(val) if callable(conv) else val + except ValueError: + fn_name = _callable_name(conv) + raise SanityError( + f'could not convert value {val!r} using {fn_name}()') + + +def _extractiter_multitag(patt, filename, tags, conv, encoding): + for m in finditer(patt, filename, encoding): + val = [] + for t in tags: + try: + val.append(m.group(t)) + except (IndexError, KeyError): + raise SanityError(f'no such group in pattern {patt!r}: {t}') + + converted_vals = [] + if not isinstance(conv, collections.Iterable): + conv = [conv] * builtins.len(val) + elif builtins.len(conv) > builtins.len(val): + conv = conv[:builtins.len(val)] + + # Here we use the last conversion function for the remaining + # tags which don't have a corresponding one, if length of the + # conversion function iterable is less that the one of tags + for v, c in itertools.zip_longest(val, conv, fillvalue=conv[-1]): + try: + converted_vals.append(c(v) if callable(c) else v) + except ValueError: + fn_name = _callable_name(conv) + raise SanityError( + f'could not convert value {v!r} using {fn_name}()') + + yield tuple(converted_vals) + + @deferrable def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'): '''Get an iterator over the values extracted from the capturing group @@ -530,66 +592,10 @@ def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'): a generator object, instead of a list, which you can use to iterate over the extracted values. ''' - for m in finditer(patt, filename, encoding): - if isinstance(tag, collections.Iterable) and not isinstance(tag, str): - val = [] - for t in tag: - try: - val.append(m.group(t)) - except (IndexError, KeyError): - raise SanityError( - "no such group in pattern `%s': %s" % (patt, t)) - else: - try: - val = m.group(tag) - except (IndexError, KeyError): - raise SanityError( - "no such group in pattern `%s': %s" % (patt, tag)) - - if isinstance(val, list): - converted_vals = [] - if not isinstance(conv, collections.Iterable): - conv = [conv] * len(val) - - # Here we use the last conversion function for the remaining - # tags which don't have a corresponding one - for v, c in itertools.zip_longest(val, conv, fillvalue=conv[-1]): - try: - converted_vals.append(c(v) if callable(c) else v) - except ValueError: - fn_name = '' - try: - # Assume conv is standard function - fn_name = c.__name__ - except AttributeError: - try: - # Assume conv is callable object - fn_name = c.__class__.__name__ - except AttributeError: - pass - - raise SanityError( - "could not convert value `%s' using `%s()'" % - (v, fn_name)) - - yield(tuple(converted_vals)) - else: - try: - yield conv(val) if callable(conv) else val - except ValueError: - fn_name = '' - try: - # Assume conv is standard function - fn_name = conv.__name__ - except AttributeError: - try: - # Assume conv is callable object - fn_name = conv.__class__.__name__ - except AttributeError: - pass - - raise SanityError("could not convert value `%s' using `%s()'" % - (val, fn_name)) + if isinstance(tag, collections.Iterable) and not isinstance(tag, str): + yield from _extractiter_multitag(patt, filename, tag, conv, encoding) + else: + yield from _extractiter_tag(patt, filename, tag, conv, encoding) @deferrable @@ -610,11 +616,26 @@ def extractall(patt, filename, tag=0, conv=None, encoding='utf-8'): Group ``0`` refers always to the whole match. Since the file is processed line by line, this means that group ``0`` returns the whole line that was matched. - :arg conv: A callable that takes a single argument and returns a new value. - If provided, it will be used to convert the extracted values before - returning them. - :returns: A list of the extracted values from the matched regex. + :arg conv: A callable or iterable of callables taking a single argument + and returning a new value. + If provided, and is not an iterable it will be used to convert + the extracted values for all the capturing groups of ``tag`` + returning the converted values. + If an iterable of callables is provided, each one will be used to + convert the corresponding extracted capturing group of `tag`. + If more callables functions than the corresponding capturing groups of + ``tag`` are provided, the last conversion function is used for the + remaining capturing groups. + :returns: A list of the extracted values from the matched regex if ``tag`` + converted using the ``conv`` callable if ``tag`` is a single value. + In case of multiple capturing groups, a list of tuples where each one + contains the extracted capturing converted using the corresponding + callable of ``conv``. :raises reframe.core.exceptions.SanityError: In case of errors. + + .. versionchanged:: 3.1 + Multiple regex capturing groups are now supporetd via ``tag`` and + multiple callables can be used in ``conv``. ''' return list(evaluate(x) for x in extractiter(patt, filename, tag, conv, encoding)) diff --git a/unittests/test_sanity_functions.py b/unittests/test_sanity_functions.py index f20ce105aa..8cb219fba8 100644 --- a/unittests/test_sanity_functions.py +++ b/unittests/test_sanity_functions.py @@ -659,7 +659,7 @@ def test_extractall_multiple_tags(self): r'Number: (\d+) (\d+)', self.tempfile, (1, 2))) for expected, v in enumerate(res, start=1): assert str(expected) == v[0] - assert str(2 * expected) == v[1] + assert str(2*expected) == v[1] # Check multiple named groups res = sn.evaluate(sn.extractall( @@ -667,7 +667,7 @@ def test_extractall_multiple_tags(self): ('no1', 'no2'))) for expected, v in enumerate(res, start=1): assert str(expected) == v[0] - assert str(2 * expected) == v[1] + assert str(2*expected) == v[1] # Check single convert function res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', @@ -685,6 +685,20 @@ def test_extractall_multiple_tags(self): assert 2 * expected == v[1] assert isinstance(v[1], float) + # Check more convert functions than tags + res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', + self.tempfile, ('no1', 'no2'), + [int, float, float, float])) + for expected, v in enumerate(res, start=1): + assert expected == v[0] + assert 2 * expected == v[1] + + # Check multiple convert functions and single tag + res = sn.evaluate(sn.extractall( + r'Number: (?P\d+) \d+', self.tempfile, 'no', [int, float])) + for expected, v in enumerate(res, start=1): + assert expected == v + # Check fewer convert functions than tags res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', self.tempfile, ('no1', 'no2'), From 5711d8cd736e7b7f1b0b74cc5f0059661972dbf1 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Fri, 24 Jul 2020 16:00:04 +0200 Subject: [PATCH 4/7] Address PR comments (version 2) --- reframe/utility/sanity.py | 45 +++++++++++++++--------------- unittests/test_sanity_functions.py | 14 +++++----- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/reframe/utility/sanity.py b/reframe/utility/sanity.py index c3ff344c06..4402273e24 100644 --- a/reframe/utility/sanity.py +++ b/reframe/utility/sanity.py @@ -536,9 +536,11 @@ def _callable_name(fn): return fn_name -def _extractiter_tag(patt, filename, tag, conv, encoding): +def _extractiter_singletag(patt, filename, tag, conv, encoding): if isinstance(conv, collections.Iterable): - conv = conv[0] + raise SanityError( + f'multiple conversion functions given for single group: {tag}' + ) for m in finditer(patt, filename, encoding): try: @@ -551,7 +553,8 @@ def _extractiter_tag(patt, filename, tag, conv, encoding): except ValueError: fn_name = _callable_name(conv) raise SanityError( - f'could not convert value {val!r} using {fn_name}()') + f'could not convert value {val!r} using {fn_name}()' + ) def _extractiter_multitag(patt, filename, tags, conv, encoding): @@ -569,16 +572,16 @@ def _extractiter_multitag(patt, filename, tags, conv, encoding): elif builtins.len(conv) > builtins.len(val): conv = conv[:builtins.len(val)] - # Here we use the last conversion function for the remaining - # tags which don't have a corresponding one, if length of the - # conversion function iterable is less that the one of tags + # Use the last function in case we have less conversion functions than + # tags for v, c in itertools.zip_longest(val, conv, fillvalue=conv[-1]): try: converted_vals.append(c(v) if callable(c) else v) except ValueError: fn_name = _callable_name(conv) raise SanityError( - f'could not convert value {v!r} using {fn_name}()') + f'could not convert value {v!r} using {fn_name}()' + ) yield tuple(converted_vals) @@ -595,7 +598,7 @@ def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'): if isinstance(tag, collections.Iterable) and not isinstance(tag, str): yield from _extractiter_multitag(patt, filename, tag, conv, encoding) else: - yield from _extractiter_tag(patt, filename, tag, conv, encoding) + yield from _extractiter_singletag(patt, filename, tag, conv, encoding) @deferrable @@ -618,24 +621,22 @@ def extractall(patt, filename, tag=0, conv=None, encoding='utf-8'): returns the whole line that was matched. :arg conv: A callable or iterable of callables taking a single argument and returning a new value. - If provided, and is not an iterable it will be used to convert - the extracted values for all the capturing groups of ``tag`` - returning the converted values. - If an iterable of callables is provided, each one will be used to - convert the corresponding extracted capturing group of `tag`. - If more callables functions than the corresponding capturing groups of - ``tag`` are provided, the last conversion function is used for the - remaining capturing groups. - :returns: A list of the extracted values from the matched regex if ``tag`` - converted using the ``conv`` callable if ``tag`` is a single value. - In case of multiple capturing groups, a list of tuples where each one - contains the extracted capturing converted using the corresponding - callable of ``conv``. + If not an iterable it will be used to convert the extracted values for + all the capturing groups specified in ``tag``. + Otherwise, each conversion function will be used to convert the value + extracted from the corresponding capturing group in ``tag``. + If more conversion functions are supplied than the corresponding + capturing groups in ``tag``, the last conversion function will be used + for the additional capturing groups. + :returns: A list of tuples of converted values extracted from the + capturing groups specified in ``tag``, if ``tag`` is an iterable. + Otherwise, a list of the converted values extracted from the single + capturing group specified in ``tag``. :raises reframe.core.exceptions.SanityError: In case of errors. .. versionchanged:: 3.1 Multiple regex capturing groups are now supporetd via ``tag`` and - multiple callables can be used in ``conv``. + multiple conversion functions can be used in ``conv``. ''' return list(evaluate(x) for x in extractiter(patt, filename, tag, conv, encoding)) diff --git a/unittests/test_sanity_functions.py b/unittests/test_sanity_functions.py index 8cb219fba8..bf33e621df 100644 --- a/unittests/test_sanity_functions.py +++ b/unittests/test_sanity_functions.py @@ -685,7 +685,7 @@ def test_extractall_multiple_tags(self): assert 2 * expected == v[1] assert isinstance(v[1], float) - # Check more convert functions than tags + # Check more conversion functions than tags res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', self.tempfile, ('no1', 'no2'), [int, float, float, float])) @@ -693,12 +693,6 @@ def test_extractall_multiple_tags(self): assert expected == v[0] assert 2 * expected == v[1] - # Check multiple convert functions and single tag - res = sn.evaluate(sn.extractall( - r'Number: (?P\d+) \d+', self.tempfile, 'no', [int, float])) - for expected, v in enumerate(res, start=1): - assert expected == v - # Check fewer convert functions than tags res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', self.tempfile, ('no1', 'no2'), @@ -707,6 +701,12 @@ def test_extractall_multiple_tags(self): assert expected == v[0] assert 2 * expected == v[1] + # Check multiple conversion functions and a single tag + with pytest.raises(SanityError): + res = sn.evaluate(sn.extractall( + r'Number: (?P\d+) \d+', self.tempfile, 'no', [int, float]) + ) + def test_extractall_encoding(self): res = sn.evaluate( sn.extractall('Odyssey', self.utf16_file, encoding='utf-16') From f660b521b95bed309da6b5863243c6864c2950fb Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Fri, 24 Jul 2020 16:47:00 +0200 Subject: [PATCH 5/7] Address PR comments (version 3) --- reframe/utility/sanity.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/reframe/utility/sanity.py b/reframe/utility/sanity.py index 4402273e24..709030de37 100644 --- a/reframe/utility/sanity.py +++ b/reframe/utility/sanity.py @@ -538,9 +538,8 @@ def _callable_name(fn): def _extractiter_singletag(patt, filename, tag, conv, encoding): if isinstance(conv, collections.Iterable): - raise SanityError( - f'multiple conversion functions given for single group: {tag}' - ) + raise SanityError('multiple conversion functions given for the single ' + f'capturing group: {tag!r}') for m in finditer(patt, filename, encoding): try: @@ -621,7 +620,7 @@ def extractall(patt, filename, tag=0, conv=None, encoding='utf-8'): returns the whole line that was matched. :arg conv: A callable or iterable of callables taking a single argument and returning a new value. - If not an iterable it will be used to convert the extracted values for + If not an iterable, it will be used to convert the extracted values for all the capturing groups specified in ``tag``. Otherwise, each conversion function will be used to convert the value extracted from the corresponding capturing group in ``tag``. From 5c53575aead31efbf82e86964507a854fb468107 Mon Sep 17 00:00:00 2001 From: Theofilos Manitaras Date: Fri, 24 Jul 2020 16:51:49 +0200 Subject: [PATCH 6/7] Remove extra ':' --- reframe/utility/sanity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/utility/sanity.py b/reframe/utility/sanity.py index 709030de37..794c538a53 100644 --- a/reframe/utility/sanity.py +++ b/reframe/utility/sanity.py @@ -539,7 +539,7 @@ def _callable_name(fn): def _extractiter_singletag(patt, filename, tag, conv, encoding): if isinstance(conv, collections.Iterable): raise SanityError('multiple conversion functions given for the single ' - f'capturing group: {tag!r}') + f'capturing group {tag!r}') for m in finditer(patt, filename, encoding): try: From 8352b4af9b5c41ed9b9388ae23a013eddaf4bcbd Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 24 Jul 2020 17:09:47 +0200 Subject: [PATCH 7/7] Minor code style change --- reframe/utility/sanity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reframe/utility/sanity.py b/reframe/utility/sanity.py index 794c538a53..0e36038570 100644 --- a/reframe/utility/sanity.py +++ b/reframe/utility/sanity.py @@ -538,8 +538,8 @@ def _callable_name(fn): def _extractiter_singletag(patt, filename, tag, conv, encoding): if isinstance(conv, collections.Iterable): - raise SanityError('multiple conversion functions given for the single ' - f'capturing group {tag!r}') + raise SanityError(f'multiple conversion functions given for the ' + f'single capturing group {tag!r}') for m in finditer(patt, filename, encoding): try: