diff --git a/reframe/utility/sanity.py b/reframe/utility/sanity.py
index 0f8ec8d82b..0e36038570 100644
--- a/reframe/utility/sanity.py
+++ b/reframe/utility/sanity.py
@@ -4,6 +4,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 
 import builtins
+import collections.abc
 import glob as pyglob
 import itertools
 import re
@@ -520,38 +521,101 @@ def findall(patt, filename, encoding='utf-8'):
     return list(evaluate(x) for x in finditer(patt, filename, encoding))
 
 
-@deferrable
-def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'):
-    '''Get an iterator over the values extracted from the capturing group
-    ``tag`` of a matching regex ``patt`` in the file ``filename``.
+def _callable_name(fn):
+    '''Return a printable name for the callable ``fn``.'''
+    fn_name = '<unknown>'
+    try:
+        # Assume fn is standard function
+        fn_name = fn.__name__
+    except AttributeError:
+        try:
+            # Assume fn is callable object
+            fn_name = fn.__class__.__name__
+        except AttributeError:
+            pass
+
+    return fn_name
+
+
+def _extractiter_singletag(patt, filename, tag, conv, encoding):
+    '''Yield the (converted) value of the capturing group ``tag`` for every
+    match of ``patt`` in ``filename``.
+    '''
+    if isinstance(conv, collections.abc.Iterable):
+        raise SanityError(f'multiple conversion functions given for the '
+                          f'single capturing group {tag!r}')
 
-    This function is equivalent to :func:`extractall` except that it returns
-    a generator object, instead of a list, which you can use to iterate over
-    the extracted values.
-    '''
     for m in finditer(patt, filename, encoding):
         try:
             val = m.group(tag)
         except (IndexError, KeyError):
-            raise SanityError(
-                "no such group in pattern `%s': %s" % (patt, tag))
+            raise SanityError(f'no such group in pattern {patt!r}: {tag}')
 
         try:
             yield conv(val) if callable(conv) else val
         except ValueError:
-            fn_name = '<unknown>'
+            fn_name = _callable_name(conv)
+            raise SanityError(
+                f'could not convert value {val!r} using {fn_name}()'
+            )
+
+
+def _extractiter_multitag(patt, filename, tags, conv, encoding):
+    '''Yield a tuple with the (converted) values of the capturing groups
+    ``tags`` for every match of ``patt`` in ``filename``.
+
+    ``conv`` is either a single callable, applied to every group, or an
+    iterable of callables applied to the groups positionally.
+    '''
+    # Materialize ``tags``, so that one-shot iterators are not exhausted
+    # after the first match
+    tags = tuple(tags)
+    ntags = builtins.len(tags)
+    if isinstance(conv, collections.abc.Iterable):
+        # Ignore surplus functions; an empty iterable means no conversion
+        conv = list(conv)[:ntags] or [None]
+    else:
+        conv = [conv]
+
+    # Use the last function in case we have fewer conversion functions than
+    # tags
+    conv += [conv[-1]] * (ntags - builtins.len(conv))
+    for m in finditer(patt, filename, encoding):
+        val = []
+        for t in tags:
             try:
-                # Assume conv is standard function
-                fn_name = conv.__name__
-            except AttributeError:
-                try:
-                    # Assume conv is callable object
-                    fn_name = conv.__class__.__name__
-                except AttributeError:
-                    pass
+                val.append(m.group(t))
+            except (IndexError, KeyError):
+                raise SanityError(f'no such group in pattern {patt!r}: {t}')
+
+        converted_vals = []
+        for v, c in builtins.zip(val, conv):
+            try:
+                converted_vals.append(c(v) if callable(c) else v)
+            except ValueError:
+                fn_name = _callable_name(c)
+                raise SanityError(
+                    f'could not convert value {v!r} using {fn_name}()'
+                )
+
+        yield tuple(converted_vals)
+
+
+@deferrable
+def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'):
+    '''Get an iterator over the values extracted from the capturing group
+    ``tag`` of a matching regex ``patt`` in the file ``filename``.
 
-                raise SanityError("could not convert value `%s' using `%s()'" %
-                                  (val, fn_name))
+    This function is equivalent to :func:`extractall` except that it returns
+    a generator object, instead of a list, which you can use to iterate over
+    the extracted values.
+    If ``tag`` is a non-string iterable, each generated value is a tuple with
+    one element per capturing group.
+    '''
+    if isinstance(tag, collections.abc.Iterable) and not isinstance(tag, str):
+        yield from _extractiter_multitag(patt, filename, tag, conv, encoding)
+    else:
+        yield from _extractiter_singletag(patt, filename, tag, conv, encoding)
 
 
 @deferrable
@@ -572,11 +636,24 @@ def extractall(patt, filename, tag=0, conv=None, encoding='utf-8'):
         Group ``0`` refers always to the whole match.
         Since the file is processed line by line, this means that group ``0``
         returns the whole line that was matched.
-    :arg conv: A callable that takes a single argument and returns a new value.
-        If provided, it will be used to convert the extracted values before
-        returning them.
-    :returns: A list of the extracted values from the matched regex.
+    :arg conv: A callable or iterable of callables taking a single argument
+        and returning a new value.
+        If not an iterable, it will be used to convert the extracted values for
+        all the capturing groups specified in ``tag``.
+        Otherwise, each conversion function will be used to convert the value
+        extracted from the corresponding capturing group in ``tag``.
+        If fewer conversion functions are supplied than the corresponding
+        capturing groups in ``tag``, the last conversion function will be used
+        for the additional capturing groups.
+    :returns: A list of tuples of converted values extracted from the
+        capturing groups specified in ``tag``, if ``tag`` is an iterable.
+        Otherwise, a list of the converted values extracted from the single
+        capturing group specified in ``tag``.
     :raises reframe.core.exceptions.SanityError: In case of errors.
+
+    .. versionchanged:: 3.1
+       Multiple regex capturing groups are now supported via ``tag`` and
+       multiple conversion functions can be used in ``conv``.
''' return list(evaluate(x) for x in extractiter(patt, filename, tag, conv, encoding)) diff --git a/unittests/test_sanity_functions.py b/unittests/test_sanity_functions.py index 76e3d65bfc..bf33e621df 100644 --- a/unittests/test_sanity_functions.py +++ b/unittests/test_sanity_functions.py @@ -597,6 +597,9 @@ def setUp(self): fp.write('Step: 1\n') fp.write('Step: 2\n') fp.write('Step: 3\n') + fp.write('Number: 1 2\n') + fp.write('Number: 2 4\n') + fp.write('Number: 3 6\n') def tearDown(self): os.remove(self.tempfile) @@ -650,6 +653,60 @@ def test_extractall(self): for expected, v in enumerate(res, start=1): assert expected == v + def test_extractall_multiple_tags(self): + # Check multiple numeric groups + res = sn.evaluate(sn.extractall( + r'Number: (\d+) (\d+)', self.tempfile, (1, 2))) + for expected, v in enumerate(res, start=1): + assert str(expected) == v[0] + assert str(2*expected) == v[1] + + # Check multiple named groups + res = sn.evaluate(sn.extractall( + r'Number: (?P\d+) (?P\d+)', self.tempfile, + ('no1', 'no2'))) + for expected, v in enumerate(res, start=1): + assert str(expected) == v[0] + assert str(2*expected) == v[1] + + # Check single convert function + res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', + self.tempfile, ('no1', 'no2'), int)) + for expected, v in enumerate(res, start=1): + assert expected == v[0] + assert 2 * expected == v[1] + + # Check multiple convert functions + res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', + self.tempfile, ('no1', 'no2'), + (int, float))) + for expected, v in enumerate(res, start=1): + assert expected == v[0] + assert 2 * expected == v[1] + assert isinstance(v[1], float) + + # Check more conversion functions than tags + res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', + self.tempfile, ('no1', 'no2'), + [int, float, float, float])) + for expected, v in enumerate(res, start=1): + assert expected == v[0] + assert 2 * expected == v[1] + + # Check fewer convert functions than tags + 
res = sn.evaluate(sn.extractall(r'Number: (?P\d+) (?P\d+)', + self.tempfile, ('no1', 'no2'), + [int])) + for expected, v in enumerate(res, start=1): + assert expected == v[0] + assert 2 * expected == v[1] + + # Check multiple conversion functions and a single tag + with pytest.raises(SanityError): + res = sn.evaluate(sn.extractall( + r'Number: (?P\d+) \d+', self.tempfile, 'no', [int, float]) + ) + def test_extractall_encoding(self): res = sn.evaluate( sn.extractall('Odyssey', self.utf16_file, encoding='utf-16')