Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 84 additions & 25 deletions reframe/utility/sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# SPDX-License-Identifier: BSD-3-Clause

import builtins
import collections
import collections.abc
import glob as pyglob
import itertools
import re
Expand Down Expand Up @@ -520,38 +521,83 @@ def findall(patt, filename, encoding='utf-8'):
return list(evaluate(x) for x in finditer(patt, filename, encoding))


@deferrable
def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'):
'''Get an iterator over the values extracted from the capturing group
``tag`` of a matching regex ``patt`` in the file ``filename``.
def _callable_name(fn):
    '''Return a printable name for the callable ``fn``.

    Objects exposing ``__name__`` (plain functions, builtins, classes) are
    reported by that name. Objects without one (for example
    :class:`functools.partial` instances or instances of classes defining
    ``__call__``) are reported by the name of their class.

    :arg fn: The object whose name is requested.
    :returns: The name as a string, or ``'<unknown>'`` if neither lookup
        succeeds.
    '''
    try:
        # Assume fn is a standard function
        return fn.__name__
    except AttributeError:
        pass

    try:
        # Assume fn is a callable object
        return fn.__class__.__name__
    except AttributeError:
        return '<unknown>'


def _extractiter_singletag(patt, filename, tag, conv, encoding):
    '''Yield the value of the capturing group ``tag`` for every match of
    ``patt`` in ``filename``.

    This is a generator, so the argument check below only runs once
    iteration starts.

    :arg patt: The regex pattern to search for.
    :arg filename: The file to search in.
    :arg tag: The single capturing group to extract; it is passed as-is to
        the ``group()`` method of each match.
    :arg conv: A callable applied to each extracted value. If it is not
        callable (e.g. ``None``), values are yielded unconverted.
    :arg encoding: The encoding forwarded to :func:`finditer`.
    :raises reframe.core.exceptions.SanityError: If ``conv`` is an iterable,
        if ``tag`` is not a group of ``patt``, or if ``conv`` raises
        :class:`ValueError` for an extracted value.
    '''
    if isinstance(conv, collections.abc.Iterable):
        raise SanityError(f'multiple conversion functions given for the '
                          f'single capturing group {tag!r}')

    for m in finditer(patt, filename, encoding):
        try:
            val = m.group(tag)
        except (IndexError, KeyError) as err:
            raise SanityError(
                f'no such group in pattern {patt!r}: {tag}'
            ) from err

        # Convert before yielding so that only errors raised by the
        # conversion itself are reported as conversion failures.
        try:
            converted = conv(val) if callable(conv) else val
        except ValueError as err:
            fn_name = _callable_name(conv)
            raise SanityError(
                f'could not convert value {val!r} using {fn_name}()'
            ) from err

        yield converted


def _extractiter_multitag(patt, filename, tags, conv, encoding):
    '''Yield a tuple with the values of the capturing groups ``tags`` for
    every match of ``patt`` in ``filename``.

    :arg patt: The regex pattern to search for.
    :arg filename: The file to search in.
    :arg tags: An iterable of capturing groups to extract; each one is passed
        as-is to the ``group()`` method of each match.
    :arg conv: Either a single object or an iterable of objects, one per
        tag. A single object is used for every tag. If the iterable is
        longer than ``tags`` the surplus entries are ignored; if it is
        shorter, its last entry is reused for the remaining tags; if it is
        empty, no conversion is performed. Entries that are not callable
        (e.g. ``None``) leave the corresponding value unconverted.
    :arg encoding: The encoding forwarded to :func:`finditer`.
    :raises reframe.core.exceptions.SanityError: If a tag is not a group of
        ``patt`` or if a conversion function raises :class:`ValueError`.
    '''
    # ``tags`` is walked once per match, so materialise it up front in case
    # a one-shot iterator was passed.
    tags = tuple(tags)
    ntags = builtins.len(tags)

    # Build exactly one converter per tag, once, instead of per match.
    if isinstance(conv, collections.abc.Iterable):
        convs = list(conv)[:ntags]
        # Use the last function in case we have fewer conversion functions
        # than tags; an empty iterable means "no conversion".
        filler = convs[-1] if convs else None
        convs.extend([filler] * (ntags - builtins.len(convs)))
    else:
        convs = [conv] * ntags

    for m in finditer(patt, filename, encoding):
        # Extract every group first, so that a missing group is reported
        # before any conversion is attempted.
        vals = []
        for t in tags:
            try:
                vals.append(m.group(t))
            except (IndexError, KeyError) as err:
                raise SanityError(
                    f'no such group in pattern {patt!r}: {t}'
                ) from err

        converted_vals = []
        # The builtin is referenced explicitly, in line with this file's
        # use of ``builtins.len``.
        for v, c in builtins.zip(vals, convs):
            try:
                converted_vals.append(c(v) if callable(c) else v)
            except ValueError as err:
                # Report the function that actually failed, not the whole
                # collection of converters.
                fn_name = _callable_name(c)
                raise SanityError(
                    f'could not convert value {v!r} using {fn_name}()'
                ) from err

        yield tuple(converted_vals)


@deferrable
def extractiter(patt, filename, tag=0, conv=None, encoding='utf-8'):
    '''Get an iterator over the values extracted from the capturing group
    ``tag`` of a matching regex ``patt`` in the file ``filename``.

    This function is equivalent to :func:`extractall` except that it returns
    a generator object, instead of a list, which you can use to iterate over
    the extracted values.

    If ``tag`` is an iterable other than a string, it is treated as a
    collection of capturing groups and each generated item is a tuple with
    one value per group. Otherwise ``tag`` denotes a single capturing group
    and each generated item is the value of that group.
    '''
    # A string is iterable too, but it names a single (named) group.
    if isinstance(tag, collections.abc.Iterable) and not isinstance(tag, str):
        yield from _extractiter_multitag(patt, filename, tag, conv, encoding)
    else:
        yield from _extractiter_singletag(patt, filename, tag, conv, encoding)


@deferrable
Expand All @@ -572,11 +618,24 @@ def extractall(patt, filename, tag=0, conv=None, encoding='utf-8'):
Group ``0`` refers always to the whole match.
Since the file is processed line by line, this means that group ``0``
returns the whole line that was matched.
:arg conv: A callable that takes a single argument and returns a new value.
If provided, it will be used to convert the extracted values before
returning them.
:returns: A list of the extracted values from the matched regex.
:arg conv: A callable or iterable of callables taking a single argument
and returning a new value.
If not an iterable, it will be used to convert the extracted values for
all the capturing groups specified in ``tag``.
Otherwise, each conversion function will be used to convert the value
extracted from the corresponding capturing group in ``tag``.
If fewer conversion functions are supplied than the corresponding
capturing groups in ``tag``, the last conversion function will be used
for the additional capturing groups.
Any surplus conversion functions are ignored.
:returns: A list of tuples of converted values extracted from the
capturing groups specified in ``tag``, if ``tag`` is an iterable.
Otherwise, a list of the converted values extracted from the single
capturing group specified in ``tag``.
:raises reframe.core.exceptions.SanityError: In case of errors.

.. versionchanged:: 3.1
Multiple regex capturing groups are now supported via ``tag`` and
multiple conversion functions can be used in ``conv``.
'''
return list(evaluate(x)
for x in extractiter(patt, filename, tag, conv, encoding))
Expand Down
57 changes: 57 additions & 0 deletions unittests/test_sanity_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,9 @@ def setUp(self):
fp.write('Step: 1\n')
fp.write('Step: 2\n')
fp.write('Step: 3\n')
fp.write('Number: 1 2\n')
fp.write('Number: 2 4\n')
fp.write('Number: 3 6\n')

def tearDown(self):
os.remove(self.tempfile)
Expand Down Expand Up @@ -650,6 +653,60 @@ def test_extractall(self):
for expected, v in enumerate(res, start=1):
assert expected == v

def test_extractall_multiple_tags(self):
    '''Check :func:`sn.extractall` with several capturing groups.

    Results are compared against complete expected lists, so a pattern
    that matches nothing fails the test instead of passing vacuously.
    '''
    named_patt = r'Number: (?P<no1>\d+) (?P<no2>\d+)'
    named_tags = ('no1', 'no2')
    expected_strs = [('1', '2'), ('2', '4'), ('3', '6')]
    expected_nums = [(1, 2), (2, 4), (3, 6)]

    # Check multiple numeric groups
    res = sn.evaluate(sn.extractall(
        r'Number: (\d+) (\d+)', self.tempfile, (1, 2)))
    assert res == expected_strs

    # Check multiple named groups
    res = sn.evaluate(sn.extractall(named_patt, self.tempfile, named_tags))
    assert res == expected_strs

    # Check single convert function
    res = sn.evaluate(sn.extractall(named_patt, self.tempfile,
                                    named_tags, int))
    assert res == expected_nums
    assert all(isinstance(x, int) for v in res for x in v)

    # Check multiple convert functions
    res = sn.evaluate(sn.extractall(named_patt, self.tempfile,
                                    named_tags, (int, float)))
    assert res == expected_nums
    assert all(isinstance(v[0], int) for v in res)
    assert all(isinstance(v[1], float) for v in res)

    # Check more conversion functions than tags: the surplus is ignored
    res = sn.evaluate(sn.extractall(named_patt, self.tempfile,
                                    named_tags,
                                    [int, float, float, float]))
    assert res == expected_nums
    assert all(isinstance(v[0], int) for v in res)
    assert all(isinstance(v[1], float) for v in res)

    # Check fewer convert functions than tags: the last one is reused
    res = sn.evaluate(sn.extractall(named_patt, self.tempfile,
                                    named_tags, [int]))
    assert res == expected_nums
    assert all(isinstance(x, int) for v in res for x in v)

    # Check multiple conversion functions and a single tag
    with pytest.raises(SanityError):
        sn.evaluate(sn.extractall(
            r'Number: (?P<no>\d+) \d+', self.tempfile, 'no', [int, float]
        ))

def test_extractall_encoding(self):
res = sn.evaluate(
sn.extractall('Odyssey', self.utf16_file, encoding='utf-16')
Expand Down