From ea5e640db3330814dc70baf09b313bcc0f37a9fd Mon Sep 17 00:00:00 2001 From: Matthew Barnett Date: Sun, 24 Jul 2022 23:27:26 +0100 Subject: [PATCH] Git issue 474: regex has no equivalent to re.Match.groups() for captures Added 'allcaptures' and 'allspans' methods to match objects. Fixed bug where compiling a pattern didn't always check for unused arguments. --- README.rst | 41 ++++++++++++++++++---- docs/Features.html | 39 +++++++++++++++++---- regex_3/_regex.c | 80 +++++++++++++++++++++++++++++++++++++++++++ regex_3/regex.py | 20 +++++++---- regex_3/test_regex.py | 4 +++ setup.py | 2 +- 6 files changed, 166 insertions(+), 20 deletions(-) diff --git a/README.rst b/README.rst index 1bfbf5b..cd475f1 100644 --- a/README.rst +++ b/README.rst @@ -379,6 +379,23 @@ Examples: >>> m.capturesdict() {'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']} +Added ``allcaptures`` and ``allspans`` (`Git issue 474 `_) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``allcaptures`` returns a list of all the captures of all the groups. + +``allspans`` returns a list of all the spans of all the captures of all the groups. + +Examples: + +.. sourcecode:: python + + >>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n") + >>> m.allcaptures() + (['one 1\ntwo 2\nthree 3\n'], ['one', 'two', 'three'], ['1', '2', '3']) + >>> m.allspans() + ([(0, 20)], [(0, 3), (6, 9), (12, 17)], [(4, 5), (10, 11), (18, 19)]) + Allow duplicate names of groups (`Hg issue 87 `_) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -706,7 +723,7 @@ The order of the items is irrelevant, they are treated as a set. The named lists .. 
sourcecode:: python >>> print(p.named_lists) - {'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})} + {'options': frozenset({'third', 'first', 'fifth', 'fourth', 'second'})} If there are any unused keyword arguments, ``ValueError`` will be raised unless you tell it otherwise: @@ -716,12 +733,24 @@ If there are any unused keyword arguments, ``ValueError`` will be raised unless >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[]) Traceback (most recent call last): File "<stdin>", line 1, in <module> - File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile - return _compile(pattern, flags, ignore_unused, kwargs) - File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile + File "C:\Python310\lib\site-packages\regex\regex.py", line 353, in compile + return _compile(pattern, flags, ignore_unused, kwargs, cache_pattern) + File "C:\Python310\lib\site-packages\regex\regex.py", line 500, in _compile + complain_unused_args() + File "C:\Python310\lib\site-packages\regex\regex.py", line 483, in complain_unused_args raise ValueError('unused keyword argument {!a}'.format(any_one)) ValueError: unused keyword argument 'other_options' >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True) + >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=False) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + File "C:\Python310\lib\site-packages\regex\regex.py", line 353, in compile + return _compile(pattern, flags, ignore_unused, kwargs, cache_pattern) + File "C:\Python310\lib\site-packages\regex\regex.py", line 500, in _compile + complain_unused_args() + File "C:\Python310\lib\site-packages\regex\regex.py", line 483, in complain_unused_args + raise ValueError('unused keyword argument {!a}'.format(any_one)) + ValueError: unused keyword argument 'other_options' >>> Start and end of word @@ -1065,6 +1094,6 @@ The matching methods and functions support timeouts. 
The timeout (in seconds) ap >>> regex.sub(r'[a-z]', slow_replace, 'abcde', timeout=2) Traceback (most recent call last): File "<stdin>", line 1, in <module> - File "C:\Python37\lib\site-packages\regex\regex.py", line 276, in sub - endpos, concurrent, timeout) + File "C:\Python310\lib\site-packages\regex\regex.py", line 278, in sub + return pat.sub(repl, string, count, pos, endpos, concurrent, timeout) TimeoutError: regex timed out diff --git a/docs/Features.html b/docs/Features.html index b8c3de7..f829436 100644 --- a/docs/Features.html +++ b/docs/Features.html @@ -3,7 +3,7 @@ - + README.rst