Skip to content

Commit

Permalink
Added python_quoted_string; fixed exception messages for ParseElementEnhance subclasses
Browse files Browse the repository at this point in the history
  • Loading branch information
ptmcg committed Jun 29, 2022
1 parent cb6858c commit 4cd691f
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 114 deletions.
9 changes: 8 additions & 1 deletion CHANGES
Expand Up @@ -28,14 +28,18 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit

Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly.

- Added new builtin `python_quoted_string`, which will match any form
of single-line or multiline quoted strings defined in Python. (Inspired
by discussion with Andreas Schörgenhumer in Issue #421.)

- Fixed bug in `Word` when `max=2`. Also added performance enhancement
when specifying `exact` argument. Reported in issue #409 by
panda-34, nice catch!

- `Word` arguments are now validated if `min` and `max` are both
given, that `min` <= `max`; raises `ValueError` if values are invalid.

- Extended `expr[]` notation for repetition of expr to accept a
- Extended `expr[]` notation for repetition of `expr` to accept a
slice, where the slice's stop value indicates a `stop_on`
expression:

Expand All @@ -62,6 +66,9 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
- Fixed bug in srange, when parsing escaped '/' and '\' inside a
range set.

- Fixed exception messages for some ParserElements with custom names,
which previously showed the names of their contained expressions instead.

- Multiple added and corrected type annotations. With much help from
Stephen Rosen, thanks!

Expand Down
4 changes: 3 additions & 1 deletion docs/HowToUsePyparsing.rst
Expand Up @@ -6,7 +6,7 @@ Using the pyparsing module
:address: ptmcg.pm+pyparsing@gmail.com

:revision: 3.0.10
:date: May, 2022
:date: July, 2022

:copyright: Copyright |copy| 2003-2022 Paul McGuire.

Expand Down Expand Up @@ -1308,6 +1308,8 @@ Common string and token constants

- ``quoted_string`` - ``sgl_quoted_string | dbl_quoted_string``

- ``python_quoted_string`` - ``quoted_string | multiline quoted string``

- ``c_style_comment`` - a comment block delimited by ``'/*'`` and ``'*/'`` sequences; can span
multiple lines, but does not support nesting of comments

Expand Down
2 changes: 1 addition & 1 deletion pyparsing/__init__.py
Expand Up @@ -121,7 +121,7 @@ def __repr__(self):


__version_info__ = version_info(3, 0, 10, "final", 0)
__version_time__ = "24 Jun 2022 16:29 UTC"
__version_time__ = "29 Jun 2022 06:57 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
Expand Down
29 changes: 26 additions & 3 deletions pyparsing/core.py
Expand Up @@ -4446,7 +4446,11 @@ def recurse(self) -> Sequence[ParserElement]:

def parseImpl(self, instring, loc, doActions=True):
if self.expr is not None:
return self.expr._parse(instring, loc, doActions, callPreParse=False)
try:

This comment has been minimized.

Copy link
@InSyncWithFoo

InSyncWithFoo Jan 13, 2024

Contributor

This change causes #527.

return self.expr._parse(instring, loc, doActions, callPreParse=False)
except ParseBaseException as pbe:
pbe.msg = self.errmsg
raise
else:
raise ParseException(instring, loc, "No expression defined", self)

Expand Down Expand Up @@ -5870,10 +5874,29 @@ def autoname_elements() -> None:
).set_name("string enclosed in single quotes")

quoted_string = Combine(
Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
| Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
"double quoted string"
)
| (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
"single quoted string"
)
).set_name("quoted string using single or double quotes")

python_quoted_string = Combine(
(Regex(r'"([^"]|""?(?!"))*', flags=re.MULTILINE) + '"""').set_name(
"multiline double quoted string"
)
| (Regex(r"'([^']|''?(?!'))*", flags=re.MULTILINE) + "'''").set_name(
"multiline single quoted string"
)
| (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
"double quoted string"
)
| (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
"single quoted string"
)
).set_name("Python quoted string")

unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


Expand Down
4 changes: 3 additions & 1 deletion pyparsing/exceptions.py
Expand Up @@ -80,7 +80,9 @@ def explain_exception(exc, depth=16):

f_self = frm.f_locals.get("self", None)
if isinstance(f_self, ParserElement):
if not frm.f_code.co_name.startswith(("parseImpl", "_parseNoCache")):
if not frm.f_code.co_name.startswith(
("parseImpl", "_parseNoCache")
):
continue
if id(f_self) in seen:
continue
Expand Down
4 changes: 2 additions & 2 deletions tests/test_diagram.py
Expand Up @@ -70,11 +70,11 @@ def test_json(self):

def test_sql(self):
railroad = self.generate_railroad(simpleSQL, "simpleSQL")
assert len(railroad) == 18
assert len(railroad) == 20
railroad = self.generate_railroad(
simpleSQL, "simpleSQL", show_results_names=True
)
assert len(railroad) == 18
assert len(railroad) == 20

def test_calendars(self):
railroad = self.generate_railroad(calendars, "calendars")
Expand Down

0 comments on commit 4cd691f

Please sign in to comment.