diff --git a/pyparsing/core.py b/pyparsing/core.py index 54846f5a..2a280c95 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -6,7 +6,9 @@ from typing import ( Any, Callable, - Generator, + ClassVar, + Iterable, + Iterator, List, NamedTuple, Sequence, @@ -23,7 +25,6 @@ import warnings import re import sys -from collections.abc import Iterable import traceback import types from operator import itemgetter @@ -475,6 +476,52 @@ def __init__(self, savelist: bool = False): self.callDuringTry = False self.suppress_warnings_: List[Diagnostics] = [] + def __repr__(self) -> str: + """Returns a canonical string representation of this element. This is + often, but not always, an expression that would produce a similar + element if evaluated. + + Instances of the literal wrapper class (set by + :meth:`inline_literals_using`) are written as literals when possible. + """ + return self._make_repr() + + def __format__(self, spec: str) -> str: + """Formats an element for printing or f-string interpolation. + + Supported conversion specifiers are: + + - ``s`` or none: as given by :meth:`__str__` + - ``r``: as given by :meth:`__repr__` + - Other specifiers are used internally and may change. + + Example:: + + parser = OneOrMore("Hello") + "world" + print(f"{parser}\\n{parser:r}") + + results in:: + + {{'Hello'}... 'world'} + Literal('Hello')[1, ...] + 'world' + """ + if not spec or spec == "s": + return str(self) + + # Specifiers "+r", "lr", and "+lr" are used internally for _make_repr() + # variants. The "+" modifier means that this element is a term in a + # larger expression and may need parentheses. The "l" modifier means + # that an instance of _literalStringClass may be written as a string + # literal in this context. 
+ if spec in ("r", "lr", "+r", "+lr"): + as_literal = "l" in spec and isinstance(self, ParserElement._literalStringClass) + is_term = "+" in spec + return self._make_repr(as_literal=as_literal, is_term=is_term) + raise ValueError(f"Invalid format specifier: {spec!r}") + + def _make_repr(self, as_literal=False, is_term=False): + return f"{type(self).__name__}()" + def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement": """ Suppress warnings emitted for a particular diagnostic on this expression. @@ -1146,7 +1193,7 @@ def scan_string( *, debug: bool = False, maxMatches: int = _MAX_INT, - ) -> Generator[Tuple[ParseResults, int, int], None, None]: + ) -> Iterator[Tuple[ParseResults, int, int]]: """ Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location. May be called with optional @@ -1320,7 +1367,7 @@ def split( include_separators: bool = False, *, includeSeparators=False, - ) -> Generator[str, None, None]: + ) -> Iterator[str]: """ Generator method to split a string using the given expression as a separator. 
May be called with optional ``maxsplit`` argument, to limit the number of splits; @@ -1402,7 +1449,8 @@ def __sub__(self, other) -> "ParserElement": other = self._literalStringClass(other) if not isinstance(other, ParserElement): return NotImplemented - return self + And._ErrorStop() + other + # Construct And directly to ensure that an _ErrorStop is never the last element + return And([self, And._ErrorStop(), other]) def __rsub__(self, other) -> "ParserElement": """ @@ -1833,9 +1881,6 @@ def name(self) -> str: def __str__(self) -> str: return self.name - def __repr__(self) -> str: - return str(self) - def streamline(self) -> "ParserElement": self.streamlined = True self._defaultName = None @@ -2275,7 +2320,7 @@ def must_skip(t): def show_skip(t): if t._skipped.as_list()[-1:] == [""]: t.pop("_skipped") - t["_skipped"] = "missing <" + repr(self.anchor) + ">" + t["_skipped"] = f"missing <{self.anchor}>" return ( self.anchor + skipper().add_parse_action(must_skip) @@ -2284,9 +2329,6 @@ def show_skip(t): return self.anchor + skipper + other - def __repr__(self): - return self.defaultName - def parseImpl(self, *args): raise Exception( "use of `...` expression without following SkipTo target expression" @@ -2366,6 +2408,11 @@ def __copy__(self) -> "Literal": obj.__dict__.update(self.__dict__) return obj + def _make_repr(self, as_literal=False, is_term=False): + if as_literal: + return repr(self.match) + return f"{type(self).__name__}({self.match!r})" + def _generateDefaultName(self) -> str: return repr(self.match) @@ -2387,6 +2434,11 @@ def __init__(self, match_string="", *, matchString=""): self.mayReturnEmpty = True self.mayIndexError = False + def _make_repr(self, as_literal=False, is_term=False): + if as_literal: + return repr("") + return f"{type(self).__name__}()" + def _generateDefaultName(self) -> str: return "Empty" @@ -2395,6 +2447,11 @@ def parseImpl(self, instring, loc, doActions=True): class _SingleCharLiteral(Literal): + def _make_repr(self, 
as_literal=False, is_term=False): + if as_literal: + return repr(self.match) + return f"Literal({self.match!r})" + def parseImpl(self, instring, loc, doActions=True): if instring[loc] == self.firstMatchChar: return loc + 1, self.match @@ -2462,6 +2519,11 @@ def __init__( identChars = identChars.upper() self.identChars = set(identChars) + def _make_repr(self, as_literal=False, is_term=False): + if as_literal: + return repr(self.match) + return f"{type(self).__name__}({self.match!r})" + def _generateDefaultName(self) -> str: return repr(self.match) @@ -2543,6 +2605,11 @@ def __init__(self, match_string: str = "", *, matchString: str = ""): self.returnString = match_string self.errmsg = "Expected " + self.name + def _make_repr(self, as_literal=False, is_term=False): + if as_literal: + return repr(self.returnString) + return f"CaselessLiteral({self.returnString!r})" + def parseImpl(self, instring, loc, doActions=True): if instring[loc : loc + self.matchLen].upper() == self.match: return loc + self.matchLen, self.returnString @@ -2626,6 +2693,13 @@ def __init__( self.mayIndexError = False self.mayReturnEmpty = False + def _make_repr(self, as_literal=False, is_term=False): + if as_literal: + return repr(self.match_string) + if self.maxMismatches == 1: + return f"{type(self).__name__}({self.match_string!r})" + return f"{type(self).__name__}({self.match_string!r}, {self.maxMismatches})" + def _generateDefaultName(self) -> str: return f"{type(self).__name__}:{self.match_string!r}" @@ -2846,6 +2920,24 @@ def __init__( self.re_match = self.re.match self.parseImpl = self.parseImpl_regex + def _make_repr(self, as_literal=False, is_term=False): + init_repr = abbrev_charset(self.initChars) + if self.maxLen == 1: + return f"Char({init_repr})" + args = [init_repr] + if self.initChars != self.bodyChars: + args.append(abbrev_charset(self.bodyChars)) + if self.minLen == self.maxLen: + args.append(f"exact={self.minLen}") + else: + if self.minLen > 1: + args.append(f"min={self.minLen}") 
+ if self.maxLen < _MAX_INT: + args.append(f"max={self.maxLen}") + + arg_str = ", ".join(args) + return f"{type(self).__name__}({arg_str})" + def _generateDefaultName(self) -> str: def charsAsStr(s): max_repr_len = 16 @@ -3007,6 +3099,11 @@ def __init__( if self.asMatch: self.parseImpl = self.parseImplAsMatch # type: ignore [assignment] + def _make_repr(self, as_literal=False, is_term=False): + if as_literal: + return repr(self.pattern) + return f"Regex({self.pattern!r})" + @cached_property def re(self): if self._re: @@ -3239,6 +3336,11 @@ def __init__( self.mayIndexError = False self.mayReturnEmpty = True + def _make_repr(self, as_literal=False, is_term=False): + if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type): + return f"QuotedString({self.quoteChar!r})" + return f"QuotedString({self.quoteChar!r}, end_quote_char={self.endQuoteChar!r})" + def _generateDefaultName(self) -> str: if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type): return f"string enclosed in {self.quoteChar!r}" @@ -3335,6 +3437,10 @@ def __init__( self.mayReturnEmpty = self.minLen == 0 self.mayIndexError = False + def _make_repr(self, as_literal=False, is_term=False): + not_chars_repr = abbrev_charset(self.notCharsSet) + return f"CharsNotIn({not_chars_repr})" + def _generateDefaultName(self) -> str: not_chars_str = _collapse_string_to_ranges(self.notChars) if len(not_chars_str) > 16: @@ -3417,8 +3523,14 @@ def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = self.maxLen = exact self.minLen = exact + def _make_repr(self, as_literal=False, is_term=False): + if set(self.matchWhite) == set(" \t\r\n"): + return f"{type(self).__name__}()" + ws_repr = abbrev_charset(self.matchWhite) + return f"{type(self).__name__}({ws_repr})" + def _generateDefaultName(self) -> str: - return "".join(White.whiteStrs[c] for c in self.matchWhite) + return "".join(White.whiteStrs[c] for c in sorted(set(self.matchWhite))) def 
parseImpl(self, instring, loc, doActions=True): if instring[loc] not in self.matchWhite: @@ -3452,6 +3564,9 @@ def __init__(self, colno: int): super().__init__() self.col = colno + def _make_repr(self, as_literal=False, is_term=False): + return f"{type(self).__name__}({self.col!r})" + def preParse(self, instring: str, loc: int) -> int: if col(loc, instring) != self.col: instrlen = len(instring) @@ -3597,6 +3712,12 @@ def __init__(self, word_chars: str = printables, *, wordChars: str = printables) self.wordChars = set(wordChars) self.errmsg = "Not at the start of a word" + def _make_repr(self, as_literal=False, is_term=False): + if self.wordChars == set(printables): + return f"{type(self).__name__}()" + chars_repr = abbrev_charset(self.wordChars) + return f"{type(self).__name__}({chars_repr})" + def parseImpl(self, instring, loc, doActions=True): if loc != 0: if ( @@ -3623,6 +3744,12 @@ def __init__(self, word_chars: str = printables, *, wordChars: str = printables) self.skipWhitespace = False self.errmsg = "Not at the end of a word" + def _make_repr(self, as_literal=False, is_term=False): + if self.wordChars == set(printables): + return f"{type(self).__name__}()" + chars_repr = abbrev_charset(self.wordChars) + return f"{type(self).__name__}({chars_repr})" + def parseImpl(self, instring, loc, doActions=True): instrlen = len(instring) if instrlen > 0 and loc < instrlen: @@ -3639,6 +3766,8 @@ class ParseExpression(ParserElement): post-processing parsed tokens. 
""" + OPERATOR: ClassVar[str] = "" + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): super().__init__(savelist) self.exprs: List[ParserElement] @@ -3665,6 +3794,29 @@ def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False self.exprs = [exprs] self.callPreparse = False + def _make_repr(self, as_literal=False, is_term=False): + if not self.exprs: + return f"{type(self).__name__}([])" + if len(self.exprs) == 1: + return f"{type(self).__name__}([{self.exprs[0]:lr}])" + + op = f" {type(self).OPERATOR} " + # Allow the first term to be a literal string if the second is not + if isinstance(self.exprs[1], ParserElement._literalStringClass): + first = f"{self.exprs[0]:+r}" + else: + first = f"{self.exprs[0]:+lr}" + # Any term after the first may be a literal string + result = op.join([ + first, + *(f"{expr:+lr}" for expr in self.exprs[1:]), + ]) + return f"({result})" if is_term else result + + def _generateDefaultName(self) -> str: + op = f" {type(self).OPERATOR} " + return "{" + op.join(map(str, self.exprs)) + "}" + def recurse(self) -> Sequence[ParserElement]: return self.exprs[:] @@ -3710,9 +3862,6 @@ def ignore(self, other) -> ParserElement: e.ignore(self.ignoreExprs[-1]) return self - def _generateDefaultName(self) -> str: - return f"{self.__class__.__name__}:({str(self.exprs)})" - def streamline(self) -> ParserElement: if self.streamlined: return self @@ -3820,6 +3969,8 @@ class And(ParseExpression): expr = integer("id") + name_expr("name") + integer("age") """ + OPERATOR = "+" + class _ErrorStop(Empty): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -3863,6 +4014,30 @@ def __init__( self.mayReturnEmpty = True self.callPreparse = True + def _make_repr(self, as_literal=False, is_term=False): + if not self.exprs: + return f"{type(self).__name__}([])" + if len(self.exprs) == 1: + return f"{type(self).__name__}([{self.exprs[0]:lr}])" + + # Allow the first term to be a literal string if 
the second is not + if isinstance(self.exprs[1], (And._ErrorStop, ParserElement._literalStringClass)): + builder = [f"{self.exprs[0]:+r}"] + else: + builder = [f"{self.exprs[0]:+lr}"] + nextOp = "+" + for expr in self.exprs[1:]: + if isinstance(expr, And._ErrorStop): + nextOp = "-" + continue + builder.append(nextOp) + builder.append(f"{expr:+lr}") + nextOp = "+" + if nextOp == "-": + builder.append("+ _ErrorStop()") + result = " ".join(builder) + return f"({result})" if is_term else result + def streamline(self) -> ParserElement: # collapse any _PendingSkip's if self.exprs: @@ -3977,6 +4152,8 @@ class Or(ParseExpression): [['123'], ['3.1416'], ['789']] """ + OPERATOR = "^" + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): super().__init__(exprs, savelist) if self.exprs: @@ -4086,9 +4263,6 @@ def __ixor__(self, other): return NotImplemented return self.append(other) # Or([self, other]) - def _generateDefaultName(self) -> str: - return "{" + " ^ ".join(str(e) for e in self.exprs) + "}" - def _setResultsName(self, name, listAllMatches=False): if ( __diag__.warn_multiple_tokens_in_named_alternation @@ -4134,6 +4308,8 @@ class MatchFirst(ParseExpression): print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] """ + OPERATOR = "|" + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): super().__init__(exprs, savelist) if self.exprs: @@ -4199,9 +4375,6 @@ def __ior__(self, other): return NotImplemented return self.append(other) # MatchFirst([self, other]) - def _generateDefaultName(self) -> str: - return "{" + " | ".join(str(e) for e in self.exprs) + "}" - def _setResultsName(self, name, listAllMatches=False): if ( __diag__.warn_multiple_tokens_in_named_alternation @@ -4287,6 +4460,8 @@ class Each(ParseExpression): - size: 20 """ + OPERATOR = "&" + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True): super().__init__(exprs, savelist) if 
self.exprs: @@ -4399,9 +4574,6 @@ def parseImpl(self, instring, loc, doActions=True): return loc, total_results - def _generateDefaultName(self) -> str: - return "{" + " & ".join(str(e) for e in self.exprs) + "}" - class ParseElementEnhance(ParserElement): """Abstract subclass of :class:`ParserElement`, for combining and @@ -4430,6 +4602,11 @@ def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): self.callPreparse = expr.callPreparse self.ignoreExprs.extend(expr.ignoreExprs) + def _make_repr(self, as_literal=False, is_term=False): + if as_literal and type(self.expr) is Literal: + return repr(self.expr.match) + return f"{type(self).__name__}({self.expr:lr})" + def recurse(self) -> Sequence[ParserElement]: return [self.expr] if self.expr is not None else [] @@ -4818,6 +4995,9 @@ def __init__(self, expr: Union[ParserElement, str]): self.mayReturnEmpty = True self.errmsg = "Found unwanted token, " + str(self.expr) + def _make_repr(self, as_literal=False, is_term=False): + return f"~{self.expr:+r}" + def parseImpl(self, instring, loc, doActions=True): if self.expr.can_parse_next(instring, loc): raise ParseException(instring, loc, self.errmsg, self) @@ -4932,6 +5112,12 @@ class OneOrMore(_MultipleMatch): (attr_expr * (1,)).parse_string(text).pprint() """ + def _make_repr(self, as_literal=False, is_term=False): + if self.not_ender is not None: + ender = self.not_ender.expr + return f"{self.expr:+r}[1, ...: {ender:lr}]" + return f"{self.expr:+r}[1, ...]" + def _generateDefaultName(self) -> str: return "{" + str(self.expr) + "}..." 
# Raw characters that a preceding hexadecimal escape would absorb as extra
# digits if they were written unescaped directly after it.
_SRANGE_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")


def _srange_escape(char: str, force_hex: bool = False) -> str:
    """Spell a single character for use inside an ``srange`` bracket expression.

    :param char: the character to spell.
    :param force_hex: when true, always use the ``\\0x..`` hexadecimal form,
        even for a character that would otherwise be written raw.
    :returns: ``char`` itself, a backslash-escaped form of it, or a
        ``\\0x..`` hexadecimal escape.
    """
    if not force_hex:
        # Characters that need to be escaped.  "^" is included because a raw
        # "^" directly after the opening "[" would be read as a negation
        # marker instead of a member (NOTE(review): per srange's bracket
        # grammar - confirm against the srange implementation).
        if char in " -\\]^":
            return "\\" + char
        if 0x20 <= ord(char) <= 0x7F:
            return char
    return "\\" + hex(ord(char))


def _gen_srange(chars: Sequence[str]) -> Iterator[str]:
    """Yield the pieces of an ``srange`` bracket body describing ``chars``.

    Runs of more than three consecutive code points are yielded as a single
    ``first-last`` range; shorter runs are yielded one character at a time.

    :param chars: single-character strings.  Precondition: chars is sorted.
    :returns: an iterator over the escaped pieces, in order.
    """
    total = len(chars)
    start = 0
    # True while the most recently yielded piece ends in a hex escape.
    after_hex = False
    # Iterate run by run instead of recursing once per run, so that a long
    # charset with many short runs cannot exhaust the recursion limit.
    while start < total:
        stop = start + 1
        while stop < total and ord(chars[stop]) == ord(chars[stop - 1]) + 1:
            stop += 1

        if stop - start > 3:
            head = chars[start]
            # A raw hex digit right after a hex escape would be absorbed into
            # that escape ("\0x9" + "0" reads as "\0x90"), so spell it as a
            # hex escape as well.
            first = _srange_escape(head, after_hex and head in _SRANGE_HEX_DIGITS)
            last = _srange_escape(chars[stop - 1])
            yield f"{first}-{last}"
            after_hex = last.startswith("\\0x")
        else:
            for char in chars[start:stop]:
                piece = _srange_escape(
                    char, after_hex and char in _SRANGE_HEX_DIGITS
                )
                yield piece
                after_hex = piece.startswith("\\0x")
        start = stop


def abbrev_charset(chars: Iterable[str]) -> str:
    """Return the shortest source-code spelling of a set of characters.

    The characters are de-duplicated and sorted, then written both as a plain
    string literal and as an ``srange('[...]')`` call; whichever text is
    shorter is returned, with the plain literal winning ties.

    :param chars: an iterable of single-character strings (a ``str`` works).
    :returns: either ``repr()`` of the sorted characters joined together, or
        the text of an ``srange(...)`` call.
    """
    charset = sorted(set(chars))
    literal_form = repr("".join(charset))
    bracket_expr = "[" + "".join(_gen_srange(charset)) + "]"
    srange_form = f"srange({bracket_expr!r})"
    # min() returns the first of equally short candidates, i.e. the literal.
    return min((literal_form, srange_form), key=len)
ParseException as pe: pass +class Test01c_ParseExpressionFormatting(TestCase): + def setUp(self): + # Make sure the inline class is reset to default + pp.ParserElement.inline_literals_using(pp.Literal) + + def check_both(self, p, good_str, good_repr): + self.assertEqual(str(p), good_str) + self.assertEqual(repr(p), good_repr) + + def testNoMatch(self): + p = pp.NoMatch() + self.check_both(p, "NoMatch", "NoMatch()") + + def testLiteral(self): + p = pp.Literal("Hello") + self.check_both(p, "'Hello'", "Literal('Hello')") + + p = pp.Literal("") + self.check_both(p, "Empty", "Empty()") + + p = pp.Literal("Z") + self.check_both(p, "'Z'", "Literal('Z')") + + def testEmpty(self): + p = pp.Empty() + self.check_both(p, "Empty", "Empty()") + + def testKeyword(self): + p = pp.Keyword("Hello") + self.check_both(p, "'Hello'", "Keyword('Hello')") + + @unittest.expectedFailure + def testCaselessLiteral(self): + p = pp.CaselessLiteral("Hello") + self.check_both(p, "'Hello'", "CaselessLiteral('Hello')") + + def testCaselessKeyword(self): + p = pp.CaselessKeyword("Hello") + self.check_both(p, "'Hello'", "CaselessKeyword('Hello')") + + def testCloseMatch(self): + p = pp.CloseMatch("Hello") + self.check_both(p, "CloseMatch:'Hello'", "CloseMatch('Hello')") + + # Default for maxMismatches is 1 + p = pp.CloseMatch("Hello", 1) + self.check_both(p, "CloseMatch:'Hello'", "CloseMatch('Hello')") + + p = pp.CloseMatch("Hello", 3) + self.check_both(p, "CloseMatch:'Hello'", "CloseMatch('Hello', 3)") + + def testWord(self): + p = pp.Word("ABC") + self.check_both(p, "W:(ABC)", "Word('ABC')") + + p = pp.Word("ABC", "cba") + self.check_both(p, "W:(ABC, abc)", "Word('ABC', 'abc')") + + p = pp.Word("ABC", exact=5) + self.check_both(p, "W:(ABC){5}", "Word('ABC', exact=5)") + + p = pp.Word("ABC", min=3, max=3) + self.check_both(p, "W:(ABC){3}", "Word('ABC', exact=3)") + + p = pp.Word("ABC", min=3, max=5) + self.check_both(p, "W:(ABC){3,5}", "Word('ABC', min=3, max=5)") + + p = pp.Word("ABC", min=3) + 
self.check_both(p, "W:(ABC){3,...}", "Word('ABC', min=3)") + + p = pp.Word("ABCDEFG", "#b", max=2) + self.check_both(p, "W:(A-G, #b){1,2}", "Word('ABCDEFG', '#b', max=2)") + + p = pp.Word("ABC", exact=1) + self.check_both(p, "(ABC)", "Char('ABC')") + + p = pp.Char("beadgcf") + self.check_both(p, "(a-g)", "Char('abcdefg')") + + def testRegex(self): + p = pp.Regex(r"(?:[A-Za-z0-9+/]{4})*[A-Za-z0-9+/=]{4}") + self.check_both( + p, + r"Re:('(?:[A-Za-z0-9+/]{4})*[A-Za-z0-9+/=]{4}')", + r"Regex('(?:[A-Za-z0-9+/]{4})*[A-Za-z0-9+/=]{4}')", + ) + + p = pp.Regex(r"\([^)]*\)") + self.check_both( + p, + r"Re:('\([^)]*\)')", + r"Regex('\\([^)]*\\)')", + ) + + def testQuotedString(self): + p = pp.QuotedString("/") + self.check_both(p, "string enclosed in '/'", "QuotedString('/')") + + p = pp.QuotedString("@{", end_quote_char="}") + self.check_both( + p, + "quoted string, starting with @{ ending with }", + "QuotedString('@{', end_quote_char='}')", + ) + + def testCharsNotIn(self): + p = pp.CharsNotIn("aeiou") + self.check_both(p, "!W:(aeiou)", "CharsNotIn('aeiou')") + + p = pp.CharsNotIn("ACEGIKMOQSacegikmoqs") + self.check_both(p, "!W:(ACEGIKMOQSace...)", "CharsNotIn('ACEGIKMOQSacegikmoqs')") + + def testWhitespace(self): + p = pp.White() + self.check_both(p, "", "White()") + + p = pp.White("\t \u00a0\u202F") + self.check_both( + p, + "", + r"White('\t \xa0\u202f')", + ) + + def testGoToColumn(self): + p = pp.GoToColumn(80) + self.check_both(p, "GoToColumn", "GoToColumn(80)") + + def testAnchors(self): + p = pp.LineStart() + self.check_both(p, "LineStart", "LineStart()") + + p = pp.LineEnd() + self.check_both(p, "LineEnd", "LineEnd()") + + p = pp.StringStart() + self.check_both(p, "StringStart", "StringStart()") + + p = pp.StringEnd() + self.check_both(p, "StringEnd", "StringEnd()") + + def testWordStartEnd(self): + p = pp.WordStart() + self.check_both(p, "WordStart", "WordStart()") + + p = pp.WordStart("0123457689") + self.check_both(p, "WordStart", "WordStart('0123456789')") + 
+ p = pp.WordEnd() + self.check_both(p, "WordEnd", "WordEnd()") + + p = pp.WordEnd("0123457689") + self.check_both(p, "WordEnd", "WordEnd('0123456789')") + + def testAnd(self): + p = pp.And([]) + self.check_both( + p, + "{}", + "And([])", + ) + + p = pp.And(["Hello"]) + self.check_both( + p, + "{'Hello'}", + "And(['Hello'])", + ) + + p = pp.And([pp.Word("aeiou")]) + self.check_both( + p, + "{W:(aeiou)}", + "And([Word('aeiou')])", + ) + + p = pp.And(["Hello", "world"]) + self.check_both( + p, + "{'Hello' 'world'}", + "Literal('Hello') + 'world'", + ) + + p = pp.And(["Hello", pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' W:(aeiou)}", + "'Hello' + Word('aeiou')", + ) + + p = pp.And([pp.Word("aeiou"), "Hello"]) + self.check_both( + p, + "{W:(aeiou) 'Hello'}", + "Word('aeiou') + 'Hello'", + ) + + p = pp.And([pp.Word("aeiou"), pp.Word("AEIOU")]) + self.check_both( + p, + "{W:(aeiou) W:(AEIOU)}", + "Word('aeiou') + Word('AEIOU')", + ) + + p = pp.And([pp.MatchFirst(["Hello", "there"]), "Hi"]) + self.check_both( + p, + "{{'Hello' | 'there'} 'Hi'}", + "(Literal('Hello') | 'there') + 'Hi'", + ) + + p = pp.And(["Hello", pp.MatchFirst(["Hi", "there"])]) + self.check_both( + p, + "{'Hello' {'Hi' | 'there'}}", + "'Hello' + (Literal('Hi') | 'there')", + ) + + p = pp.And(["Hello", "Hi", "Greetings"]) + self.check_both( + p, + "{'Hello' 'Hi' 'Greetings'}", + "Literal('Hello') + 'Hi' + 'Greetings'", + ) + + p = pp.And(["Hello", pp.MatchFirst(["Hi", "there"]), pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' {'Hi' | 'there'} W:(aeiou)}", + "'Hello' + (Literal('Hi') | 'there') + Word('aeiou')", + ) + + p = pp.And(["Hello", pp.Word("aeiou"), pp.MatchFirst(["Hi", "there"])]) + self.check_both( + p, + "{'Hello' W:(aeiou) {'Hi' | 'there'}}", + "'Hello' + Word('aeiou') + (Literal('Hi') | 'there')", + ) + + def testErrorStop(self): + p = pp.Literal("Hello") - "world" + self.check_both( + p, + "{'Hello' - 'world'}", + "Literal('Hello') - 'world'", + ) + + p = 
pp.Literal("Hello") - pp.Word("aeiou") + self.check_both( + p, + "{'Hello' - W:(aeiou)}", + "Literal('Hello') - Word('aeiou')", + # Maybe in future: + #"'Hello' - Word('aeiou')", + ) + + p = pp.And(["Hello", pp.MatchFirst(["Hi", "there"]), pp.And._ErrorStop(), pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' {'Hi' | 'there'} - W:(aeiou)}", + "'Hello' + (Literal('Hi') | 'there') - Word('aeiou')", + ) + + def testOr(self): + p = pp.Or([]) + self.check_both( + p, + "{}", + "Or([])", + ) + + p = pp.Or(["Hello"]) + self.check_both( + p, + "{'Hello'}", + "Or(['Hello'])", + ) + + p = pp.Or([pp.Word("aeiou")]) + self.check_both( + p, + "{W:(aeiou)}", + "Or([Word('aeiou')])", + ) + + p = pp.Or(["Hello", "world"]) + self.check_both( + p, + "{'Hello' ^ 'world'}", + "Literal('Hello') ^ 'world'", + ) + + p = pp.Or(["Hello", pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' ^ W:(aeiou)}", + "'Hello' ^ Word('aeiou')", + ) + + p = pp.Or([pp.Word("aeiou"), "Hello"]) + self.check_both( + p, + "{W:(aeiou) ^ 'Hello'}", + "Word('aeiou') ^ 'Hello'", + ) + + p = pp.Or([pp.Word("aeiou"), pp.Word("AEIOU")]) + self.check_both( + p, + "{W:(aeiou) ^ W:(AEIOU)}", + "Word('aeiou') ^ Word('AEIOU')", + ) + + p = pp.Or([pp.And(["Hello", "there"]), "Hi"]) + self.check_both( + p, + "{{'Hello' 'there'} ^ 'Hi'}", + "(Literal('Hello') + 'there') ^ 'Hi'", + ) + + p = pp.Or(["Hello", pp.And(["Hi", "there"])]) + self.check_both( + p, + "{'Hello' ^ {'Hi' 'there'}}", + "'Hello' ^ (Literal('Hi') + 'there')", + ) + + p = pp.Or(["Hello", "Hi", "Greetings"]) + self.check_both( + p, + "{'Hello' ^ 'Hi' ^ 'Greetings'}", + "Literal('Hello') ^ 'Hi' ^ 'Greetings'", + ) + + p = pp.Or(["Hello", pp.And(["Hi", "there"]), pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' ^ {'Hi' 'there'} ^ W:(aeiou)}", + "'Hello' ^ (Literal('Hi') + 'there') ^ Word('aeiou')", + ) + + p = pp.Or(["Hello", pp.Word("aeiou"), pp.And(["Hi", "there"])]) + self.check_both( + p, + "{'Hello' ^ W:(aeiou) ^ {'Hi' 'there'}}", + "'Hello' 
^ Word('aeiou') ^ (Literal('Hi') + 'there')", + ) + + def testMatchFirst(self): + p = pp.MatchFirst([]) + self.check_both( + p, + "{}", + "MatchFirst([])", + ) + + p = pp.MatchFirst(["Hello"]) + self.check_both( + p, + "{'Hello'}", + "MatchFirst(['Hello'])", + ) + + p = pp.MatchFirst([pp.Word("aeiou")]) + self.check_both( + p, + "{W:(aeiou)}", + "MatchFirst([Word('aeiou')])", + ) + + p = pp.MatchFirst(["Hello", "world"]) + self.check_both( + p, + "{'Hello' | 'world'}", + "Literal('Hello') | 'world'", + ) + + p = pp.MatchFirst(["Hello", pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' | W:(aeiou)}", + "'Hello' | Word('aeiou')", + ) + + p = pp.MatchFirst([pp.Word("aeiou"), "Hello"]) + self.check_both( + p, + "{W:(aeiou) | 'Hello'}", + "Word('aeiou') | 'Hello'", + ) + + p = pp.MatchFirst([pp.Word("aeiou"), pp.Word("AEIOU")]) + self.check_both( + p, + "{W:(aeiou) | W:(AEIOU)}", + "Word('aeiou') | Word('AEIOU')", + ) + + p = pp.MatchFirst([pp.And(["Hello", "there"]), "Hi"]) + self.check_both( + p, + "{{'Hello' 'there'} | 'Hi'}", + "(Literal('Hello') + 'there') | 'Hi'", + ) + + p = pp.MatchFirst(["Hello", pp.And(["Hi", "there"])]) + self.check_both( + p, + "{'Hello' | {'Hi' 'there'}}", + "'Hello' | (Literal('Hi') + 'there')", + ) + + p = pp.MatchFirst(["Hello", "Hi", "Greetings"]) + self.check_both( + p, + "{'Hello' | 'Hi' | 'Greetings'}", + "Literal('Hello') | 'Hi' | 'Greetings'", + ) + + p = pp.MatchFirst(["Hello", pp.And(["Hi", "there"]), pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' | {'Hi' 'there'} | W:(aeiou)}", + "'Hello' | (Literal('Hi') + 'there') | Word('aeiou')", + ) + + p = pp.MatchFirst(["Hello", pp.Word("aeiou"), pp.And(["Hi", "there"])]) + self.check_both( + p, + "{'Hello' | W:(aeiou) | {'Hi' 'there'}}", + "'Hello' | Word('aeiou') | (Literal('Hi') + 'there')", + ) + + def testEach(self): + p = pp.Each([]) + self.check_both( + p, + "{}", + "Each([])", + ) + + p = pp.Each(["Hello"]) + self.check_both( + p, + "{'Hello'}", + "Each(['Hello'])", + 
) + + p = pp.Each([pp.Word("aeiou")]) + self.check_both( + p, + "{W:(aeiou)}", + "Each([Word('aeiou')])", + ) + + p = pp.Each(["Hello", "world"]) + self.check_both( + p, + "{'Hello' & 'world'}", + "Literal('Hello') & 'world'", + ) + + p = pp.Each(["Hello", pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' & W:(aeiou)}", + "'Hello' & Word('aeiou')", + ) + + p = pp.Each([pp.Word("aeiou"), "Hello"]) + self.check_both( + p, + "{W:(aeiou) & 'Hello'}", + "Word('aeiou') & 'Hello'", + ) + + p = pp.Each([pp.Word("aeiou"), pp.Word("AEIOU")]) + self.check_both( + p, + "{W:(aeiou) & W:(AEIOU)}", + "Word('aeiou') & Word('AEIOU')", + ) + + p = pp.Each([pp.And(["Hello", "there"]), "Hi"]) + self.check_both( + p, + "{{'Hello' 'there'} & 'Hi'}", + "(Literal('Hello') + 'there') & 'Hi'", + ) + + p = pp.Each(["Hello", pp.And(["Hi", "there"])]) + self.check_both( + p, + "{'Hello' & {'Hi' 'there'}}", + "'Hello' & (Literal('Hi') + 'there')", + ) + + p = pp.Each(["Hello", "Hi", "Greetings"]) + self.check_both( + p, + "{'Hello' & 'Hi' & 'Greetings'}", + "Literal('Hello') & 'Hi' & 'Greetings'", + ) + + p = pp.Each(["Hello", pp.And(["Hi", "there"]), pp.Word("aeiou")]) + self.check_both( + p, + "{'Hello' & {'Hi' 'there'} & W:(aeiou)}", + "'Hello' & (Literal('Hi') + 'there') & Word('aeiou')", + ) + + p = pp.Each(["Hello", pp.Word("aeiou"), pp.And(["Hi", "there"])]) + self.check_both( + p, + "{'Hello' & W:(aeiou) & {'Hi' 'there'}}", + "'Hello' & Word('aeiou') & (Literal('Hi') + 'there')", + ) + + def testIndentedBlock(self): + p = pp.IndentedBlock("Hello") + self.check_both( + p, + "IndentedBlock:('Hello')", + "IndentedBlock('Hello')", + ) + + p = pp.IndentedBlock(pp.Word("aeiou")) + self.check_both( + p, + "IndentedBlock:(W:(aeiou))", + "IndentedBlock(Word('aeiou'))", + ) + + def testAtStringStart(self): + p = pp.AtStringStart("Hello") + self.check_both( + p, + "AtStringStart:('Hello')", + "AtStringStart('Hello')", + ) + + p = pp.AtStringStart(pp.Word("aeiou")) + self.check_both( + p, + 
"AtStringStart:(W:(aeiou))", + "AtStringStart(Word('aeiou'))", + ) + + def testAtLineStart(self): + p = pp.AtLineStart("Hello") + self.check_both( + p, + "AtLineStart:('Hello')", + "AtLineStart('Hello')", + ) + + p = pp.AtLineStart(pp.Word("aeiou")) + self.check_both( + p, + "AtLineStart:(W:(aeiou))", + "AtLineStart(Word('aeiou'))", + ) + + + def testFollowedBy(self): + p = pp.FollowedBy("Hello") + self.check_both( + p, + "FollowedBy:('Hello')", + "FollowedBy('Hello')", + ) + + p = pp.FollowedBy(pp.Word("aeiou")) + self.check_both( + p, + "FollowedBy:(W:(aeiou))", + "FollowedBy(Word('aeiou'))", + ) + + def testPrecededBy(self): + p = pp.PrecededBy("Hello") + self.check_both( + p, + "PrecededBy:('Hello')", + "PrecededBy('Hello')", + ) + + p = pp.PrecededBy(pp.Word("aeiou")) + self.check_both( + p, + "PrecededBy:(W:(aeiou))", + "PrecededBy(Word('aeiou'))", + ) + + def testLocated(self): + p = pp.Located("Hello") + self.check_both( + p, + "Located:('Hello')", + "Located('Hello')", + ) + + p = pp.Located(pp.Word("aeiou")) + self.check_both( + p, + "Located:(W:(aeiou))", + "Located(Word('aeiou'))", + ) + + def testNotAny(self): + p = pp.NotAny("Hello") + self.check_both( + p, + "~{'Hello'}", + "~Literal('Hello')", + ) + + p = pp.NotAny(pp.Word("aeiou")) + self.check_both( + p, + "~{W:(aeiou)}", + "~Word('aeiou')", + ) + + p = pp.NotAny(pp.Word("aeiou") | "hello") + self.check_both( + p, + "~{{W:(aeiou) | 'hello'}}", + "~(Word('aeiou') | 'hello')", + ) + + + def testZeroOrMore(self): + p = pp.ZeroOrMore("Hello") + self.check_both( + p, + "['Hello']...", + "Literal('Hello')[...]", + ) + + p = pp.ZeroOrMore(pp.Word("aeiou")) + self.check_both( + p, + "[W:(aeiou)]...", + "Word('aeiou')[...]", + ) + + p = pp.ZeroOrMore(pp.Word("aeiou") | "hello") + self.check_both( + p, + "[{W:(aeiou) | 'hello'}]...", + "(Word('aeiou') | 'hello')[...]", + ) + + p = pp.ZeroOrMore("Hello", stop_on="bye") + self.check_both( + p, + "['Hello']...", + "Literal('Hello')[...: 'bye']", + ) + + p = 
pp.ZeroOrMore("Hello", stop_on=pp.Word("xyz")) + self.check_both( + p, + "['Hello']...", + "Literal('Hello')[...: Word('xyz')]", + ) + + p = pp.ZeroOrMore("Hello", stop_on=(pp.Word("xyz") | "bye")) + self.check_both( + p, + "['Hello']...", + "Literal('Hello')[...: Word('xyz') | 'bye']", + ) + + def testOneOrMore(self): + p = pp.OneOrMore("Hello") + self.check_both( + p, + "{'Hello'}...", + "Literal('Hello')[1, ...]", + ) + + p = pp.OneOrMore(pp.Word("aeiou")) + self.check_both( + p, + "{W:(aeiou)}...", + "Word('aeiou')[1, ...]", + ) + + p = pp.OneOrMore(pp.Word("aeiou") | "hello") + self.check_both( + p, + "{{W:(aeiou) | 'hello'}}...", + "(Word('aeiou') | 'hello')[1, ...]", + ) + + p = pp.OneOrMore("Hello", stop_on="bye") + self.check_both( + p, + "{'Hello'}...", + "Literal('Hello')[1, ...: 'bye']", + ) + + p = pp.OneOrMore("Hello", stop_on=pp.Word("xyz")) + self.check_both( + p, + "{'Hello'}...", + "Literal('Hello')[1, ...: Word('xyz')]", + ) + + p = pp.OneOrMore("Hello", stop_on=(pp.Word("xyz") | "bye")) + self.check_both( + p, + "{'Hello'}...", + "Literal('Hello')[1, ...: Word('xyz') | 'bye']", + ) + + def testOpt(self): + p = pp.Opt("Hello") + self.check_both( + p, + "['Hello']", + "Opt('Hello')", + ) + + p = pp.Opt(pp.Word("aeiou")) + self.check_both( + p, + "[W:(aeiou)]", + "Opt(Word('aeiou'))", + ) + + p = pp.Opt(pp.Word("aeiou") | "hello") + self.check_both( + p, + "[W:(aeiou) | 'hello']", + "Opt(Word('aeiou') | 'hello')", + ) + + def testSkipTo(self): + p = pp.SkipTo("Hello") + self.check_both( + p, + "SkipTo:('Hello')", + "SkipTo('Hello')", + ) + + p = pp.SkipTo(pp.Word("aeiou")) + self.check_both( + p, + "SkipTo:(W:(aeiou))", + "SkipTo(Word('aeiou'))", + ) + + p = pp.SkipTo(pp.Word("aeiou") | "hello") + self.check_both( + p, + "SkipTo:({W:(aeiou) | 'hello'})", + "SkipTo(Word('aeiou') | 'hello')", + ) + + def testForward(self): + p = pp.Forward() + p <<= ppc.integer() | pp.And(["(", p, pp.Char("+*"), ppc.integer(), ")"]) + self.check_both( + p, + 
"Forward: {integer | {'(' : ... (*+) integer ')'}}", + "Forward(...)", + # Perhaps in future: + # "Forward(Word('0123456789') | (Literal('(') + Forward(...) + Char('*+') + Word('0123456789') + ')'))", + ) + + @unittest.expectedFailure + def testForwardIndirect(self): + f = pp.Forward() + g = pp.Forward() + f <<= g + g <<= f + + # f and g are identical, but str() doesn't yield identical results + self.check_both( + f, + "Forward: Forward: : ...", + "Forward(Forward(Forward(...)))", + ) + self.check_both( + g, + "Forward: Forward: : ...", + "Forward(Forward(Forward(...)))", + ) + + def testCombine(self): + p = pp.Combine("Hello") + self.check_both( + p, + "Combine:('Hello')", + "Combine('Hello')", + ) + + p = pp.Combine(pp.Word("aeiou")) + self.check_both( + p, + "Combine:(W:(aeiou))", + "Combine(Word('aeiou'))", + ) + + def testGroup(self): + p = pp.Group("Hello") + self.check_both( + p, + "Group:('Hello')", + "Group('Hello')", + ) + + p = pp.Group(pp.Word("aeiou")) + self.check_both( + p, + "Group:(W:(aeiou))", + "Group(Word('aeiou'))", + ) + + def testDict(self): + p = pp.Dict("Hello") + self.check_both( + p, + "Dict:('Hello')", + "Dict('Hello')", + ) + + p = pp.Dict(pp.Word("aeiou")) + self.check_both( + p, + "Dict:(W:(aeiou))", + "Dict(Word('aeiou'))", + ) + + def testSuppress(self): + p = pp.Suppress("Hello") + self.check_both( + p, + "Suppress:('Hello')", + "Suppress('Hello')", + ) + + p = pp.Suppress(pp.Word("aeiou")) + self.check_both( + p, + "Suppress:(W:(aeiou))", + "Suppress(Word('aeiou'))", + ) + + p = pp.Suppress(pp.Word("aeiou") | "hello") + self.check_both( + p, + "Suppress:({W:(aeiou) | 'hello'})", + "Suppress(Word('aeiou') | 'hello')", + ) + + def testInlineLiterals(self): + lit = pp.Opt(pp.Literal("Hello")) + key = pp.Opt(pp.Keyword("Hello")) + ck = pp.Opt(pp.CaselessKeyword("Hello")) + sup = pp.Opt(pp.Suppress("Hello")) + + self.check_both( + lit, + "['Hello']", + "Opt('Hello')", + ) + self.check_both( + key, + "['Hello']", + 
"Opt(Keyword('Hello'))", + ) + self.check_both( + ck, + "['Hello']", + "Opt(CaselessKeyword('Hello'))", + ) + self.check_both( + sup, + "[Suppress:('Hello')]", + "Opt(Suppress('Hello'))", + ) + + pp.ParserElement.inline_literals_using(pp.Keyword) + + self.check_both( + lit, + "['Hello']", + "Opt(Literal('Hello'))", + ) + self.check_both( + key, + "['Hello']", + "Opt('Hello')", + ) + + pp.ParserElement.inline_literals_using(pp.CaselessKeyword) + + self.check_both( + key, + "['Hello']", + "Opt(Keyword('Hello'))", + ) + self.check_both( + ck, + "['Hello']", + "Opt('Hello')", + ) + + pp.ParserElement.inline_literals_using(pp.Suppress) + + self.check_both( + ck, + "['Hello']", + "Opt(CaselessKeyword('Hello'))", + ) + self.check_both( + sup, + "[Suppress:('Hello')]", + "Opt('Hello')", + ) + class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): suite_context = None @@ -5706,6 +6630,8 @@ def testSetName(self): recursive = pp.Forward() recursive <<= a + (b + recursive)[...] + zOpenTag, zCloseTag = pp.makeHTMLTags("Z") + tests = [ a, b, @@ -5718,8 +6644,10 @@ def testSetName(self): pp.delimitedList(pp.Word(pp.nums).setName("int")), pp.countedArray(pp.Word(pp.nums).setName("int")), pp.nestedExpr(), - pp.makeHTMLTags("Z"), - (pp.anyOpenTag, pp.anyCloseTag), + zOpenTag, + zCloseTag, + pp.anyOpenTag, + pp.anyCloseTag, pp.commonHTMLEntity, pp.commonHTMLEntity.setParseAction(pp.replaceHTMLEntity).transformString( "lsdjkf <lsdjkf>&'"&xyzzy;" @@ -5740,8 +6668,10 @@ def testSetName(self): int [, int]... (len) int... 
nested () expression - (, ) - (, ) + + + + common HTML entity lsdjkf &'"&xyzzy;""".splitlines(), ) @@ -9407,30 +10337,30 @@ def testMiscellaneousExceptionBits(self): f"{expected_function!r} not found in ParseException.explain()", ) - def testExpressionDefaultStrings(self): + def testWordCharDefaultStrings(self): expr = pp.Word(pp.nums) print(expr) - self.assertEqual("W:(0-9)", repr(expr)) + self.assertEqual("W:(0-9)", str(expr)) expr = pp.Word(pp.nums, exact=3) print(expr) - self.assertEqual("W:(0-9){3}", repr(expr)) + self.assertEqual("W:(0-9){3}", str(expr)) expr = pp.Word(pp.nums, min=2) print(expr) - self.assertEqual("W:(0-9){2,...}", repr(expr)) + self.assertEqual("W:(0-9){2,...}", str(expr)) expr = pp.Word(pp.nums, max=3) print(expr) - self.assertEqual("W:(0-9){1,3}", repr(expr)) + self.assertEqual("W:(0-9){1,3}", str(expr)) expr = pp.Word(pp.nums, min=2, max=3) print(expr) - self.assertEqual("W:(0-9){2,3}", repr(expr)) + self.assertEqual("W:(0-9){2,3}", str(expr)) expr = pp.Char(pp.nums) print(expr) - self.assertEqual("(0-9)", repr(expr)) + self.assertEqual("(0-9)", str(expr)) def testEmptyExpressionsAreHandledProperly(self): from pyparsing.diagram import to_railroad