release 8.190.22228

klahnakoski · Aug 16, 2022 · cd3f488
2 parents 539e9a0 + a99ad1d
commit cd3f488
Show file tree

Hide file tree

Showing 13 changed files with 154 additions and 118 deletions.
diff --git a/mo_parsing/__init__.py b/mo_parsing/__init__.py
@@ -40,7 +40,13 @@
 whitespaces.NO_WHITESPACE = Whitespace("").use()
 whitespaces.STANDARD_WHITESPACE = Whitespace().use()
 
-from mo_parsing.infix import LEFT_ASSOC, RIGHT_ASSOC, infix_notation, delimited_list, one_of
+from mo_parsing.infix import (
+    LEFT_ASSOC,
+    RIGHT_ASSOC,
+    infix_notation,
+    delimited_list,
+    one_of,
+)
 from mo_parsing.regex import Regex
 from mo_parsing.tokens import *
 

diff --git a/mo_parsing/core.py b/mo_parsing/core.py
@@ -2,6 +2,7 @@
 import sys
 from collections import namedtuple
 from threading import RLock
+from typing import List
 
 from mo_future import text
 from mo_imports import export, expect
@@ -81,18 +82,25 @@ def output(*args, **kwargs):
     return output
 
 
-def _verify_whitespace(eng):
-    if eng is None:
+def _verify_whitespace(whi: List):
+    if whi is None:
         return None
-    if isinstance(eng, list):
-        engs = [v for e in eng for v in [_verify_whitespace(e)] if v is not None]
-        if not engs:
+    if isinstance(whi, list):
+        whis = [
+            v
+            for e in whi
+            for v in [_verify_whitespace(e)]
+            if v is not None and v.regex.pattern  # IGNORE NO_WHITESPACE
+        ]
+        if not whis:
             return None
-        whitespace = engs[0]
-        if any(e.id != whitespace.id for e in engs[1:]):
+        whitespace = whis[0]
+        if any(e.id != whitespace.id for e in whis[1:]):
+            # THE TOP-MOST WHITESPACE RULES ARE DIFFERENT FOR EACH ParserElement,
+            # SO THE PROGRAM DOES NOT KNOW WHICH IS THE MASTER WHITESPACE
             Log.error("must dis-ambiguate the whitespace before parsing")
         return whitespace
-    return eng
+    return whi
 
 
 class Parser(object):

diff --git a/mo_parsing/enhancement.py b/mo_parsing/enhancement.py
@@ -223,6 +223,9 @@ def __init__(
         :param max_match: MAXIMUM MATCH REQUIRED FOR SUCCESS (-1 IS INVALID)
         """
         ParseEnhancement.__init__(self, expr)
+        if isinstance(self.expr, LookBehind):
+            # TODO: support Optional(LookBehind())
+            Log.error("can only look behind once")
         if exact is not None:
             min_match = exact
             max_match = exact
@@ -251,18 +254,14 @@ def whitespace(self):
 
     def parse_impl(self, string, start, do_actions=True):
         acc = []
-        end = index = start
+        end = start
         max = self.parser_config.max_match
         stopper = self.parser_config.end
         count = 0
         failures = []
         try:
-            while end < len(string) and count < max:
-                if end > index:
-                    if isinstance(self.expr, LookBehind):
-                        index = end
-                    else:
-                        index = self.parser_config.whitespace.skip(string, end)
+            while end < len(string):
+                index = self.parser_config.whitespace.skip(string, end)
                 if stopper:
                     if stopper.match(string, index):
                         if self.parser_config.min_match <= count:
@@ -277,6 +276,9 @@ def parse_impl(self, string, start, do_actions=True):
                     acc.append(result)
                     failures.extend(result.failures)
                     count += 1
+                    if count >= max:
+                        break
+
         except ParseException as cause:
             if self.parser_config.min_match <= count <= max:
                 failures.append(cause)
@@ -288,40 +290,35 @@ def parse_impl(self, string, start, do_actions=True):
                     msg="Not correct amount of matches",
                     cause=cause,
                 ) from None
-        if count:
-            if (
-                count < self.parser_config.min_match
-                or self.parser_config.max_match < count
-            ):
-                raise ParseException(
-                    self,
-                    acc[0].start,
-                    string,
-                    msg=(
-                        f"Expecting between {self.parser_config.min_match} and"
-                        f" {self.parser_config.max_match} of {self.expr}"
-                    ),
-                )
-            else:
+
+        if self.parser_config.min_match <= count <= self.parser_config.max_match:
+            if count:
                 return ParseResults(self, acc[0].start, acc[-1].end, acc, failures)
-        else:
-            if not self.parser_config.min_match:
-                return ParseResults(self, start, start, [], failures)
             else:
-                raise ParseException(
-                    self,
-                    start,
-                    string,
-                    msg=f"Expecting at least {self.parser_config.min_match} of {self}",
-                )
+                return ParseResults(self, start, end, acc, failures)
+
+        elif count < self.parser_config.min_match:
+            raise ParseException(
+                self,
+                start,
+                string,
+                msg=f"Expecting at least {self.parser_config.min_match} of {self}",
+            )
+        else:
+            raise ParseException(
+                self,
+                acc[0].start,
+                string,
+                msg=(
+                    f"Expecting between {self.parser_config.min_match} and"
+                    f" {self.parser_config.max_match} of {self.expr}"
+                ),
+            )
 
     def streamline(self):
         if self.streamlined:
             return self
-        try:
-            expr = self.expr.streamline()
-        except Exception as e:
-            print(e)
+        expr = self.expr.streamline()
         if (
             self.parser_config.min_match == self.parser_config.max_match
             and not self.is_annotated()
@@ -596,25 +593,25 @@ class Forward(ParserElement):
     parser created using ``Forward``.
     """
 
-    __slots__ = ["expr", "used_by", "_str", "_reg", "_eng"]
+    __slots__ = ["expr", "used_by", "_str", "_in_regex", "__in_whitespace"]
 
     def __init__(self, expr=Null):
         ParserElement.__init__(self)
         self.expr = None
         self.used_by = []
 
         self._str = None  # avoid recursion
-        self._reg = None  # avoid recursion
-        self._eng = False
+        self._in_regex = None  # avoid recursion
+        self.__in_whitespace = False
         if expr:
             self << whitespaces.CURRENT.normalize(expr)
 
     def copy(self):
         output = ParserElement.copy(self)
         output.expr = self
         output._str = None
-        output._reg = None
-        output._eng = False
+        output._in_regex = None
+        output.__in_whitespace = False
 
         output.used_by = []
         return output
@@ -673,18 +670,15 @@ def min_length(self):
 
     @property
     def whitespace(self):
-        try:
-            if self._eng:
-                return None
-        except Exception as cause:
-            Log.error("", cause=cause)
+        if self.__in_whitespace:
+            return None
 
         # Avoid infinite recursion by setting a temporary
-        self._eng = True
+        self.__in_whitespace = True
         try:
             return self.expr.whitespace
         finally:
-            self._eng = False
+            self.__in_whitespace = False
 
     def parse_impl(self, string, loc, do_actions=True):
         try:
@@ -701,14 +695,17 @@ def parse_impl(self, string, loc, do_actions=True):
             raise cause from None
 
     def __regex__(self):
-        if self._reg or not self.expr:
-            return None
+        if self._in_regex:
+            Log.error("recursion not supported")
+
+        if not self.expr:
+            Log.error("Forward is incomplete")
 
         try:
-            self._reg = True
+            self._in_regex = True
             return self.expr.__regex__()
         finally:
-            self._reg = None
+            self._in_regex = None
 
     def __str__(self):
         if self.parser_name:

diff --git a/mo_parsing/expressions.py b/mo_parsing/expressions.py
@@ -305,7 +305,16 @@ def reverse(self):
         )
 
     def __regex__(self):
-        return "+", "".join(regex_iso(*e.__regex__(), "+") for e in self.exprs)
+        if self.whitespace is whitespaces.NO_WHITESPACE:
+            return "+", "".join(regex_iso(*e.__regex__(), "+") for e in self.exprs)
+
+        return (
+            "+",
+            regex_iso(*self.whitespace.__regex__(), "+").join(
+                regex_iso(*e.__regex__(), "+")
+                for e in self.exprs
+            ),
+        )
 
     def __str__(self):
         if self.parser_name:

diff --git a/mo_parsing/helpers.py b/mo_parsing/helpers.py
@@ -95,20 +95,21 @@ def QuotedString(
         anychar = Char(exclude="\n")
         excluded |= Char("\r\n")
 
-    included = ~Literal(end_quote_char) + anychar
-
-    if esc_quote:
-        included = Literal(esc_quote) | included
-    if esc_char:
-        excluded |= Literal(esc_char)
-        included = esc_char + Char(printables) | included
-        esc_char_replace_pattern = re.escape(esc_char) + "(.)"
-
-    prec, pattern = (
-        Literal(quote_char) + ((~excluded + anychar) | included)[0:]
-    ).__regex__()
-    # IMPORTANT: THE end_quote_char IS OUTSIDE THE Regex BECAUSE OF PATHOLOGICAL BACKTRACKING
-    output = Combine(Regex(pattern) + Literal(end_quote_char))
+    with whitespaces.NO_WHITESPACE:
+        included = ~Literal(end_quote_char) + anychar
+
+        if esc_quote:
+            included = Literal(esc_quote) | included
+        if esc_char:
+            excluded |= Literal(esc_char)
+            included = esc_char + Char(printables) | included
+            esc_char_replace_pattern = re.escape(esc_char) + "(.)"
+
+        prec, pattern = (
+            Literal(quote_char) + ((~excluded + anychar) | included)[0:]
+        ).__regex__()
+        # IMPORTANT: THE end_quote_char IS OUTSIDE THE Regex BECAUSE OF PATHOLOGICAL BACKTRACKING
+        output = Combine(Regex(pattern) + Literal(end_quote_char))
 
     def post_parse(tokens):
         ret = tokens[0]

diff --git a/mo_parsing/infix.py b/mo_parsing/infix.py
@@ -22,7 +22,7 @@
     Keyword,
     NoMatch,
     Literal,
-    Empty,
+    Empty, Log,
 )
 from mo_parsing.utils import regex_range, wrap_parse_action
 
@@ -58,11 +58,9 @@ def one_of(strs, caseless=False, as_keyword=False):
      - as_keyword - (default=``False``) - enforce Keyword-style matching on the
        generated expressions
     """
-    if isinstance(caseless, text):
-        warnings.warn(
-            "More than one string argument passed to one_of, pass "
-            "choices as a list or space-delimited string",
-            stacklevel=2,
+    if isinstance(caseless, str):
+        Log.error(
+            "More than one string argument passed to one_of, pass choices as a list or space-delimited string"
         )
 
     if caseless:
@@ -368,7 +366,7 @@ def make_tree(tokens, loc, string):
     flat = Forward()
     iso = lpar.suppress() + flat + rpar.suppress()
     atom = (base_expr | iso) / record_op(base_expr)
-    modified = ZeroOrMore(prefix_ops) + atom + ZeroOrMore(suffix_ops)
-    flat << ((modified + ZeroOrMore(ops + modified)) / make_tree).streamline()
+    decorated = ZeroOrMore(prefix_ops) + atom + ZeroOrMore(suffix_ops)
+    flat << ((decorated + ZeroOrMore(ops + decorated)) / make_tree).streamline()
 
     return flat.streamline()
diff --git a/mo_parsing/tokens.py b/mo_parsing/tokens.py
@@ -119,7 +119,7 @@ def __init__(self, match):
             Log.error("Expecting string for literal")
         Token.__init__(self)
 
-        self.set_config(match=match)
+        self.set_config(match=match, regex=regex_compile(re.escape(match)))
 
         if len(match) == 0:
             Log.error("Literal must be at least one character")
@@ -143,7 +143,7 @@ def reverse(self):
         return Literal(self.parser_config.match[::-1])
 
     def __regex__(self):
-        return "+", re.escape(self.parser_config.match)
+        return "+", self.parser_config.regex.pattern
 
     def __str__(self):
         return self.parser_config.match
@@ -169,9 +169,6 @@ def min_length(self):
     def reverse(self):
         return self
 
-    def __regex__(self):
-        return "*", re.escape(self.parser_config.match)
-
 
 class Keyword(Token):
     __slots__ = []
@@ -252,7 +249,8 @@ class CaselessLiteral(Literal):
     def __init__(self, match):
         Literal.__init__(self, match.upper())
         self.set_config(
-            match=match, regex=regex_compile(regex_caseless(match)),
+            match=match,
+            regex=regex_compile(regex_caseless(re.escape(match))),
         )
         self.parser_name = repr(self.parser_config.regex.pattern)
 
@@ -364,15 +362,17 @@ def __init__(
                 init_chars = init_chars.expecting().keys()
             prec, regexp = Char(init_chars, exclude=exclude)[min:max].__regex__()
         elif max is None or max == MAX_INT:
-            prec, regexp = (
-                Char(init_chars, exclude=exclude)
-                + Char(body_chars, exclude=exclude)[min - 1 :]
-            ).__regex__()
+            with whitespaces.NO_WHITESPACE:
+                prec, regexp = (
+                    Char(init_chars, exclude=exclude)
+                    + Char(body_chars, exclude=exclude)[min - 1 :]
+                ).__regex__()
         else:
-            prec, regexp = (
-                Char(init_chars, exclude=exclude)
-                + Char(body_chars, exclude=exclude)[min - 1 : max - 1]
-            ).__regex__()
+            with whitespaces.NO_WHITESPACE:
+                prec, regexp = (
+                    Char(init_chars, exclude=exclude)
+                    + Char(body_chars, exclude=exclude)[min - 1 : max - 1]
+                ).__regex__()
 
         if as_keyword:
             regexp = r"\b" + regexp + r"\b"

diff --git a/mo_parsing/utils.py b/mo_parsing/utils.py
@@ -46,7 +46,7 @@ def alert(cls, template, cause=None, **params):
 
         @classmethod
         def error(cls, template, cause=None, **params):
-            raise ParseException(Null, -1, -1, "", msg=template, cause=cause)
+            raise ParseException(Null, -1, "", msg=template, cause=cause)
 
 
 MAX_INT = sys.maxsize