Skip to content

Commit

Permalink
release 8.190.22228
Browse files Browse the repository at this point in the history
  • Loading branch information
klahnakoski committed Aug 16, 2022
2 parents 539e9a0 + a99ad1d commit cd3f488
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 118 deletions.
8 changes: 7 additions & 1 deletion mo_parsing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,13 @@
whitespaces.NO_WHITESPACE = Whitespace("").use()
whitespaces.STANDARD_WHITESPACE = Whitespace().use()

from mo_parsing.infix import LEFT_ASSOC, RIGHT_ASSOC, infix_notation, delimited_list, one_of
from mo_parsing.infix import (
LEFT_ASSOC,
RIGHT_ASSOC,
infix_notation,
delimited_list,
one_of,
)
from mo_parsing.regex import Regex
from mo_parsing.tokens import *

Expand Down
24 changes: 16 additions & 8 deletions mo_parsing/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
from collections import namedtuple
from threading import RLock
from typing import List

from mo_future import text
from mo_imports import export, expect
Expand Down Expand Up @@ -81,18 +82,25 @@ def output(*args, **kwargs):
return output


def _verify_whitespace(eng):
if eng is None:
def _verify_whitespace(whi: List):
if whi is None:
return None
if isinstance(eng, list):
engs = [v for e in eng for v in [_verify_whitespace(e)] if v is not None]
if not engs:
if isinstance(whi, list):
whis = [
v
for e in whi
for v in [_verify_whitespace(e)]
if v is not None and v.regex.pattern # IGNORE NO_WHITESPACE
]
if not whis:
return None
whitespace = engs[0]
if any(e.id != whitespace.id for e in engs[1:]):
whitespace = whis[0]
if any(e.id != whitespace.id for e in whis[1:]):
# THE TOP-MOST WHITESPACE RULES ARE DIFFERENT FOR EACH ParserElement,
# SO PROGRAM DOES NOT KNOW WHICH IS THE MASTER WHITESPACE
Log.error("must dis-ambiguate the whitespace before parsing")
return whitespace
return eng
return whi


class Parser(object):
Expand Down
99 changes: 48 additions & 51 deletions mo_parsing/enhancement.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ def __init__(
:param max_match: MAXIMUM MATCH REQUIRED FOR SUCCESS (-1 IS INVALID)
"""
ParseEnhancement.__init__(self, expr)
if isinstance(self.expr, LookBehind):
# TODO: support Optional(LookBehind())
Log.error("can only look behind once")
if exact is not None:
min_match = exact
max_match = exact
Expand Down Expand Up @@ -251,18 +254,14 @@ def whitespace(self):

def parse_impl(self, string, start, do_actions=True):
acc = []
end = index = start
end = start
max = self.parser_config.max_match
stopper = self.parser_config.end
count = 0
failures = []
try:
while end < len(string) and count < max:
if end > index:
if isinstance(self.expr, LookBehind):
index = end
else:
index = self.parser_config.whitespace.skip(string, end)
while end < len(string):
index = self.parser_config.whitespace.skip(string, end)
if stopper:
if stopper.match(string, index):
if self.parser_config.min_match <= count:
Expand All @@ -277,6 +276,9 @@ def parse_impl(self, string, start, do_actions=True):
acc.append(result)
failures.extend(result.failures)
count += 1
if count >= max:
break

except ParseException as cause:
if self.parser_config.min_match <= count <= max:
failures.append(cause)
Expand All @@ -288,40 +290,35 @@ def parse_impl(self, string, start, do_actions=True):
msg="Not correct amount of matches",
cause=cause,
) from None
if count:
if (
count < self.parser_config.min_match
or self.parser_config.max_match < count
):
raise ParseException(
self,
acc[0].start,
string,
msg=(
f"Expecting between {self.parser_config.min_match} and"
f" {self.parser_config.max_match} of {self.expr}"
),
)
else:

if self.parser_config.min_match <= count <= self.parser_config.max_match:
if count:
return ParseResults(self, acc[0].start, acc[-1].end, acc, failures)
else:
if not self.parser_config.min_match:
return ParseResults(self, start, start, [], failures)
else:
raise ParseException(
self,
start,
string,
msg=f"Expecting at least {self.parser_config.min_match} of {self}",
)
return ParseResults(self, start, end, acc, failures)

elif count < self.parser_config.min_match:
raise ParseException(
self,
start,
string,
msg=f"Expecting at least {self.parser_config.min_match} of {self}",
)
else:
raise ParseException(
self,
acc[0].start,
string,
msg=(
f"Expecting between {self.parser_config.min_match} and"
f" {self.parser_config.max_match} of {self.expr}"
),
)

def streamline(self):
if self.streamlined:
return self
try:
expr = self.expr.streamline()
except Exception as e:
print(e)
expr = self.expr.streamline()
if (
self.parser_config.min_match == self.parser_config.max_match
and not self.is_annotated()
Expand Down Expand Up @@ -596,25 +593,25 @@ class Forward(ParserElement):
parser created using ``Forward``.
"""

__slots__ = ["expr", "used_by", "_str", "_reg", "_eng"]
__slots__ = ["expr", "used_by", "_str", "_in_regex", "__in_whitespace"]

def __init__(self, expr=Null):
ParserElement.__init__(self)
self.expr = None
self.used_by = []

self._str = None # avoid recursion
self._reg = None # avoid recursion
self._eng = False
self._in_regex = None # avoid recursion
self.__in_whitespace = False
if expr:
self << whitespaces.CURRENT.normalize(expr)

def copy(self):
output = ParserElement.copy(self)
output.expr = self
output._str = None
output._reg = None
output._eng = False
output._in_regex = None
output.__in_whitespace = False

output.used_by = []
return output
Expand Down Expand Up @@ -673,18 +670,15 @@ def min_length(self):

@property
def whitespace(self):
try:
if self._eng:
return None
except Exception as cause:
Log.error("", cause=cause)
if self.__in_whitespace:
return None

# Avoid infinite recursion by setting a temporary
self._eng = True
self.__in_whitespace = True
try:
return self.expr.whitespace
finally:
self._eng = False
self.__in_whitespace = False

def parse_impl(self, string, loc, do_actions=True):
try:
Expand All @@ -701,14 +695,17 @@ def parse_impl(self, string, loc, do_actions=True):
raise cause from None

def __regex__(self):
if self._reg or not self.expr:
return None
if self._in_regex:
Log.error("recursion not supported")

if not self.expr:
Log.error("Forward is incomplete")

try:
self._reg = True
self._in_regex = True
return self.expr.__regex__()
finally:
self._reg = None
self._in_regex = None

def __str__(self):
if self.parser_name:
Expand Down
11 changes: 10 additions & 1 deletion mo_parsing/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,16 @@ def reverse(self):
)

def __regex__(self):
return "+", "".join(regex_iso(*e.__regex__(), "+") for e in self.exprs)
if self.whitespace is whitespaces.NO_WHITESPACE:
return "+", "".join(regex_iso(*e.__regex__(), "+") for e in self.exprs)

return (
"+",
regex_iso(*self.whitespace.__regex__(), "+").join(
regex_iso(*e.__regex__(), "+")
for e in self.exprs
),
)

def __str__(self):
if self.parser_name:
Expand Down
29 changes: 15 additions & 14 deletions mo_parsing/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,20 +95,21 @@ def QuotedString(
anychar = Char(exclude="\n")
excluded |= Char("\r\n")

included = ~Literal(end_quote_char) + anychar

if esc_quote:
included = Literal(esc_quote) | included
if esc_char:
excluded |= Literal(esc_char)
included = esc_char + Char(printables) | included
esc_char_replace_pattern = re.escape(esc_char) + "(.)"

prec, pattern = (
Literal(quote_char) + ((~excluded + anychar) | included)[0:]
).__regex__()
# IMPORTANT: THE end_quote_char IS OUTSIDE THE Regex BECAUSE OF PATHOLOGICAL BACKTRACKING
output = Combine(Regex(pattern) + Literal(end_quote_char))
with whitespaces.NO_WHITESPACE:
included = ~Literal(end_quote_char) + anychar

if esc_quote:
included = Literal(esc_quote) | included
if esc_char:
excluded |= Literal(esc_char)
included = esc_char + Char(printables) | included
esc_char_replace_pattern = re.escape(esc_char) + "(.)"

prec, pattern = (
Literal(quote_char) + ((~excluded + anychar) | included)[0:]
).__regex__()
# IMPORTANT: THE end_quote_char IS OUTSIDE THE Regex BECAUSE OF PATHOLOGICAL BACKTRACKING
output = Combine(Regex(pattern) + Literal(end_quote_char))

def post_parse(tokens):
ret = tokens[0]
Expand Down
14 changes: 6 additions & 8 deletions mo_parsing/infix.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
Keyword,
NoMatch,
Literal,
Empty,
Empty, Log,
)
from mo_parsing.utils import regex_range, wrap_parse_action

Expand Down Expand Up @@ -58,11 +58,9 @@ def one_of(strs, caseless=False, as_keyword=False):
- as_keyword - (default=``False``) - enforce Keyword-style matching on the
generated expressions
"""
if isinstance(caseless, text):
warnings.warn(
"More than one string argument passed to one_of, pass "
"choices as a list or space-delimited string",
stacklevel=2,
if isinstance(caseless, str):
Log.error(
"More than one string argument passed to one_of, pass choices as a list or space-delimited string"
)

if caseless:
Expand Down Expand Up @@ -368,7 +366,7 @@ def make_tree(tokens, loc, string):
flat = Forward()
iso = lpar.suppress() + flat + rpar.suppress()
atom = (base_expr | iso) / record_op(base_expr)
modified = ZeroOrMore(prefix_ops) + atom + ZeroOrMore(suffix_ops)
flat << ((modified + ZeroOrMore(ops + modified)) / make_tree).streamline()
decorated = ZeroOrMore(prefix_ops) + atom + ZeroOrMore(suffix_ops)
flat << ((decorated + ZeroOrMore(ops + decorated)) / make_tree).streamline()

return flat.streamline()
28 changes: 14 additions & 14 deletions mo_parsing/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def __init__(self, match):
Log.error("Expecting string for literal")
Token.__init__(self)

self.set_config(match=match)
self.set_config(match=match, regex=regex_compile(re.escape(match)))

if len(match) == 0:
Log.error("Literal must be at least one character")
Expand All @@ -143,7 +143,7 @@ def reverse(self):
return Literal(self.parser_config.match[::-1])

def __regex__(self):
return "+", re.escape(self.parser_config.match)
return "+", self.parser_config.regex.pattern

def __str__(self):
return self.parser_config.match
Expand All @@ -169,9 +169,6 @@ def min_length(self):
def reverse(self):
return self

def __regex__(self):
return "*", re.escape(self.parser_config.match)


class Keyword(Token):
__slots__ = []
Expand Down Expand Up @@ -252,7 +249,8 @@ class CaselessLiteral(Literal):
def __init__(self, match):
Literal.__init__(self, match.upper())
self.set_config(
match=match, regex=regex_compile(regex_caseless(match)),
match=match,
regex=regex_compile(regex_caseless(re.escape(match))),
)
self.parser_name = repr(self.parser_config.regex.pattern)

Expand Down Expand Up @@ -364,15 +362,17 @@ def __init__(
init_chars = init_chars.expecting().keys()
prec, regexp = Char(init_chars, exclude=exclude)[min:max].__regex__()
elif max is None or max == MAX_INT:
prec, regexp = (
Char(init_chars, exclude=exclude)
+ Char(body_chars, exclude=exclude)[min - 1 :]
).__regex__()
with whitespaces.NO_WHITESPACE:
prec, regexp = (
Char(init_chars, exclude=exclude)
+ Char(body_chars, exclude=exclude)[min - 1 :]
).__regex__()
else:
prec, regexp = (
Char(init_chars, exclude=exclude)
+ Char(body_chars, exclude=exclude)[min - 1 : max - 1]
).__regex__()
with whitespaces.NO_WHITESPACE:
prec, regexp = (
Char(init_chars, exclude=exclude)
+ Char(body_chars, exclude=exclude)[min - 1 : max - 1]
).__regex__()

if as_keyword:
regexp = r"\b" + regexp + r"\b"
Expand Down
2 changes: 1 addition & 1 deletion mo_parsing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def alert(cls, template, cause=None, **params):

@classmethod
def error(cls, template, cause=None, **params):
raise ParseException(Null, -1, -1, "", msg=template, cause=cause)
raise ParseException(Null, -1, "", msg=template, cause=cause)


MAX_INT = sys.maxsize
Expand Down

0 comments on commit cd3f488

Please sign in to comment.