From 7e779f810fe0e714d4e078cbf5f556a842535164 Mon Sep 17 00:00:00 2001 From: Copeland Carter Date: Fri, 19 Apr 2024 19:15:38 -0600 Subject: [PATCH] v2.0.0: Moved dialects to use sub-classes --- README.md | 18 +-- ezregex/EZRegex.py | 197 ++++++++---------------- ezregex/EZRegex.pyi | 33 ++-- ezregex/__init__.py | 2 +- ezregex/_dialects.py | 23 --- ezregex/_docs.py | 1 + ezregex/base/__init__.py | 25 +-- ezregex/base/elements.py | 66 ++++++++ ezregex/invert.py | 7 +- ezregex/javascript/JavaScriptEZRegex.py | 10 ++ ezregex/javascript/__init__.py | 2 +- ezregex/javascript/elements.py | 3 +- ezregex/javascript/psuedonymns.py | 67 -------- ezregex/perl/PerlEZRegex.py | 10 ++ ezregex/perl/__init__.py | 2 +- ezregex/perl/elements.py | 3 +- ezregex/perl/psuedonymns.py | 67 -------- ezregex/python/PythonEZRegex.py | 48 ++++++ ezregex/python/PythonEZRegex.pyi | 21 +++ ezregex/python/__init__.py | 4 +- ezregex/python/elements.py | 3 +- ezregex/python/psuedonymns.py | 67 -------- tests/test_EZRegex.py | 20 ++- tests/test_api.py | 12 +- tests/test_generate.py | 4 +- tests/test_invert.py | 9 +- tests/test_javascript.py | 5 +- tests/test_operators.py | 1 + tests/test_python.py | 18 ++- tests/test_replacement.py | 6 +- 30 files changed, 320 insertions(+), 434 deletions(-) delete mode 100644 ezregex/_dialects.py create mode 100644 ezregex/javascript/JavaScriptEZRegex.py delete mode 100644 ezregex/javascript/psuedonymns.py create mode 100644 ezregex/perl/PerlEZRegex.py delete mode 100644 ezregex/perl/psuedonymns.py create mode 100644 ezregex/python/PythonEZRegex.py create mode 100644 ezregex/python/PythonEZRegex.pyi delete mode 100644 ezregex/python/psuedonymns.py diff --git a/README.md b/README.md index 4fe07f3..3ed8bc5 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,8 @@ If you know a particular flavor of regex and would like to contribute, feel free ## Documentation ### Notes and Gotchas - The different Regular Expression dialects don't all have the same features, and those 
features don't all work the same way. I've tried to standardize these as best I can and use reasonable names for all the elements. If you're confused by something not working as expected, be sure to understand how your language specifically handles regular expressions. -- When using the Python `re` library, functions like re.search() and re.sub() don't accept EZRegex patterns as valid regex. Be sure to either call .str() (or cast it to a string) or .compile() (to compile to an re.Pattern) when passing to those. Also, be careful to call the function on the entire pattern: chunk + whitespace.str() is not the same as (chunk + whitespace).str(). +- All the functions in the Python `re` library (`search`, `match`, `sub`, etc.) are implemented in the Python EZRegex dialect, and act identically to their equivalents. If you still want to use the Python `re` library, note that functions like `search` and `sub` don't accept EZRegex patterns as valid regex. Be sure to either call .str() (or cast it to a string) or .compile() (to compile to an re.Pattern) when passing to those. Using the member functions however, will be more efficient, as EZRegex caches the compiled re.Pattern internally. +- Be careful to call functions on the entire pattern: chunk + whitespace.str() is not the same as (chunk + whitespace).str(). - In regular regex, a lot of random things capture groups for no apparent reason. All regexes in EZRegex intentionally capture passively, so to capture any groups, use group(), with the optional `name` parameter. - All EZRegexs (except for `raw`) auto-sanitize strings given to them, so there's no need to escape characters or use r strings. This *does* mean, however, that you cannot pass actual regex strings to any of them, as they'll think you're talking about it literally (unless you want that, of course). 
To include already written regex strings, use `raw` - Note that I have camelCase and snake_case versions of each of the functions, because I waver back and forth between which I like better. Both versions function identically. @@ -1021,7 +1022,7 @@ and such, which all the other EZRegexs do automatically ## Developer Documentation ### The EZRegex class -Everything relies on the EZRegex class. EZRegex shouldn't be instantiated by the user, as each dialect defines their own EZRegex elements specific to that dialect (more on that later). Each element represents a fundamental part of the Regular Expression syntax for that language, as well as less-fundemental common combinations for convenience (like email and float). +Everything relies on the EZRegex class. EZRegex shouldn't be instantiated by the user, as each dialect subclasses the EZRegex class and defines their own elements specific to that dialect (more on that later). Each element represents a fundamental part of the Regular Expression syntax for that language, as well as less-fundamental common combinations for convenience (like email and float). EZRegex can accept a string or a function to define how it's supposed to interact with the current "chain" of elements. If it's a string, it just adds it to the end. If it's a function, it can accept any positional or named parameters, but has to accept `cur=...` as the last parameter (it's complicated). The `cur` parameter is the current regular expression chain, as a string. What's returned becomes the new `cur` parameter of the next element, or, if there is no next element, the final regex. That way you can add to the front or back of an expression, and you can change what exactly gets added to the current expression based on other parameters. 
@@ -1031,15 +1032,15 @@ The EZRegex class has operators overloaded so you can combine them in intuitive The updated method of doing this is to define all the EZRegex elements of a dialect in `elements.py`, and then add type hints and doc strings in the `elements.pyi` file. EZRegex elements that accept parameters are typed as functions (even though they're not), for both convenience for the user when using linter, and to give documentation in an easier way. EZRegex elements that don't accept parameters should be typed as EZRegex, and given documentation as a string on the line below it. This is *slightly* non-standard, but linters support it, as well as my documentation generator script, which parses the .pyi files. The elements can also be seperated into groups in the .pyi files by using `"Group: \\n\"`, which also gets parsed by the documentation script. The groups aren't used in the actual library, but are helpful in seperating the documentation, as well as used in [ezregex.org](http://ezregex.org) ### Dialects -Because most regex dialects *are* 90% identical, a hidden "base" dialect is implemented, but it works a bit differently. It has an `elements.py` file, but it defines all the elements as a dict in the form of {"element_name": {"keyword": "arguements"}}. It then has a `load_dialect()` function, which is the only thing importable from it. The reason it's done this way is because `dialect` is a required parameter of the EZRegex constructor, so `load_dialect()` takes a `dialect` parameter, and constructs the base elements from it's dict and returns a new dict of initialized elements to be dumped into the global scope of the dialect. The `elements.py` file of a specific dialect can then remove any elements that it doesn't support (using the `del` keyword) and add/overwrite any it does support. 
+Because most regex dialects *are* 90% identical, a parent EZRegex class implements most of the applicable logic, and a hidden "base" dialect is implemented, but works a bit differently. It has an `elements.py` file, but it defines all the elements as a dict in the form of {"element_name": {"keyword": "arguments"}}. It then has a `load_dialect()` function, which is the only thing importable from it. The reason it's done this way is because most elements, though identical in different dialects, have to be the appropriate dialect subclass. `load_dialect()` takes the dialect type as a parameter, and instantiates the base elements from its dict and returns a new dict of initialized elements to be dumped into the global scope of the dialect. The `elements.py` file of a specific dialect can then remove any elements that it doesn't support (using the `del` keyword) and add/overwrite any it does support, or that work differently. -There's also a _dialects.py file that has a dict for each dialect to describe the dialect-specific behavior of the EZRegex class, for example, in the JavaScript dialect, /'s are added to the beginning and end of the pattern, and flags are handled differently in each dialect. This has to be implemented directly into the EZRegex class. The dicts *would* be in the dialect folders themselves, but that causes all sorts of circular dependancies, so they're all just in the _dialects.py file. +Each subclass of EZRegex must implement a few options to describe the dialect-specific behavior of the EZRegex class, for example, in the JavaScript dialect, /'s are added to the beginning and end of the pattern, and flags are handled differently in each dialect. This has to be implemented directly into the EZRegex subclass. 
-There's 4 parts to the dicts in the _dialects.py file: +There's 4 parts that are required: - `beginning` and `end` - Plain strings which describe what to tack onto the beginning and end of the compiled pattern (but *before* flags are added) - `flag_func` - - A function that gets called with `final`, which is the final compiled pattern *with* `beginning` and `end` attached, and `flags`, which is a string of all the flags applied to the pattern. Internally, the flags are single digits, because flags usually are. They get passed to this function as a single string, which can be parsed and modified if necissary (it usually isn't) + - An abstract function that gets called with `final`, which is the final compiled pattern *with* `beginning` and `end` attached, and `flags`, which is a string of all the flags applied to the pattern. Internally, the flags are single digits, because flags usually are. They get passed to this function as a single string, which can be parsed and modified if necessary (it usually isn't) - `escape_chars` - The characters that need to be escaped. Should be a byte string (i.e. b'...') @@ -1050,9 +1051,6 @@ Later, when I was reading up on abstract syntax trees, and scrolling around on P Along the way, I also discovered, deep in the corners of the internet, 2 other Python libraries which do almost the same thing: `xeger` (regex backwards), and `sre_yield`. `xeger` technically works, however it tends to include unprintable characters, so it's output isn't very readable. `sre_yeild` is better, but it can be very slow, and is not quite the use case I'm going for. My invert algorithm is meant to be a debugging tool (though it doubles well for a testing tool), so it does things like detecting words (as opposed to seperate word characters) and inserts actual words, and doing the same for numbers and inserting `12345...`, as well as a couple other enhancements. 
-### Tests -Tests for a while now have just been in a single `tests.py` file, which was a giant pile of all the tests. I'm currently moving to use pytest. There's a `regexs.json` file (and a `replacements.json` file) that have a bunch of regexs, along with things they're supposed to match, and things they're not supposed to match, for testing. - ## Installation EZRegex is distributed on [PyPI](https://pypi.org) as a universal wheel and is available on Linux, macOS and Windows and supports Python 3.10+ and PyPy. @@ -1066,7 +1064,7 @@ import ezregex as er ``` ## Todo -See [the todo](todo.txt). I'm slowly moving these to [GitHub issues](https://github.com/smartycope/ezregex/issues), but for now, they're mostly still there +See the [GitHub Issue Page](https://github.com/smartycope/ezregex/issues) ## License diff --git a/ezregex/EZRegex.py b/ezregex/EZRegex.py index 9c5531b..35914d5 100644 --- a/ezregex/EZRegex.py +++ b/ezregex/EZRegex.py @@ -1,97 +1,57 @@ import logging import re -from copy import deepcopy +from abc import ABC from functools import partial -import sys -from typing import Callable, List, Literal from .api import api from .generate import * from .invert import invert -from ._dialects import dialects # TODO: Seperate EZRegex into a "bytes" mode vs "string" mode -# TODO: consider changing addFlags to "outer" or "end" or something +# TODO: consider changing add_flags to "outer" or "end" or something # TODO: Seriously consider removing the debug functions # TODO: in all the magic functions assert that we're not mixing dialects -# TODO: delete induvidual deepcopy statements and rerun all the tests to see what ones I can remove +# TODO: figure out if theres a way to make a "change dialect" function -class EZRegex: +class EZRegex(ABC): """ Represent parts of the Regex syntax. 
Should not be instantiated by the user directly.""" - def __init__(self, - # This line is more accurate, but I don't want any dependancies, including mypy_extensions - # definition:str|"EZRegex"|Callable[[VarArg, DefaultNamedArg[str, "cur"]], str], - definition:str|Callable[..., str]|"EZRegex", - dialect: str, - sanatize:bool=True, - init:bool=True, - replacement:bool=False, - flags:str='', - ): + def __init__(self, definition, *, sanatize=True, replacement=False, flags=''): """ The workhorse of the EZRegex library. This represents a regex pattern that can be combined with other EZRegexs and strings. Ideally, this should only be called internally, but it should still work from the user's end """ - - if dialect not in dialects.keys(): - raise ValueError(f'Unsupported dialect `{dialect}` given. Supported dialects are: {list(dialects.keys())}') - # Set attributes like this so the class can remain "immutable", while still being usable self.__setattr__('flags', flags, True) - # Parse params - # Add flags if it's another EZRegex - if isinstance(definition, EZRegex): - self.__setattr__('flags', definition.flags, True) - if definition.dialect != dialect: - raise ValueError('Cannot mix regex dialects') - - if isinstance(definition, (str, EZRegex)): - definition = [str(definition)] - elif not isinstance(definition, (list, tuple)): - definition = [definition] - self.__setattr__('_sanatize', sanatize, True) self.__setattr__('replacement', replacement, True) - # This allows strings in the list now, but they get converted later in this function - # self._funcList: list[str|partial[str]|Callable] = list(definition) - self.__setattr__('_funcList', list(definition), True) - self.__setattr__('dialect', dialect, True) - # The dict that has the values - self.__setattr__('_dialect_attr', dialects[dialect], True) - - - # The init parameter is not actually required, but it will make it more - # efficient, so we don't have to check that the whole chain is callable - if init: - # Go 
through the chain (most likely of length 1) and parse any strings - # This is for simplicity when defining all the members - for i in range(len(self._funcList)): - if isinstance(self._funcList[i], str): - # I *hate* how Python handles lambdas - stringBecauseHatred = deepcopy(self._funcList[i]) - self._funcList[i] = lambda cur=...: cur + stringBecauseHatred - elif not callable(self._funcList[i]) and self._funcList[i] is not None: - raise ValueError(f"Invalid type {type(self._funcList[i])} passed to EZRegex constructor") + + if isinstance(definition, str): + self.__setattr__('_funcList', [lambda cur=...: cur + definition], True) + elif callable(definition): + self.__setattr__('_funcList', [definition], True) + elif isinstance(definition, list): + self.__setattr__('_funcList', definition, True) # Private functions + def _flag_func(self, final:str) -> str: + raise NotImplementedError('Subclasses need to implement _flag_func(final)') + def _escape(self, pattern:str): """ This function was modified from the one in /usr/lib64/python3.12/re/__init__.py line 255 """ - _special_chars_map = {i: '\\' + chr(i) for i in self._dialect_attr['escape_chars']} + _special_chars_map = {i: '\\' + chr(i) for i in self._escape_chars} return pattern.translate(_special_chars_map) - def _sanitizeInput(self, i, addFlags=False): + def _sanitizeInput(self, i, add_flags=False): """ Instead of rasising an error if passed a strange datatype, it now trys to cast it to a string """ - i = deepcopy(i) - # Don't sanatize anything if this is a replacement string if self.replacement: return str(i) # If it's another chain, compile it if isinstance(i, EZRegex): - return i._compile(addFlags=addFlags) + return i._compile(add_flags=add_flags) # It's a string (so we need to escape it) elif isinstance(i, str): return self._escape(i) @@ -109,22 +69,33 @@ def _sanitizeInput(self, i, addFlags=False): except: raise ValueError(f'Incorrect type {type(i)} given to EZRegex parameter: Must be string or another EZRegex 
chain.') - def _compile(self, addFlags=True): + def _compile(self, add_flags=True): regex = '' for func in self._funcList: regex = func(cur=regex) # type: ignore # Add the flags - if addFlags: - regex = self._dialect_attr['beginning'] + regex + self._dialect_attr['end'] + if add_flags: + regex = self._beginning + regex + self._end + if len(self.flags): - regex = self._dialect_attr['flag_func'](regex, self.flags) + regex = self._flag_func(regex) return regex - # Regular functions - def compile(self, addFlags=True): - return re.compile(self._compile(addFlags)) + def _copy(self, definition=..., sanatize=..., replacement=..., flags=...): + if definition is Ellipsis: + definition = self._compile() + if sanatize is Ellipsis: + sanatize = self._sanatize + if replacement is Ellipsis: + replacement = self.replacement + if flags is Ellipsis: + flags = self.flags + + return type(self)(definition, sanatize=sanatize, replacement=replacement, flags=flags) + + # Regular functions def str(self): return self.__str__() @@ -137,7 +108,7 @@ def debug(self): debug(self, name='Compiled ezregex string', calls=2) return self - def copy(self, addFlags=True): + def copy(self, add_flags=True): try: from clipboard import copy # type: ignore except ImportError as err: @@ -145,7 +116,7 @@ def copy(self, addFlags=True): 'Please install the clipboard module in order to auto copy ezregex expressions (i.e. pip install clipboard)' ) from err else: - copy(self._compile(addFlags=addFlags)) + copy(self._compile(add_flags=add_flags)) def test(self, testString=None, show=True, context=True) -> bool: """ Tests the current regex expression to see if it's in @param testString. 
@@ -224,31 +195,6 @@ def inverse(self, amt=1, **kwargs): def invert(self, amt=1, **kwargs): return self.inverse(amt, **kwargs) - # Shadowing the re functions - def search(self, string, pos=0, endpos=sys.maxsize): - return self.compile().search(string, pos, endpos) - - def match(self, string, pos=0, endpos=sys.maxsize): - return self.compile().match(string, pos, endpos) - - def fullmatch(self, string, pos=0, endpos=sys.maxsize): - return self.compile().fullmatch(string, pos, endpos) - - def split(self, string, maxsplit=0): - return self.compile().split(string, maxsplit) - - def findall(self, string, pos=0, endpos=sys.maxsize): - return self.compile().findall(string, pos, endpos) - - def finditer(self, string, pos=0, endpos=sys.maxsize): - return self.compile().finditer(string, pos, endpos) - - def sub(self, repl, string, count=0): - return self.compile().sub(repl, string, count) - - def subn(self, repl, string, count=0): - return self.compile().subn(repl, string, count) - # Magic Functions def __call__(self, *args, **kwargs): """ This should be called by the user to specify the specific parameters of this instance i.e. anyof('a', 'b') """ @@ -262,33 +208,28 @@ def __call__(self, *args, **kwargs): raise TypeError("You're trying to pass parameters to a chain of expressions. That doesn't make any sense. 
Stop that.") # Sanatize the arguments - args = list(map(self._sanitizeInput if self._sanatize else deepcopy, args)) + if self._sanatize: + args = list(map(self._sanitizeInput, args)) _kwargs = {} for key, val in kwargs.items(): - _kwargs[key] = self._sanitizeInput(val) if self._sanatize else deepcopy(val) - - return EZRegex( - [partial(self._funcList[0], *args, **_kwargs)], - dialect=self.dialect, - init=False, - sanatize=self._sanatize, - replacement=self.replacement, - flags=self.flags - ) + _kwargs[key] = self._sanitizeInput(val) if self._sanatize else val - def __str__(self, addFlags=True): - return self._compile(addFlags) + return self._copy([partial(self._funcList[0], *args, **_kwargs)]) + + def __str__(self, add_flags=True): + return self._compile(add_flags) def __repr__(self): - return 'EZRegex("' + self._compile() + '")' + return f'{type(self).__name__}("{self}")' def __eq__(self, thing): - return self._sanitizeInput(thing, addFlags=True) == self._compile() + """ NOTE: This will return True for equivelent EZRegex expressions of different dialects """ + return self._sanitizeInput(thing, add_flags=True) == self._compile() def __mul__(self, amt): if amt is Ellipsis: - return EZRegex(f'(?{self})*', self.dialect, sanatize=False) + return self._copy(f'(?{self})*', sanatize=False) rtn = self # This isn't optimal, but it's unlikely anyone will use this with large numbers for i in range(amt-1): @@ -305,32 +246,30 @@ def __imul__(self, amt): return self * amt def __add__(self, thing): - return EZRegex(self._funcList + [partial(lambda cur=...: cur + self._sanitizeInput(thing))], - dialect=self.dialect, - init=False, + return self._copy( + self._funcList + [partial(lambda cur=...: cur + self._sanitizeInput(thing))], sanatize=self._sanatize or thing._sanatize if isinstance(thing, EZRegex) else self._sanatize, replacement=self.replacement or thing.replacement if isinstance(thing, EZRegex) else self.replacement, flags=(self.flags + thing.flags) if isinstance(thing, 
EZRegex) else self.flags ) def __radd__(self, thing): - return EZRegex([partial(lambda cur=...: self._sanitizeInput(thing) + cur)] + self._funcList, - dialect=self.dialect, - init=False, + return self._copy([partial(lambda cur=...: self._sanitizeInput(thing) + cur)] + self._funcList, sanatize=self._sanatize or thing._sanatize if isinstance(thing, EZRegex) else self._sanatize, replacement=self.replacement or thing.replacement if isinstance(thing, EZRegex) else self.replacement, flags=(self.flags + thing.flags) if isinstance(thing, EZRegex) else self.flags ) def __iadd__(self, thing): - # return self + self._sanitizeInput(thing) return self + thing def __and__(self, thing): + raise NotImplementedError logging.warning('The & operator is unstable still. Use each() instead.') return EZRegex(fr'(?={self}){thing}', self.dialect, sanatize=False) def __rand__(self, thing): + raise NotImplementedError logging.warning('The & operator is unstable still. Use each() instead.') return EZRegex(fr'(?={thing}){self}', self.dialect, sanatize=False) # The shift operators just shadow the add operators @@ -357,20 +296,20 @@ def __invert__(self): def __pos__(self): comp = self._compile() - return EZRegex(('' if not len(comp) else r'(?:' + comp + r')') + r'+', self.dialect, sanatize=False) + return self._copy(('' if not len(comp) else r'(?:' + comp + r')') + r'+', sanatize=False) def __ror__(self, thing): - return EZRegex(f'(?:{self._sanitizeInput(thing)}|{self._compile()})', self.dialect, sanatize=False) + return self._copy(f'(?:{self._sanitizeInput(thing)}|{self._compile()})', sanatize=False) def __or__(self, thing): logging.warning('The or operator is unstable and likely to fail, if used more than twice. 
Use anyof() instead, for now.') - return EZRegex(f'(?:{self._compile()}|{self._sanitizeInput(thing)})', self.dialect, sanatize=False) + return self._copy(f'(?:{self._compile()}|{self._sanitizeInput(thing)})', sanatize=False) def __xor__(self, thing): - return NotImplemented + return NotImplementedError def __rxor__(self, thing): - return NotImplemented + return NotImplementedError def __mod__(self, other): """ I would prefer __rmod__(), but it doesn't work on strings, since __mod__() is already specified for string formmating. """ @@ -390,7 +329,7 @@ def __hash__(self): return hash(id(self)) def __contains__(self, thing): - assert isinstance(thing, str), "`in` statement can only be used with a string" + # assert isinstance(thing, str), "`in` statement can only be used with a string" return re.search(self._compile(), thing) is not None def __rcontains__(self, thing): @@ -426,31 +365,31 @@ def __getitem__(self, args): args = args[0] if args is None or args is Ellipsis or args == 0: # at_least_0(self) - return EZRegex(fr'(?:{self._compile()})*', self.dialect, sanatize=False) + return self._copy(fr'(?:{self._compile()})*', sanatize=False) elif args == 1: # at_least_1(self) - return EZRegex(fr'(?:{self._compile()})+', self.dialect, sanatize=False) + return self._copy(fr'(?:{self._compile()})+', sanatize=False) else: # match_at_least(args, self) - return EZRegex(fr'(?:{self._compile()}){{{args},}}', self.dialect, sanatize=False) + return self._copy(fr'(?:{self._compile()}){{{args},}}', sanatize=False) else: start, end = args if start is None or start is Ellipsis: # match_at_most(2, self) - return EZRegex(fr'(?:{self._compile()}){{0,{end}}}', self.dialect, sanatize=False) + return self._copy(fr'(?:{self._compile()}){{0,{end}}}', sanatize=False) elif end is None or end is Ellipsis: if start == 0: # at_least_0(self) - return EZRegex(fr'(?:{self._compile()})*', self.dialect, sanatize=False) + return self._copy(fr'(?:{self._compile()})*', sanatize=False) elif start == 1: # 
at_least_1(self) - return EZRegex(fr'(?:{self._compile()})+', self.dialect, sanatize=False) + return self._copy(fr'(?:{self._compile()})+', sanatize=False) else: # match_at_least(start, self) - return EZRegex(fr'(?:{self._compile()}){{{start},}}', self.dialect, sanatize=False) + return self._copy(fr'(?:{self._compile()}){{{start},}}', sanatize=False) else: # match_range(start, end, self) - return EZRegex(fr'(?:{self._compile()}){{{start},{end}}}', self.dialect, sanatize=False) + return self._copy(fr'(?:{self._compile()}){{{start},{end}}}', sanatize=False) def __reversed__(self): return self.inverse() diff --git a/ezregex/EZRegex.pyi b/ezregex/EZRegex.pyi index 8696e1f..da2dbfb 100644 --- a/ezregex/EZRegex.pyi +++ b/ezregex/EZRegex.pyi @@ -1,32 +1,31 @@ import re import sys +from functools import partial from typing import Any, Callable, Iterator + from mypy_extensions import DefaultNamedArg, VarArg + from .base.interface import InputType +type EZRegexDefinition = str|Callable[[VarArg, DefaultNamedArg[str, "cur"]], str]|list[partial[str]] + class EZRegex: """ Represent parts of the Regex syntax. Should not be instantiated by the user directly.""" - def __init__(self, - definition:str|"EZRegex"|Callable[[VarArg, DefaultNamedArg[str, "cur"]], str], - dialect: str, - sanatize:bool=True, - init:bool=True, - replacement:bool=False, - flags:str='', - ) -> None: - """ - The workhorse of the EZRegex library. This represents a regex pattern that can be combined - with other EZRegexs and strings. Ideally, this should only be called internally, but it should - still work from the user's end + def __init__(self, definition:EZRegexDefinition, *, sanatize:bool=True, replacement:bool=False, flags:str='') -> None: + """ The workhorse of the EZRegex library. This represents a regex pattern that can be combined + with other EZRegexs and strings. 
Ideally, this should only be called internally, but it should + still work from the user's end """ # Private functions + def _flag_func(self, final:str) -> str: ... def _escape(self, pattern:str) -> str: """ This function was modified from the one in /usr/lib64/python3.12/re/__init__.py line 255 """ def _sanitizeInput(self, i:InputType, addFlags:bool=False) -> str: """ Instead of rasising an error if passed a strange datatype, it now trys to cast it to a string """ def _compile(self, addFlags=True) -> str: ... + def _copy(self, definition:EZRegexDefinition=..., sanatize:bool=..., replacement:bool=..., flags:str=...): ... # Regular functions def compile(self, addFlags=True) -> re.Pattern: ... @@ -42,16 +41,6 @@ class EZRegex: """ "Inverts" the current Regex expression to give an example of a string it would match. Useful for debugging purposes. """ - # Shadowing the re functions - def search(self, string, pos:int=0, endpos:int=sys.maxsize) -> re.Match|None: ... - def match(self, string, pos:int=0, endpos:int=sys.maxsize) -> re.Match|None: ... - def fullmatch(self, string, pos:int=0, endpos:int=sys.maxsize) -> re.Match|None: ... - def split(self, string, maxsplit:int=0) -> list: ... - def findall(self, string, pos: int = 0, endpos: int = sys.maxsize) -> list: ... - def finditer(self, string, pos: int = 0, endpos: int = sys.maxsize) -> Iterator[re.Match]: ... - def sub(self, repl: Any | Callable[[re.Match], Any], string, count: int = 0): ... - def subn(self, repl: Any | Callable[[re.Match], Any], string, count: int = 0): ... - # Magic Functions def __call__(self, *args, **kwargs) -> EZRegex | str: """ This should be called by the user to specify the specific parameters of this instance i.e. 
anyof('a', 'b') """ diff --git a/ezregex/__init__.py b/ezregex/__init__.py index dfec88a..ac1705d 100644 --- a/ezregex/__init__.py +++ b/ezregex/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ A readable and intuitive way to generate Regular Expressions """ -__version__ = '1.10.0' +__version__ = '2.0.0' # Import this as a submodule from . import generate diff --git a/ezregex/_dialects.py b/ezregex/_dialects.py deleted file mode 100644 index f675543..0000000 --- a/ezregex/_dialects.py +++ /dev/null @@ -1,23 +0,0 @@ -# This is the final say of what's supported -# Even though they can still import elements directly from the other modules, they'll fail if they're not here -dialects = { - 'python': { - "beginning": '', - "end": '', - 'flag_func': lambda final, flags: f'(?{flags}){final}', - 'escape_chars': b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f', - }, - 'javascript': { - "beginning": '/', - "end": '/', - 'flag_func': lambda final, flags: final + flags, - 'escape_chars': b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f', - }, - 'perl': { - "beginning": '', - "end": '', - 'flag_func': lambda final, flags: f'(?{flags}){final}', - 'escape_chars': b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f', - }, - -} diff --git a/ezregex/_docs.py b/ezregex/_docs.py index d4fee68..826480d 100644 --- a/ezregex/_docs.py +++ b/ezregex/_docs.py @@ -2,6 +2,7 @@ import os from copy import deepcopy from pathlib import Path + # from clipboard import copy from rich import print diff --git a/ezregex/base/__init__.py b/ezregex/base/__init__.py index 6fe8ed8..d9e51ea 100644 --- a/ezregex/base/__init__.py +++ b/ezregex/base/__init__.py @@ -1,14 +1,14 @@ +from string import Formatter from types import EllipsisType from typing import Callable -from ..EZRegex import EZRegex -from .elements import base -from string import Formatter + +from .elements import base, psuedonymns -def load_base(dialect, rgroup_func: Callable[[int|str, EllipsisType], str], replace_entire_func=...) 
# Alias table: maps each canonical element name to the tuple of alternate
# spellings that should refer to the same element.
#
# Every canonical name must appear exactly ONCE. A dict display keeps only the
# last value given for a repeated key, so the earlier layout (separate entries
# for 'anything', 'whitechunk', 'at_least_none' and 'if_not_proceded_by')
# silently discarded aliases such as `anychar`, `anyChar`, `whiteChunk`,
# `zeroOrMore` and `ifNotFollowedBy`. Those entries are merged below.
#
# Aliases that merely repeated their own canonical name ('alpha_num',
# 'any_of') were no-ops and have been removed.
psuedonymns = {
    # Quantifiers
    'match_max': ('matchMax',),
    'match_at_most': ('matchAtMost', 'atMost', 'at_most',),
    'match_num': ('matchNum', 'matchAmt', 'match_amt', 'amt', 'num',),
    'match_range': ('matchRange',),
    'match_more_than': ('matchMoreThan', 'match_greater_than', 'matchGreaterThan', 'moreThan', 'more_than',),
    'match_at_least': ('matchAtLeast', 'match_min', 'matchMin', 'atLeast', 'at_least',),
    # Positionals
    'line_starts_with': ('lineStartsWith', 'line_start', 'lineStart',),
    'string_starts_with': ('stringStartsWith', 'string_start', 'stringStart',),
    'line_ends_with': ('lineEndsWith', 'line_end', 'lineEnd',),
    'string_ends_with': ('stringEndsWith', 'string_end', 'stringEnd',),
    # Character classes and chunks
    'chunk': ('stuff',),
    'whitechunk': ('whiteChunk', 'white',),
    'anything': ('anychar', 'anyChar', 'char',),
    'letter': ('alpha',),
    'alpha_num': ('alphanum',),
    'at_least_none': (
        'anyAmt', 'any_amt', 'zeroOrMore', 'zero_or_more',
        'noneOrMore', 'none_or_more', 'atLeastNone', 'at_least_0', 'atLeast0',
    ),
    'any_between': ('anyBetween',),
    'word_char': ('wordChar',),
    'hex_digit': ('hexDigit', 'hex',),
    'oct_digit': ('octDigit',),
    'new_line': ('newline', 'newLine',),
    'space_or_tab': ('spaceOrTab',),
    'carriage_return': ('carriageReturn',),
    'vertical_tab': ('verticalTab',),
    'form_feed': ('formFeed',),
    'period': ('dot',),
    'int_or_float': ('intOrFloat',),
    'not_whitespace': ('notWhitespace',),
    'not_digit': ('notDigit',),
    'not_word': ('notWord',),
    'any_of': ('anyof', 'anyOf', 'oneOf', 'one_of',),
    'any_except': ('anyExcept',),
    'any_char_except': ('anyCharExcept',),
    'printable_and_space': ('printableAndSpace',),
    # Conditionals
    'if_proceded_by': ('ifFollowedBy', 'if_followed_by', 'ifProcededBy',),
    'if_exists': ('ifExists',),
    'if_not_proceded_by': ('ifNotFollowedBy', 'if_not_followed_by', 'ifNotProcededBy',),
    'if_preceded_by': ('ifPrecededBy',),
    'if_not_preceded_by': ('ifNotPrecededBy',),
    'if_enclosed_with': ('ifEnclosedWith', 'if_enclosed_by', 'ifEnclosedBy',),
    # Grouping
    'passive_group': ('passiveGroup',),
    'earlier_group': ('sameAs', 'same_as', 'earlierGroup', 'sameAsGroup', 'same_as_group',),
    'is_exactly': ('exactly', 'isExactly',),
    'optional': ('oneOrNone', 'one_or_none', 'opt',),
    'at_least_one': ('oneOrMore', 'one_or_more', 'atLeastOne', 'atLeast1', 'at_least_1',),
    # Flags
    'ASCII': ('ascii', 'a',),
    'DOTALL': ('dotall', 's',),
    'IGNORECASE': ('ignorecase', 'i', 'ignoreCase', 'ignore_case',),
    'LOCALE': ('locale', 'L',),
    'MULTILINE': ('multiline', 'm',),
    # Useful combinations
    'signed': ('integer',),
    'literally_anything': ('literallyAnything',),
    'word_boundary': ('wordBoundary',),
    'not_word_boundary': ('notWordBoundary',),
    # Replacement elements
    'rgroup': ('replaceGroup', 'replace_group',),
    'replace_entire': ('replaceAll', 'replace_all', 'replaceEntire',),
}
class JavaScriptEZRegex(EZRegex):
    """EZRegex subclass holding the JavaScript dialect settings."""

    # Delimiters for the rendered pattern.
    # NOTE(review): presumably the base class wraps the compiled expression in
    # these (giving `/pattern/`) - confirm against EZRegex.
    _beginning = '/'
    _end = '/'

    # NOTE(review): presumably the characters the base class escapes when
    # sanitizing literal input - confirm against EZRegex.
    _escape_chars = b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'

    def _flag_func(self, final):
        """Return `final` with this expression's flags appended after it."""
        flags = self.flags
        return final + flags
-lineStartsWith = line_start = lineStart = line_starts_with -stringStartsWith = string_start = stringStart = string_starts_with -lineEndsWith = line_end = lineEnd = line_ends_with -stringEndsWith = string_end = stringEnd = string_ends_with -stuff = chunk -whiteChunk = whitechunk -anychar = anything -anyChar = anything -char = anything -alpha = letter -alphanum = alpha_num = alpha_num -white = whitechunk - -anyAmt = any_amt = zeroOrMore = zero_or_more = at_least_none -anyBetween = any_between -wordChar = word_char -hexDigit = hex -octDigit = oct_digit -newline = newLine = new_line -spaceOrTab = space_or_tab -carriageReturn = carriage_return -verticalTab = vertical_tab -formFeed = form_feed -dot = period -intOrFloat = int_or_float -notWhitespace = not_whitespace -notDigit = not_digit -notWord = not_word -anyof = any_of = anyOf = oneOf = one_of = any_of -anyExcept = any_except -anyCharExcept = any_char_except -printableAndSpace = printable_and_space -ifFollowedBy = if_followed_by = if_proceded_by -ifExists = if_exists -ifNotFollowedBy = if_not_followed_by = if_not_proceded_by -ifPrecededBy = if_preceded_by -ifNotPrecededBy = if_not_preceded_by -ifEnclosedWith = if_enclosed_by = ifEnclosedBy = if_enclosed_with -ifProcededBy = if_proceded_by -ifNotProcededBy = if_not_proceded_by -passiveGroup = passive_group -sameAs = same_as = earlierGroup = sameAsGroup = same_as_group = earlier_group -exactly = isExactly = is_exactly -oneOrNone = one_or_none = opt = optional -oneOrMore = one_or_more = atLeastOne = atLeast1 = at_least_1 = at_least_one -noneOrMore = none_or_more = atLeastNone = at_least_0 = atLeast0 = at_least_none -ascii = a = ASCII -dotall = s = DOTALL -ignorecase = i = ignoreCase = ignore_case = IGNORECASE -locale = L = LOCALE -multiline = m = MULTILINE -# Useful combinations -integer = signed -literallyAnything = literally_anything -wordBoundary = word_boundary -notWordBoundary = not_word_boundary - -replaceGroup = replace_group = rgroup -replaceAll = replace_all = 
class PerlEZRegex(EZRegex):
    """EZRegex subclass holding the Perl dialect settings.

    The class must be named ``PerlEZRegex``: the perl package's ``__init__``
    and ``elements`` modules both import that name from this module, so any
    other name makes ``import ezregex.perl`` fail with ImportError.
    """

    # NOTE(review): presumably the characters the base class escapes when
    # sanitizing literal input - confirm against EZRegex.
    _escape_chars = b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'

    # No delimiters around the rendered pattern for this dialect.
    # NOTE(review): presumably consumed by the base class when compiling -
    # confirm against EZRegex.
    _end = ''
    _beginning = ''

    def _flag_func(self, final):
        """Return `final` prefixed with an inline flag group, ``(?<flags>)``."""
        return f'(?{self.flags}){final}'
chunk -whiteChunk = whitechunk -anychar = anything -anyChar = anything -char = anything -alpha = letter -alphanum = alpha_num = alpha_num -white = whitechunk - -anyAmt = any_amt = zeroOrMore = zero_or_more = at_least_none -anyBetween = any_between -wordChar = word_char -hexDigit = hex -octDigit = oct_digit -newline = newLine = new_line -spaceOrTab = space_or_tab -carriageReturn = carriage_return -verticalTab = vertical_tab -formFeed = form_feed -dot = period -intOrFloat = int_or_float -notWhitespace = not_whitespace -notDigit = not_digit -notWord = not_word -anyof = any_of = anyOf = oneOf = one_of = any_of -anyExcept = any_except -anyCharExcept = any_char_except -printableAndSpace = printable_and_space -ifFollowedBy = if_followed_by = if_proceded_by -ifExists = if_exists -ifNotFollowedBy = if_not_followed_by = if_not_proceded_by -ifPrecededBy = if_preceded_by -ifNotPrecededBy = if_not_preceded_by -ifEnclosedWith = if_enclosed_by = ifEnclosedBy = if_enclosed_with -ifProcededBy = if_proceded_by -ifNotProcededBy = if_not_proceded_by -passiveGroup = passive_group -sameAs = same_as = earlierGroup = sameAsGroup = same_as_group = earlier_group -exactly = isExactly = is_exactly -oneOrNone = one_or_none = opt = optional -oneOrMore = one_or_more = atLeastOne = atLeast1 = at_least_1 = at_least_one -noneOrMore = none_or_more = atLeastNone = at_least_0 = atLeast0 = at_least_none -ascii = a = ASCII -dotall = s = DOTALL -ignorecase = i = ignoreCase = ignore_case = IGNORECASE -locale = L = LOCALE -multiline = m = MULTILINE -# Useful combinations -integer = signed -literallyAnything = literally_anything -wordBoundary = word_boundary -notWordBoundary = not_word_boundary - -replaceGroup = replace_group = rgroup -replaceAll = replace_all = replaceEntire = replace_entire diff --git a/ezregex/python/PythonEZRegex.py b/ezregex/python/PythonEZRegex.py new file mode 100644 index 0000000..3b80aa1 --- /dev/null +++ b/ezregex/python/PythonEZRegex.py @@ -0,0 +1,48 @@ +import re +import sys + 
class PythonEZRegex(EZRegex):
    """EZRegex subclass for Python's `re` dialect.

    Besides the dialect settings, it mirrors the matching methods of a compiled
    `re.Pattern` so an expression can be used directly for searching,
    splitting and substitution.
    """

    # No delimiters around the rendered pattern for this dialect.
    # NOTE(review): presumably consumed by the base class when compiling -
    # confirm against EZRegex.
    _beginning = ''
    _end = ''
    # NOTE(review): presumably the characters the base class escapes when
    # sanitizing literal input - confirm against EZRegex.
    _escape_chars = b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'
    # Lazily filled cache used by the `compiled` property.
    _compiled = None

    def _flag_func(self, final):
        """Return `final` prefixed with an inline flag group, ``(?<flags>)``."""
        return '(?{}){}'.format(self.flags, final)

    def compile(self, add_flags=True):
        """Build a fresh `re.Pattern` from the string given by `self._compile`."""
        source = self._compile(add_flags)
        return re.compile(source)

    @property
    def compiled(self):
        """The compiled pattern, created on first access and then reused."""
        cached = self._compiled
        if cached is not None:
            return cached
        # The three-argument __setattr__ form is the base class's own API.
        # NOTE(review): the extra True presumably bypasses its immutability
        # guard - confirm against EZRegex.
        self.__setattr__('_compiled', self.compile(), True)
        return self._compiled

    # Shadowing the re functions: each call compiles the expression and
    # delegates to the `re.Pattern` method of the same name.
    def search(self, string, pos=0, endpos=sys.maxsize):
        """Delegate to `re.Pattern.search`."""
        pattern = self.compile()
        return pattern.search(string, pos, endpos)

    def match(self, string, pos=0, endpos=sys.maxsize):
        """Delegate to `re.Pattern.match`."""
        pattern = self.compile()
        return pattern.match(string, pos, endpos)

    def fullmatch(self, string, pos=0, endpos=sys.maxsize):
        """Delegate to `re.Pattern.fullmatch`."""
        pattern = self.compile()
        return pattern.fullmatch(string, pos, endpos)

    def split(self, string, maxsplit=0):
        """Delegate to `re.Pattern.split`."""
        pattern = self.compile()
        return pattern.split(string, maxsplit)

    def findall(self, string, pos=0, endpos=sys.maxsize):
        """Delegate to `re.Pattern.findall`."""
        pattern = self.compile()
        return pattern.findall(string, pos, endpos)

    def finditer(self, string, pos=0, endpos=sys.maxsize):
        """Delegate to `re.Pattern.finditer`."""
        pattern = self.compile()
        return pattern.finditer(string, pos, endpos)

    def sub(self, repl, string, count=0):
        """Delegate to `re.Pattern.sub`."""
        pattern = self.compile()
        return pattern.sub(repl, string, count)

    def subn(self, repl, string, count=0):
        """Delegate to `re.Pattern.subn`."""
        pattern = self.compile()
        return pattern.subn(repl, string, count)
+ def fullmatch(self, string, pos:int=0, endpos:int=sys.maxsize) -> re.Match|None: ... + def split(self, string, maxsplit:int=0) -> list: ... + def findall(self, string, pos: int = 0, endpos: int = sys.maxsize) -> list: ... + def finditer(self, string, pos: int = 0, endpos: int = sys.maxsize) -> Iterator[re.Match]: ... + def sub(self, repl: Any | Callable[[re.Match], Any], string, count: int = 0): ... + def subn(self, repl: Any | Callable[[re.Match], Any], string, count: int = 0): ... diff --git a/ezregex/python/__init__.py b/ezregex/python/__init__.py index 1176acf..8822e45 100644 --- a/ezregex/python/__init__.py +++ b/ezregex/python/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ Support for the Python dialect of regular expressions""" -__version__ = '1.1.0' +__version__ = '2.0.0' from .elements import * -from .psuedonymns import * +from .PythonEZRegex import PythonEZRegex diff --git a/ezregex/python/elements.py b/ezregex/python/elements.py index 42175f9..e9e14bb 100644 --- a/ezregex/python/elements.py +++ b/ezregex/python/elements.py @@ -2,8 +2,9 @@ # pyright: reportUndefinedVariable = false from ..base import load_base from ..EZRegex import EZRegex +from .PythonEZRegex import PythonEZRegex -globals().update(load_base('python', lambda num_or_name, cur=...: fr'{cur}\g<{num_or_name}>')) +globals().update(load_base(PythonEZRegex, lambda num_or_name, cur=...: fr'{cur}\g<{num_or_name}>')) del UNICODE diff --git a/ezregex/python/psuedonymns.py b/ezregex/python/psuedonymns.py deleted file mode 100644 index e570302..0000000 --- a/ezregex/python/psuedonymns.py +++ /dev/null @@ -1,67 +0,0 @@ -from .elements import * - -matchMax = match_max -matchAtMost = atMost = at_most = match_at_most -matchNum = matchAmt = match_amt = amt = num = match_num -matchRange = match_range -matchMoreThan = match_greater_than = matchGreaterThan = moreThan = more_than = match_more_than -matchAtLeast = match_min = matchMin = atLeast = at_least = match_at_least -lineStartsWith = line_start 
= lineStart = line_starts_with -stringStartsWith = string_start = stringStart = string_starts_with -lineEndsWith = line_end = lineEnd = line_ends_with -stringEndsWith = string_end = stringEnd = string_ends_with -stuff = chunk -whiteChunk = whitechunk -anychar = anything -anyChar = anything -char = anything -alpha = letter -alphanum = alpha_num = alpha_num -white = whitechunk - -anyAmt = any_amt = zeroOrMore = zero_or_more = at_least_none -anyBetween = any_between -wordChar = word_char -hexDigit = hex -octDigit = oct_digit -newline = newLine = new_line -spaceOrTab = space_or_tab -carriageReturn = carriage_return -verticalTab = vertical_tab -formFeed = form_feed -dot = period -intOrFloat = int_or_float -notWhitespace = not_whitespace -notDigit = not_digit -notWord = not_word -anyof = any_of = anyOf = oneOf = one_of = any_of -anyExcept = any_except -anyCharExcept = any_char_except -printableAndSpace = printable_and_space -ifFollowedBy = if_followed_by = if_proceded_by -ifExists = if_exists -ifNotFollowedBy = if_not_followed_by = if_not_proceded_by -ifPrecededBy = if_preceded_by -ifNotPrecededBy = if_not_preceded_by -ifEnclosedWith = if_enclosed_by = ifEnclosedBy = if_enclosed_with -ifProcededBy = if_proceded_by -ifNotProcededBy = if_not_proceded_by -passiveGroup = passive_group -sameAs = same_as = earlierGroup = sameAsGroup = same_as_group = earlier_group -exactly = isExactly = is_exactly -oneOrNone = one_or_none = opt = optional -oneOrMore = one_or_more = atLeastOne = atLeast1 = at_least_1 = at_least_one -noneOrMore = none_or_more = atLeastNone = at_least_0 = atLeast0 = at_least_none -ascii = a = ASCII -dotall = s = DOTALL -ignorecase = i = ignoreCase = ignore_case = IGNORECASE -locale = L = LOCALE -multiline = m = MULTILINE -# Useful combinations -integer = signed -literallyAnything = literally_anything -wordBoundary = word_boundary -notWordBoundary = not_word_boundary - -replaceGroup = replace_group = rgroup -replaceAll = replace_all = replaceEntire = 
replace_entire diff --git a/tests/test_EZRegex.py b/tests/test_EZRegex.py index eda744a..3fe5636 100644 --- a/tests/test_EZRegex.py +++ b/tests/test_EZRegex.py @@ -1,14 +1,24 @@ -import pytest import re + +import pytest + +import ezregex as er from ezregex import * +def test_basic(): + assert literal('test') == 'test' + +def test_basic_concat(): + assert str(literal('test') + digit) == r'test\d' + assert str('test' + digit) == r'test\d' + def test_access_dialect(): - assert literal('thing').dialect == 'python' + assert type(literal('thing')) is PythonEZRegex -def test_no_change_dialect(): - with pytest.raises(TypeError): - digit.dialect = 'asdf' +def test_psuedonyms(): + assert er.matchMax(digit) == er.match_max(digit) + assert matchMax(digit) == match_max(digit) def test_immutability(): diff --git a/tests/test_api.py b/tests/test_api.py index af4bd94..c432c70 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,19 +1,21 @@ import json from logging import warning from sys import version_info + if version_info < (3, 12): from typing_extensions import TypedDict else: from typing import TypedDict + from warnings import warn -from ezregex import * -from ezregex import api -# from ezregex.api import APIStructure -from ezregex import python -import jstyleson +import jstyleson from pydantic import TypeAdapter, ValidationError +# from ezregex.api import APIStructure +from ezregex import * +from ezregex import api, python + # from typing_extensions import TypedDict # Required by pydantic for python < 3.12 # import importlib, sys diff --git a/tests/test_generate.py b/tests/test_generate.py index f45d915..5646cd2 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -1,8 +1,8 @@ import re -from tests.data.groups import * -from tests.data.groups import _losers, _winners from ezregex.generate import * +from tests.data.groups import * +from tests.data.groups import _losers, _winners def words(text): diff --git a/tests/test_invert.py b/tests/test_invert.py 
index 1de8699..f6349c8 100644 --- a/tests/test_invert.py +++ b/tests/test_invert.py @@ -1,11 +1,12 @@ +import threading +import time + from rich import print as rprint from rich.table import Table -import time -import threading from rich.text import Text -from ezregex import invert + +from ezregex import invert, python from ezregex.python import literal -from ezregex import python strictness=20 dontIncludePassed=True diff --git a/tests/test_javascript.py b/tests/test_javascript.py index 3ea3abf..b80cb2f 100644 --- a/tests/test_javascript.py +++ b/tests/test_javascript.py @@ -1,10 +1,11 @@ # import jsonc -from ezregex import javascript +# from ezregex import javascript # import js2py -import py_js_runner import jstyleson +import py_js_runner from Cope import RedirectStd + def runjs(js): pass # with RedirectStd(stdout=): diff --git a/tests/test_operators.py b/tests/test_operators.py index f96626a..424a0c8 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -1,6 +1,7 @@ import re import pytest + from ezregex import * strictness=20 diff --git a/tests/test_python.py b/tests/test_python.py index 1f75425..74adc19 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -1,9 +1,10 @@ -import pytest -from ezregex import python import jstyleson +import pytest + import ezregex as er -from ezregex import EZRegex from ezregex import * +from ezregex import EZRegex, python + def test_python(): try: @@ -91,14 +92,15 @@ def test_misc(): a = word + ow # b = stuff + UNICODE c = IGNORECASE + '9' - assert a + c == word + ow + IGNORECASE + '9', f"{a + b + c} != {word + ow + IGNORECASE + '9'}" + assert a + c == word + ow + IGNORECASE + '9', f"{a + c} != {word + ow + IGNORECASE + '9'}" - a = str(EZRegex(r'\s+', 'python')) - b = str(EZRegex(raw(r'\s+'), 'python')) + a = str(PythonEZRegex(r'\s+')) + with pytest.raises(TypeError): + b = str(PythonEZRegex(raw(r'\s+'))) c = r'\s+' d = str(raw(r'\s+')) # e = str(whitespace + matchMax) - assert a == b == c == d, f'\na: 
{a}\n b: {b}\n c: {c}\n d: {d}\n e: {e}' + assert a == c == d, f'\na: {a}\n c: {c}\n d: {d}\n e: {e}' # assert (word + ow + anything + ':').test('word d:', show=False) # assert not (word + ow + anything + ':').test('word', show=False) assert 'word d:' in (word + ow + anything + ':') @@ -108,7 +110,7 @@ def test_misc(): assert str(test) == str(word + chunk + word), f"{str(test)} != {str(word + chunk + word)}" assert test == word + chunk + word assert either('(' + word + ')', '.') == either(er.literal('(') + word() + er.literal(')'), '.'), f"{either('(' + word + ')', '.')} != {either(er.literal('(') + word() + er.literal(')'), '.')}" - assert str(ifFollowedBy(word)) == r'(?=\w+)' + assert str(er.ifFollowedBy(word)) == r'(?=\w+)' #TODO: assert (word + ow + anything + ':') in 'word d:' #TODO: assert (word + ow + anything + ':') not in 'word' diff --git a/tests/test_replacement.py b/tests/test_replacement.py index e8426ea..d591958 100644 --- a/tests/test_replacement.py +++ b/tests/test_replacement.py @@ -1,10 +1,10 @@ import re -from ezregex import python -from ezregex import * - import jstyleson +from ezregex import * +from ezregex import python + def test_replacement(): offset = 2