Added basic R dialect

smartycope · Jun 4, 2024 · f37e289 · f37e289
1 parent a6c4b4a
commit f37e289
Show file tree

Hide file tree

Showing 8 changed files with 825 additions and 129 deletions.
diff --git a/README.md b/README.md
diff --git a/ezregex/EZRegex.py b/ezregex/EZRegex.py
@@ -39,6 +39,9 @@ def __init__(self, definition, *, sanatize=True, replacement=False, flags=''):
     def _flag_func(self, final:str) -> str:
         raise NotImplementedError('Subclasses need to implement _flag_func(final)')
 
+    def _final_func(self, s:str) -> str:
+        """ Hook applied to the fully compiled pattern string. The base implementation returns `s`
+            unchanged; dialect subclasses can override it to post-process the final output. """
+        return s
+
     def _escape(self, pattern:str):
         """ This function was modified from the one in /usr/lib64/python3.12/re/__init__.py line 255 """
         _special_chars_map = {i: '\\' + chr(i) for i in self._escape_chars}
@@ -81,6 +84,9 @@ def _compile(self, add_flags=True):
 
             if len(self._flags):
                 regex = self._flag_func(regex)
+
+            # This has to go in the add_flags scope so it only runs at the very end, like flags
+            regex = self._final_func(regex)
         return regex
 
     def _copy(self, definition=..., sanatize=..., replacement=..., flags=...):

diff --git a/ezregex/R/REZRegex.py b/ezregex/R/REZRegex.py
@@ -0,0 +1,13 @@
+from ..EZRegex import EZRegex
+
+
+class REZRegex(EZRegex):
+    """ EZRegex subclass for the R dialect. It sets the characters to escape and provides the flag
+        wrapper and the final-output hook used when the pattern is compiled. """
+    # Bytes of the characters that the base class's _escape() prefixes with a backslash
+    _escape_chars=b'()[]{}?*+-|^$\\.&~# '
+    # Both are left empty for this dialect.
+    # NOTE(review): presumably text the base class places around the compiled pattern -- confirm in EZRegex
+    _end = ''
+    _beginning = ''
+
+    def _flag_func(self, final):
+        """ Prepend the flags to the compiled pattern `final` as an inline `(?flags)` group. """
+        return f'(?{self.flags}){final}'
+
+    def _final_func(self, s:str) -> str:
+        """ Return `s` with every backslash doubled.
+            NOTE(review): presumably so the result can be pasted into an R string literal, where a
+            backslash must itself be escaped -- confirm. """
+        return s.replace('\\', '\\\\')
diff --git a/ezregex/R/__init__.py b/ezregex/R/__init__.py
@@ -0,0 +1,5 @@
+""" Support for the R dialect of regular expressions"""
+__version__ = '0.0.1'
+
+from .elements import *
+from .REZRegex import REZRegex
diff --git a/ezregex/R/elements.py b/ezregex/R/elements.py
@@ -0,0 +1,27 @@
+# pyright: reportArgumentType = false
+# pyright: reportUndefinedVariable = false
+from ..base import load_base
+from ..EZRegex import EZRegex
+from .REZRegex import REZRegex
+
+# Merge whatever load_base() produces for REZRegex into this module's globals. The lambda formats a
+# group reference as `\g{<num_or_name>}` appended to `cur`.
+# NOTE(review): presumably load_base returns a name -> element mapping of the shared base elements -- confirm in ..base
+globals().update(load_base(REZRegex, lambda num_or_name, cur=...: fr'{cur}\g{{{num_or_name}}}'))
+
+# I can't figure out how flags work in R, so I'm just ignoring them
+del line_starts_with
+del lineStartsWith
+del line_start
+del lineStart
+del line_ends_with
+del lineEndsWith
+del line_end
+del lineEnd
+
+del ASCII
+del DOTALL
+del IGNORECASE
+del LOCALE
+del MULTILINE
+del UNICODE
+
+# Matches any single character except line break characters, like the dot, but is not affected by any options that make the dot match all characters including line breaks.
+not_newline = REZRegex(r'\N')
diff --git a/ezregex/R/elements.pyi b/ezregex/R/elements.pyi
@@ -0,0 +1,23 @@
+from .REZRegex import REZRegex
+from ..base.interface import *
+
+# I can't figure out how flags work in R, so I'm just ignoring them
+del line_starts_with
+del lineStartsWith
+del line_start
+del lineStart
+del line_ends_with
+del lineEndsWith
+del line_end
+del lineEnd
+
+del ASCII
+del DOTALL
+del IGNORECASE
+del LOCALE
+del MULTILINE
+del UNICODE
+
+# The bare strings below follow the conventions assumed by ezregex/_docs.py: "Group: <name>" designates a
+# group, and a string directly below a variable acts as that variable's description
+"Group: Not Literals"
+not_newline: REZRegex = REZRegex(r'\N')
+'Matches any single character except line break characters, like the dot, but is not affected by any options that make the dot match all characters including line breaks.'
diff --git a/ezregex/_docs.py b/ezregex/_docs.py
@@ -9,7 +9,8 @@
 # ASSUMPTION: Groups are designated using the form "Group: <name>\n<optional description>"
 # ASSUMPTION: strings below variables act as the descriptions for those variables
 # ASSUMPTION: There aren't any extraneous variables or functions in the .pyi dialect files
-
+# TODO: This needs to add to groups instead of overwriting them, so that specifying a group in a dialect doesn't
+# reset that group's docs to only the entries given in the dialect; the base entries should be included as well
 class DocGenerator(ast.NodeVisitor):
     """ This parses the .pyi file and gets all the relevant info out of it """
     def __init__(self, node) -> None:

diff --git a/tests/test_R.py b/tests/test_R.py
@@ -0,0 +1,8 @@
+import jstyleson
+from ezregex.R import *
+from ezregex import EZRegex, R
+
+
+def test_R():
+    """ Check that an R-dialect expression renders to a string with every backslash doubled. """
+    print(word + group(digit + '45') + raw('\\w+'))
+    assert str(word + group(digit + '45') + raw('\\w+')) == r'\\w+(\\d45)\\w+'