Skip to content

Commit

Permalink
Added basic R dialect
Browse files Browse the repository at this point in the history
  • Loading branch information
smartycope committed Jun 4, 2024
1 parent a6c4b4a commit f37e289
Show file tree
Hide file tree
Showing 8 changed files with 825 additions and 129 deletions.
869 changes: 741 additions & 128 deletions README.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions ezregex/EZRegex.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def __init__(self, definition, *, sanatize=True, replacement=False, flags=''):
# Dialect hook: wrap the compiled pattern `final` in this dialect's flag syntax and return it.
# The base class provides no implementation and always raises NotImplementedError, so every
# dialect subclass has to override it. `_compile` only calls it when `self._flags` is non-empty.
def _flag_func(self, final:str) -> str:
raise NotImplementedError('Subclasses need to implement _flag_func(final)')

# Dialect hook: last transformation applied to the pattern string in `_compile`, after the
# flag hook has run. The base implementation returns `s` unchanged; a dialect can override it
# to post-process the finished pattern.
def _final_func(self, s:str) -> str:
return s

def _escape(self, pattern:str):
""" This function was modified from the one in /usr/lib64/python3.12/re/__init__.py line 255 """
_special_chars_map = {i: '\\' + chr(i) for i in self._escape_chars}
Expand Down Expand Up @@ -81,6 +84,9 @@ def _compile(self, add_flags=True):

if len(self._flags):
regex = self._flag_func(regex)

# This has to go in the add_flags scope so it only runs at the very end, like flags
regex = self._final_func(regex)
return regex

def _copy(self, definition=..., sanatize=..., replacement=..., flags=...):
Expand Down
13 changes: 13 additions & 0 deletions ezregex/R/REZRegex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from ..EZRegex import EZRegex


class REZRegex(EZRegex):
    """EZRegex subclass configured for the R dialect of regular expressions."""

    # No extra text is added at the beginning or end of the pattern for this dialect
    _beginning = ''
    _end = ''
    # Bytes that get a backslash put in front of them when a literal is escaped
    _escape_chars=b'()[]{}?*+-|^$\\.&~# '

    def _flag_func(self, final):
        """Return `final` prefixed with an inline `(?...)` flag group built from `self.flags`."""
        flagged = f'(?{self.flags}){final}'
        return flagged

    def _final_func(self, s:str) -> str:
        """Return `s` with every backslash doubled."""
        single, doubled = '\\', '\\\\'
        # Splitting on a backslash and re-joining with two is the same as replacing each one
        return doubled.join(s.split(single))
5 changes: 5 additions & 0 deletions ezregex/R/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
""" Support for the Perl dialect of regular expressions"""
__version__ = '0.0.1'

from .elements import *
from .REZRegex import REZRegex
27 changes: 27 additions & 0 deletions ezregex/R/elements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# pyright: reportArgumentType = false
# pyright: reportUndefinedVariable = false
from ..base import load_base
from ..EZRegex import EZRegex
from .REZRegex import REZRegex

# Load the shared base elements, built around REZRegex, into this module's namespace.
# The lambda passed alongside produces `cur` followed by `\g{<num_or_name>}`.
# NOTE(review): presumably load_base uses it to build group back-references -- confirm in ..base
globals().update(load_base(REZRegex, lambda num_or_name, cur=...: fr'{cur}\g{{{num_or_name}}}'))

# I can't figure out how flags work in R, so I'm just ignoring them
# The names below were added by load_base above and are removed again from this dialect
del line_starts_with
del lineStartsWith
del line_start
del lineStart
del line_ends_with
del lineEndsWith
del line_end
del lineEnd

# Flag constants, removed for the same reason
del ASCII
del DOTALL
del IGNORECASE
del LOCALE
del MULTILINE
del UNICODE

# Matches any single character except line break characters, like the dot, but is not affected by any options that make the dot match all characters including line breaks.
not_newline = REZRegex(r'\N')
23 changes: 23 additions & 0 deletions ezregex/R/elements.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from .REZRegex import REZRegex
from ..base.interface import *

# I can't figure out how flags work in R, so I'm just ignoring them
# These deletions mirror the ones in elements.py
del line_starts_with
del lineStartsWith
del line_start
del lineStart
del line_ends_with
del lineEndsWith
del line_end
del lineEnd

del ASCII
del DOTALL
del IGNORECASE
del LOCALE
del MULTILINE
del UNICODE

# NOTE(review): the bare strings below look like markers for the doc generator in _docs.py
# ("Group: <name>" starts a group, a string under a variable is its description) -- confirm
"Group: Not Literals"
not_newline: REZRegex = REZRegex(r'\N')
'Matches any single character except line break characters, like the dot, but is not affected by any options that make the dot match all characters including line breaks.'
3 changes: 2 additions & 1 deletion ezregex/_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
# ASSUMPTION: Groups are designated using the form "Group: <name>\n<optional description>"
# ASSUMPTION: strings below variables act as the descriptions for those variables
# ASSUMPTION: There aren't any extraneous variables or functions in the .pyi dialect files

# TODO: This needs to add to groups instead of overwriting them, so that when a dialect specifies a group, the docs
# for that group keep the base entries as well as the ones specified in the dialect
class DocGenerator(ast.NodeVisitor):
""" This parses the .pyi file and gets all the relevant info out of it """
def __init__(self, node) -> None:
Expand Down
8 changes: 8 additions & 0 deletions tests/test_R.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import jstyleson
from ezregex.R import *
from ezregex import EZRegex, R


def test_R():
    """Check that a small composed pattern compiles to the expected R regex string."""
    # Build the expression once and reuse it for both the debug print and the assertion
    pattern = word + group(digit + '45') + raw('\\w+')
    print(pattern)
    compiled = str(pattern)
    assert compiled == r'\\w+(\\d45)\\w+'

0 comments on commit f37e289

Please sign in to comment.