Skip to content

Commit

Permalink
[refactor] Store regex submatch indices and string
Browse files Browse the repository at this point in the history
Store the match indices together with the searched string, rather than the submatch strings themselves.

This makes it possible to implement `_start(i)` and `m => start(i)`.
  • Loading branch information
Andy Chu committed Dec 15, 2023
1 parent b6ea809 commit 3af44f2
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 33 deletions.
6 changes: 3 additions & 3 deletions builtin/func_eggex.py
Expand Up @@ -29,10 +29,10 @@ def Call(self, rd):
# TODO: Support strings for named captures
i = rd.OptionalInt(default_=0)

matches = self.mem.GetRegexMatches()
num_groups = len(matches) # including group 0
groups = self.mem.RegexGroups()
num_groups = len(groups) # including group 0
if i < num_groups:
captured = matches[i]
captured = groups[i]
if captured is None:
return value.Null
else:
Expand Down
28 changes: 17 additions & 11 deletions core/state.py
Expand Up @@ -25,6 +25,7 @@
from core import pyutil
from core import optview
from core import ui
from core import util
from frontend import consts
from frontend import location
from frontend import match
Expand Down Expand Up @@ -968,7 +969,8 @@ def __init__(self, mem):
mem.pipe_status.append([])
mem.process_sub_status.append([])

mem.regex_matches.append([])
mem.regex_indices.append([])
mem.regex_string.append('')
self.mem = mem

def __enter__(self):
Expand All @@ -977,7 +979,8 @@ def __enter__(self):

def __exit__(self, type, value, traceback):
# type: (Any, Any, Any) -> None
self.mem.regex_matches.pop()
self.mem.regex_string.pop()
self.mem.regex_indices.pop()
self.mem.process_sub_status.pop()
self.mem.pipe_status.pop()
self.mem.try_status.pop()
Expand Down Expand Up @@ -1065,7 +1068,8 @@ def __init__(self, dollar0, argv, arena, debug_stack):
self.this_dir = [] # type: List[str]

# 0 is the whole match, 1..n are submatches
self.regex_matches = [[]] # type: List[List[str]]
self.regex_indices = [[]] # type: List[List[int]]
self.regex_string = [''] # type: List[str]

self.last_bg_pid = -1 # Uninitialized value mutable public variable

Expand Down Expand Up @@ -1817,7 +1821,7 @@ def GetValue(self, name, which_scopes=scope_e.Shopt):
return value.List(items)

if name == 'BASH_REMATCH':
return value.BashArray(self.regex_matches[-1]) # top of stack
return value.BashArray(self.RegexGroups())

# Do lookup of system globals before looking at user variables. Note: we
# could optimize this at compile-time like $?. That would break
Expand Down Expand Up @@ -2143,18 +2147,20 @@ def IsGlobalScope(self):
# type: () -> bool
return len(self.var_stack) == 1

def ClearMatches(self):
def ClearRegexIndices(self):
# type: () -> None
top = self.regex_matches[-1]
top = self.regex_indices[-1]
del top[:] # no clear() in Python 2
self.regex_string[-1] = ''

def SetMatches(self, matches):
# type: (List[str]) -> None
self.regex_matches[-1] = matches
def SetRegexIndices(self, s, indices):
# type: (str, List[int]) -> None
self.regex_string[-1] = s
self.regex_indices[-1] = indices

def GetRegexMatches(self):
def RegexGroups(self):
# type: () -> List[Optional[str]]
return self.regex_matches[-1]
return util.RegexGroups(self.regex_string[-1], self.regex_indices[-1])


#
Expand Down
11 changes: 2 additions & 9 deletions core/util.py
Expand Up @@ -19,7 +19,7 @@
from typing import List, Optional


def _Groups(s, indices):
def RegexGroups(s, indices):
# type: (str, Optional[List[int]]) -> List[str]
if indices is None:
return None
Expand All @@ -40,14 +40,7 @@ def simple_regex_search(pat, s):
# type: (str, str) -> List[str]
"""Convenience wrapper around libc."""
indices = libc.regex_search(pat, 0, s)
return _Groups(s, indices)


def regex_search(pat, comp_flags, s):
# type: (str, int, str) -> List[str]
"""Convenience wrapper around libc."""
indices = libc.regex_search(pat, comp_flags, s)
return _Groups(s, indices)
return RegexGroups(s, indices)


class UserExit(Exception):
Expand Down
9 changes: 4 additions & 5 deletions osh/sh_expr_eval.py
Expand Up @@ -47,7 +47,6 @@
from core.error import e_die, e_die_status, e_strict, e_usage
from core import state
from core import ui
from core import util
from frontend import consts
from frontend import match
from frontend import parse_lib
Expand Down Expand Up @@ -1071,19 +1070,19 @@ def EvalB(self, node):
if self.exec_opts.nocasematch() else 0)

try:
matches = util.regex_search(s2, regex_flags, s1)
indices = libc.regex_search(s2, regex_flags, s1)
except RuntimeError as e:
# Status 2 indicates a regex parse error. This is fatal in OSH but
# not in bash, which treats [[ like a command with an exit code.
msg = e.message # type: str
e_die_status(2, 'Invalid regex %r: %s' % (s2, msg),
loc.Word(node.right))

if matches is not None:
self.mem.SetMatches(matches)
if indices is not None:
self.mem.SetRegexIndices(s1, indices)
return True
else:
self.mem.ClearMatches()
self.mem.ClearRegexIndices()
return False

if op_id == Id.Op_Less:
Expand Down
10 changes: 5 additions & 5 deletions ysh/val_ops.py
Expand Up @@ -8,12 +8,12 @@
from _devbuild.gen.value_asdl import (value, value_e, value_t)
from core import error
from core import ui
from core import util
from mycpp.mylib import tagswitch
from ysh import regex_translate

from typing import TYPE_CHECKING, cast, Dict, List, Optional

import libc
from libc import REG_ICASE, REG_NEWLINE

if TYPE_CHECKING:
Expand Down Expand Up @@ -476,14 +476,14 @@ def RegexMatch(left, right, mem):
# - libc_regex_match should populate _start() and _end() too (out params?)
# - What is the ordering for named captures? See demo/ere*.sh

matches = util.regex_search(right_s, regex_flags, left_s)
if matches is not None:
indices = libc.regex_search(right_s, regex_flags, left_s)
if indices is not None:
if mem:
mem.SetMatches(matches)
mem.SetRegexIndices(left_s, indices)
return True
else:
if mem:
mem.ClearMatches()
mem.ClearRegexIndices()
return False


Expand Down

0 comments on commit 3af44f2

Please sign in to comment.