Skip to content

Commit

Permalink
[refactor] Add regex wrappers so we can add submatch indices to libc
Browse files Browse the repository at this point in the history
  • Loading branch information
Andy Chu committed Dec 14, 2023
1 parent 7075fc5 commit 183d13f
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 18 deletions.
3 changes: 1 addition & 2 deletions core/process.py
Expand Up @@ -43,7 +43,6 @@
from mycpp import mylib
from mycpp.mylib import log, print_stderr, tagswitch, iteritems

import libc
import posix_ as posix
from posix_ import (
# translated by mycpp and directly called! No wrapper!
Expand Down Expand Up @@ -1681,7 +1680,7 @@ def GetJobWithSpec(self, job_spec):
return previous

# TODO: Add support for job specs based on prefixes of process argv.
m = libc.regex_match(r'^%([0-9]+)$', job_spec)
m = util.simple_regex_search(r'^%([0-9]+)$', job_spec)
if m is not None and len(m) > 1:
job_id = int(m[1])
if job_id in self.jobs:
Expand Down
16 changes: 16 additions & 0 deletions core/util.py
Expand Up @@ -14,6 +14,22 @@
from core import pyutil
from mycpp import mylib

import libc

from typing import List


def simple_regex_search(pat, s):
    # type: (str, str) -> List[str]
    """Search string s for regex pat, using no compilation flags.

    Thin wrapper that forwards to libc.regex_match() with the flags argument
    fixed at 0.

    Args:
      pat: the regex pattern
      s: the string to search

    Returns:
      Whatever libc.regex_match() returns.  Callers compare the result against
      None to detect "no match", and otherwise index it as a list of strings.
    """
    no_flags = 0
    matches = libc.regex_match(pat, s, no_flags)
    return matches


def regex_search(pat, comp_flags, s):
    # type: (str, int, str) -> List[str]
    """Search string s for regex pat, compiled with comp_flags.

    Thin wrapper around libc.regex_match().  Note the argument order: this
    function takes the flags BEFORE the subject string, while
    libc.regex_match() takes them after it.

    Args:
      pat: the regex pattern
      comp_flags: integer flags forwarded as the third argument of
        libc.regex_match() (presumably regcomp()-style flags such as
        REG_ICASE -- confirm against the libc extension)
      s: the string to search

    Returns:
      Whatever libc.regex_match() returns.  Callers compare the result against
      None to detect "no match".
    """
    matches = libc.regex_match(pat, s, comp_flags)
    return matches


class UserExit(Exception):
"""For explicit 'exit'."""
Expand Down
3 changes: 2 additions & 1 deletion osh/sh_expr_eval.py
Expand Up @@ -47,6 +47,7 @@
from core.error import e_die, e_die_status, e_strict, e_usage
from core import state
from core import ui
from core import util
from frontend import consts
from frontend import match
from frontend import parse_lib
Expand Down Expand Up @@ -1070,7 +1071,7 @@ def EvalB(self, node):
if self.exec_opts.nocasematch() else 0)

try:
matches = libc.regex_match(s2, s1, regex_flags)
matches = util.regex_search(s2, regex_flags, s1)
except RuntimeError as e:
# Status 2 indicates a regex parse error. This is fatal in OSH but
# not in bash, which treats [[ like a command with an exit code.
Expand Down
5 changes: 2 additions & 3 deletions osh/word_eval.py
Expand Up @@ -52,6 +52,7 @@
from core import pyutil
from core import state
from core import ui
from core import util
from data_lang import qsn
from core.error import e_die
from frontend import consts
Expand All @@ -66,8 +67,6 @@
from ysh import expr_eval
from ysh import val_ops

import libc

from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING

if TYPE_CHECKING:
Expand Down Expand Up @@ -169,7 +168,7 @@ def _SplitAssignArg(arg, blame_word):
"""
# Note: it would be better to cache regcomp(), but we don't have an API for
# that, and it probably isn't a bottleneck now
m = libc.regex_match(ASSIGN_ARG_RE, arg)
m = util.simple_regex_search(ASSIGN_ARG_RE, arg)
if m is None:
e_die("Assignment builtin expected NAME=value, got %r" % arg,
blame_word)
Expand Down
5 changes: 2 additions & 3 deletions osh/word_eval_test.py
Expand Up @@ -14,11 +14,10 @@

from core import error
from core import test_lib
from core import util
from osh import word_eval
from osh.cmd_parse_test import assertParseSimpleCommand

import libc


def InitEvaluator():
word_ev = test_lib.InitWordEvaluator()
Expand Down Expand Up @@ -49,7 +48,7 @@ def testSplitAssignArg(self):
]

for s, expected in CASES:
actual = libc.regex_match(word_eval.ASSIGN_ARG_RE, s)
actual = util.simple_regex_search(word_eval.ASSIGN_ARG_RE, s)
if actual is None:
self.assertEqual(expected, actual) # no match
else:
Expand Down
26 changes: 19 additions & 7 deletions pyext/libc_test.py
Expand Up @@ -179,14 +179,16 @@ def testGlob(self):
print(libc.glob('\\\\'))
print(libc.glob('[[:punct:]]'))

def testRegexParse(self):
self.assertEqual(True, libc.regex_parse(r'.*\.py'))
def testRegexMatch(self):
# TODO: can delete this function
if 0:
self.assertEqual(True, libc.regex_parse(r'.*\.py'))

# Syntax errors
self.assertRaises(RuntimeError, libc.regex_parse, r'*')
self.assertRaises(RuntimeError, libc.regex_parse, '\\')
if not IS_DARWIN:
self.assertRaises(RuntimeError, libc.regex_parse, '{')
# Syntax errors
self.assertRaises(RuntimeError, libc.regex_parse, r'*')
self.assertRaises(RuntimeError, libc.regex_parse, '\\')
if not IS_DARWIN:
self.assertRaises(RuntimeError, libc.regex_parse, '{')

cases = [
('([a-z]+)([0-9]+)', 'foo123', ['foo123', 'foo', '123']),
Expand All @@ -202,6 +204,16 @@ def testRegexParse(self):
(r'^.$', '', None),
]

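# TODO: make the match function return submatch indices instead of strings.
#
# Return a single flat list of length 2*(1 + nsub), where nsub is the
# number of capture groups:
# the factor of 2 is for the start and end index of each group,
# and the +1 is for group 0.
#
# indices = regex_search(...)
# indices[2*group] is the start of that group
# indices[2*group+1] is the end of that group
# group ranges from 0 ... nsub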

for pat, s, expected in filter(None, cases):
#print('CASE %s' % pat)
actual = libc.regex_match(pat, s)
Expand Down
4 changes: 2 additions & 2 deletions ysh/val_ops.py
Expand Up @@ -8,12 +8,12 @@
from _devbuild.gen.value_asdl import (value, value_e, value_t)
from core import error
from core import ui
from core import util
from mycpp.mylib import tagswitch
from ysh import regex_translate

from typing import TYPE_CHECKING, cast, Dict, List, Optional

import libc
from libc import REG_ICASE, REG_NEWLINE

if TYPE_CHECKING:
Expand Down Expand Up @@ -476,7 +476,7 @@ def RegexMatch(left, right, mem):
# - libc_regex_match should populate _start() and _end() too (out params?)
# - What is the ordering for named captures? See demo/ere*.sh

matches = libc.regex_match(right_s, left_s, regex_flags)
matches = util.regex_search(right_s, regex_flags, left_s)
if matches is not None:
if mem:
mem.SetMatches(matches)
Expand Down

0 comments on commit 183d13f

Please sign in to comment.