Skip to content

Commit

Permalink
[osh] Fix BASH_REMATCH - unmatched groups are empty strings
Browse files Browse the repository at this point in the history
The internal SplitAssignArg function should be unaffected by this
change.
  • Loading branch information
Andy Chu committed Apr 9, 2024
1 parent e593a8f commit 729614c
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 17 deletions.
2 changes: 1 addition & 1 deletion core/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1719,7 +1719,7 @@ def GetJobWithSpec(self, job_spec):
return previous

# TODO: Add support for job specs based on prefixes of process argv.
m = util.simple_regex_search(r'^%([0-9]+)$', job_spec)
m = util.RegexSearch(r'^%([0-9]+)$', job_spec)
if m is not None:
assert len(m) == 2
job_id = int(m[1])
Expand Down
2 changes: 1 addition & 1 deletion core/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -1885,7 +1885,7 @@ def GetValue(self, name, which_scopes=scope_e.Shopt):
groups = [] # type: List[str]
elif case2(regex_match_e.Yes):
m = cast(RegexMatch, top_match)
groups = util.RegexGroups(m.s, m.indices)
groups = util.RegexGroupStrings(m.s, m.indices)
return value.BashArray(groups)

# Do lookup of system globals before looking at user variables. Note: we
Expand Down
21 changes: 14 additions & 7 deletions core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,37 @@

import libc

from typing import List
from typing import List, Optional


def RegexGroups(s, indices):
def RegexGroupStrings(s, indices):
# type: (str, List[int]) -> List[str]
"""
For BASH_REMATCH: returns empty string for unmatched group
"""
groups = [] # type: List[str]
n = len(indices)
for i in xrange(n / 2):
start = indices[2 * i]
end = indices[2 * i + 1]
if start == -1:
groups.append(None)
groups.append('')
else:
groups.append(s[start:end])
return groups


def simple_regex_search(pat, s):
# type: (str, str) -> List[str]
"""Convenience wrapper around libc."""
def RegexSearch(pat, s):
# type: (str, str) -> Optional[List[str]]
"""Search a string for pattern.
Return None for no match, or a list of groups that match. Used for some
runtime parsing.
"""
indices = libc.regex_search(pat, 0, s, 0)
if indices is None:
return None
return RegexGroups(s, indices)
return RegexGroupStrings(s, indices)


class UserExit(Exception):
Expand Down
12 changes: 10 additions & 2 deletions core/util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def testDebugFile(self):
n = util.NullDebugFile()
n.write('foo')

def testSimpleRegexSearch(self):
def testRegexSearch(self):
cases = [
('([a-z]+)([0-9]+)', 'foo123', ['foo123', 'foo', '123']),
(r'.*\.py', 'foo.py', ['foo.py']),
Expand All @@ -35,6 +35,14 @@ def testSimpleRegexSearch(self):
None if IS_DARWIN else (r'', '', ['']),
(r'^$', '', ['']),
(r'^.$', '', None),

(r'(a*)(b*)', '', ['', '', '']),
(r'(a*)(b*)', 'aa', ['aa', 'aa', '']),
(r'(a*)(b*)', 'bb', ['bb', '', 'bb']),
(r'(a*)(b*)', 'aabb', ['aabb', 'aa', 'bb']),

(r'(a*(z)?)|(b*)', 'aaz', ['aaz', 'aaz', 'z', '']),
(r'(a*(z)?)|(b*)', 'bb', ['bb', '', '', 'bb']),
]

# TODO:
Expand All @@ -49,7 +57,7 @@ def testSimpleRegexSearch(self):

for pat, s, expected in filter(None, cases):
#print('CASE %s' % pat)
actual = util.simple_regex_search(pat, s)
actual = util.RegexSearch(pat, s)
#print('actual %r' % actual)
self.assertEqual(expected, actual)

Expand Down
5 changes: 3 additions & 2 deletions osh/word_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def _SplitAssignArg(arg, blame_word):
"""
# Note: it would be better to cache regcomp(), but we don't have an API for
# that, and it probably isn't a bottleneck now
m = util.simple_regex_search(ASSIGN_ARG_RE, arg)
m = util.RegexSearch(ASSIGN_ARG_RE, arg)
if m is None:
e_die("Assignment builtin expected NAME=value, got %r" % arg,
blame_word)
Expand All @@ -178,7 +178,8 @@ def _SplitAssignArg(arg, blame_word):
# m[2] is used for grouping; ERE doesn't have non-capturing groups

op = m[3]
if op is not None and len(op): # declare NAME=
assert op is not None, op
if len(op): # declare NAME=
val = value.Str(m[4]) # type: Optional[value_t]
append = op[0] == '+'
else: # declare NAME
Expand Down
8 changes: 5 additions & 3 deletions osh/word_eval_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ class RegexTest(unittest.TestCase):

def testSplitAssignArg(self):
CASES = [
('s', ['s', None, None]),
('value', ['value', None, None]),
# var name, op, value
('s', ['s', '', '']),
('value', ['value', '', '']),

('s!', None),
('!', None),
('=s', None),
Expand All @@ -49,7 +51,7 @@ def testSplitAssignArg(self):
]

for s, expected in CASES:
actual = util.simple_regex_search(word_eval.ASSIGN_ARG_RE, s)
actual = util.RegexSearch(word_eval.ASSIGN_ARG_RE, s)
if actual is None:
self.assertEqual(expected, actual) # no match
else:
Expand Down
2 changes: 1 addition & 1 deletion spec/regex.test.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## oils_failures_allowed: 1
## oils_failures_allowed: 0
## compare_shells: bash zsh

#
Expand Down

0 comments on commit 729614c

Please sign in to comment.