Permalink
Browse files

Get rid of redundant glob -> regex conversion in ${array[@]//pat*/sub}.

Now we have a _Replacer interface that caches this computation.
  • Loading branch information...
Andy Chu
Andy Chu committed Jun 24, 2018
1 parent a4c178a commit 38564720e117da221321c5e7e1912a887ee53961
Showing with 54 additions and 29 deletions.
  1. +47 −23 core/libstr.py
  2. +5 −3 core/word_eval.py
  3. +2 −3 osh/lex.py
View
@@ -148,42 +148,48 @@ def _PatSubAll(s, regex, replace_str):
return ''.join(parts)
# TODO: For patsub of arrays, it would be worth it to CACHE the constant part
# of this computation. Turn this into a class, which translates and regcomp()s
# the regex exactly once.
class _Replacer(object):
    """Interface for objects that perform a pattern substitution on a string.

    A concrete replacer is built once from a pattern and a replacement string
    and can then be applied to many input strings, so the constant part of the
    work is not repeated for every element of an array.
    """

    def Replace(self, s, op):
        """Return 's' with the substitution described by 'op' applied.

        Args:
          s: The input string.
          op: The substitution operator node; subclasses in this file read its
            do_all / do_prefix / do_suffix flags.

        Raises:
          NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError
def PatSub(s, op, pat, replace_str):
"""Helper for ${x/pat/replace}."""
#log('PAT %r REPLACE %r', pat, replace_str)
regex, warnings = glob_.GlobToERE(pat)
if warnings:
# TODO: Add strict mode and expose warnings.
pass
class _ConstStringReplacer(_Replacer):
def __init__(self, pat, replace_str):
self.pat = pat
self.replace_str = replace_str
if regex is None: # Simple/fast path for fixed strings
def Replace(self, s, op):
if op.do_all:
return s.replace(pat, replace_str)
return s.replace(self.pat, self.replace_str)
elif op.do_prefix:
if s.startswith(pat):
n = len(pat)
return replace_str + s[n:]
if s.startswith(self.pat):
n = len(self.pat)
return self.replace_str + s[n:]
else:
return s
elif op.do_suffix:
if s.endswith(pat):
n = len(pat)
return s[:-n] + replace_str
if s.endswith(self.pat):
n = len(self.pat)
return s[:-n] + self.replace_str
else:
return s
else:
return s.replace(pat, replace_str, 1) # just the first one
return s.replace(self.pat, self.replace_str, 1) # just the first one
else:
regex = '(%s)' % regex # make it a group
class _GlobReplacer(_Replacer):
def __init__(self, regex, replace_str):
# TODO: It would be nice to cache the compilation of the regex here,
# instead of just the string. That would require more sophisticated use of
# the Python/C API in libc.c, which we might want to avoid.
self.regex = regex
self.replace_str = replace_str
def Replace(self, s, op):
regex = '(%s)' % self.regex # make it a group
if op.do_all:
return _PatSubAll(s, regex, replace_str) # loop over matches
return _PatSubAll(s, regex, self.replace_str) # loop over matches
if op.do_prefix:
regex = '^' + regex
@@ -195,4 +201,22 @@ def PatSub(s, op, pat, replace_str):
if m is None:
return s
start, end = m
return s[:start] + replace_str + s[end:]
return s[:start] + self.replace_str + s[end:]
def MakeReplacer(pat, replace_str):
    """Build a replacer object for ${x/pat/replace}.

    Translates the glob 'pat' once, so the returned object can be applied to
    many strings (for example every element of an array) without repeating
    the glob -> regex conversion.

    Args:
      pat: The glob pattern to search for.
      replace_str: The string substituted for each match.

    Returns:
      A _ConstStringReplacer when glob_.GlobToERE() yields no regex for 'pat'
      (the fixed-string fast path), otherwise a _GlobReplacer wrapping the
      translated regex.
    """
    regex, warnings = glob_.GlobToERE(pat)
    if warnings:
        # TODO: Add strict mode and expose warnings.
        pass

    if regex is None:
        # No regex needed: plain string operations are enough.
        return _ConstStringReplacer(pat, replace_str)
    return _GlobReplacer(regex, replace_str)
View
@@ -553,16 +553,18 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
else:
replace_str = ''
pat = pat_val.s
# Either GlobReplacer or ConstStringReplacer
replacer = libstr.MakeReplacer(pat_val.s, replace_str)
if val.tag == value_e.Str:
s = libstr.PatSub(val.s, op, pat, replace_str)
s = replacer.Replace(val.s, op)
val = runtime.Str(s)
elif val.tag == value_e.StrArray:
strs = []
for s in val.strs:
if s is not None:
strs.append(libstr.PatSub(s, op, pat, replace_str))
strs.append(replacer.Replace(s, op))
val = runtime.StrArray(strs)
else:
View
@@ -536,9 +536,8 @@ def IsKeyword(name):
R(r'\\[^\0]', Id.Glob_EscapedChar),
C('\\', Id.Glob_BadBackslash), # Trailing single backslash
# For efficiency, combine other characters into a single token, e.g. '.py'
# or ':alpha:'. TODO: re2c has the '*' clause; could we do this in Python too?
# Although that only matches one character.
# For efficiency, combine other characters into a single token, e.g. 'py' in
# '*.py' or 'alpha' in '[[:alpha:]]'.
R(r'[a-zA-Z0-9_]+', Id.Glob_CleanLiterals), # no regex escaping
R(r'[^\0]', Id.Glob_OtherLiteral), # anything else -- examine the char
]

0 comments on commit 3856472

Please sign in to comment.