View
@@ -8,11 +8,12 @@
except ImportError:
from benchmarks import fake_libc as libc
from osh.meta import glob as glob_ast
from osh.meta import ast, Id
from osh import match
from core import util
log = util.log
glob_part_e = glob_ast.glob_part_e
log = util.log
glob_part_e = ast.glob_part_e
def LooksLikeGlob(s):
@@ -62,189 +63,211 @@ def GlobEscape(s):
return escaped
# We need to handle glob patterns, but fnmatch doesn't give you the positions
# of matches. So we convert globs to regexps.
def _GlobUnescape(s): # used by cmd_exec
"""Remove glob escaping from a string.
# Problems:
# - What about unicode? Do we have to set any global variables? We want it to
# always use utf-8?
# - Character class for glob is different than char class for regex? According
# to the standard, anyway.
# - Honestly I would like a more principled parser for globs! Can re2c do
# better here?
Used when there is no glob match.
TODO: Can probably get rid of this, as long as you save the original word.
class GlobParser(object):
"""
Parses glob patterns. Can convert directly to libc extended regexp or output
an intermediate AST (defined at osh/glob.asdl).
Complicated example: 'a*b'*.py, which will be escaped to a\*b*.py. So in
word_eval _JoinElideEscape and EvalWordToString you have to build two
'parallel' strings -- one escaped and one not.
"""
unescaped = ''
i = 0
n = len(s)
while i < n:
c = s[i]
if c == '\\':
assert i != n - 1, 'Trailing backslash: %r' % s
i += 1
c2 = s[i]
if c2 in GLOB_META_CHARS:
unescaped += c2
else:
raise AssertionError("Unexpected escaped character %r" % c2)
else:
unescaped += c
i += 1
return unescaped
def Parse(self, glob_pat):
"""Parses a glob pattern into AST form (defined at osh/glob.asdl).
Returns:
A 2-tuple of (<glob AST>, <error message>).
If the pattern is not actually a glob, the first element is None. The
second element is None unless there was an error during parsing.
"""
try:
return self._ParseUnsafe(glob_pat)
except RuntimeError as e:
return None, str(e)
def _ParseUnsafe(self, glob_pat):
"""
Parses a glob pattern into AST form.
Raises:
RuntimeError: if glob is invalid
"""
is_glob = False
i = 0
n = len(glob_pat)
parts = []
while i < n:
c = glob_pat[i]
if c == '\\': # glob escape like \* or \?
i += 1
parts.append(glob_ast.Literal(glob_pat[i]))
elif c == '*':
is_glob = True
parts.append(glob_ast.Star())
elif c == '?':
is_glob = True
parts.append(glob_ast.QMark())
elif c == '[':
is_glob = True
char_class_expr, i = self.ParseCharClassExpr(glob_pat, i)
parts.append(char_class_expr)
else:
parts.append(glob_ast.Literal(c))
# For ${x//foo*/y}, we need to glob patterns, but fnmatch doesn't give you the
# positions of matches. So we convert globs to regexps.
i += 1
# Problems:
# - What about unicode? Do we have to set any global variables? We want it to
# always use utf-8?
if is_glob:
return glob_ast.glob(parts), None
class _GlobParser(object):
return None, None
def __init__(self, lexer):
self.lexer = lexer
self.token_type = None
self.token_val = ''
self.warnings = []
def ParseCharClassExpr(self, glob_pat, start_i):
"""Parses a character class expression, e.g. [abc], [[:space:]], [!a-z]
def _Next(self):
"""Move to the next token."""
try:
self.token_type, self.token_val = self.lexer.next()
except StopIteration:
self.token_type = Id.Glob_Eof
self.token_val = ''
def _ParseCharClass(self):
"""
Returns:
A 2-tuple of (<CharClassExpr instance>, <next parse index>)
Raises:
RuntimeError: If error during parsing the character class.
a CharClass if the parse suceeds, or a GlobLit if fails. In the latter
case, we also append a warning.
"""
balance = 1 # We already saw a [
tokens = []
# NOTE: There is a special rule where []] and [[] are valid globs. Also
# [^[] and sometimes [^]], although that one is ambiguous!
# And [[:space:]] and [[.class.]] has to be taken into account too. I'm
# punting on this now because the rule isn't clear and consistent between
# shells.
while True:
self._Next()
if self.token_type == Id.Glob_Eof:
# TODO: location info
self.warnings.append('Malformed character class; treating as literal')
return [ast.GlobLit(id_, s) for (id_, s) in tokens]
if self.token_type == Id.Glob_LBracket:
balance += 1
elif self.token_type == Id.Glob_RBracket:
balance -= 1
if balance == 0:
break
tokens.append((self.token_type, self.token_val)) # Don't append the last ]
negated = False
if tokens:
id1, _ = tokens[0]
# NOTE: Both ! and ^ work for negation in globs
# https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html#Pattern-Matching
# TODO: Warn about the one that's not recommended?
if id1 in (Id.Glob_Bang, Id.Glob_Caret):
negated = True
tokens = tokens[1:]
return [ast.CharClass(negated, [s for _, s in tokens])]
def Parse(self):
"""
Returns:
regex string (or None if it's not a glob)
A list of warnings about the syntax
"""
i = start_i
if glob_pat[i] != '[':
raise RuntimeError('invalid CharClassExpr start!')
parts = []
i += 1
# NOTE: Both ! and ^ work for negation in globs
# https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html#Pattern-Matching
negated = glob_pat[i] in '!^'
if negated:
i += 1
while True:
self._Next()
id_ = self.token_type
s = self.token_val
in_posix_class = False
expr_body = []
n = len(glob_pat)
#util.log('%s %r', self.token_type, self.token_val)
if id_ == Id.Glob_Eof:
break
# NOTE: special case: ] is matched iff it's the first char in the expression
if glob_pat[i] == ']':
expr_body.append(']')
i += 1
if id_ in (Id.Glob_Star, Id.Glob_QMark):
parts.append(ast.GlobOp(id_))
while i < n:
c = glob_pat[i]
if c == ']':
if not in_posix_class:
break
in_posix_class = False
elif id_ == Id.Glob_LBracket:
# Could return a GlobLit or a CharClass
parts.extend(self._ParseCharClass())
elif c == '[':
if in_posix_class:
raise RuntimeError('invalid character [ in CharClassExpr')
in_posix_class = (glob_pat[i+1] == ':')
else: # Glob_{Bang,Caret,CleanLiterals,OtherLiteral,RBracket,EscapedChar,
# BadBackslash}
parts.append(ast.GlobLit(id_, s))
elif c == '\\':
expr_body.append(c)
i += 1
c = glob_pat[i]
# Also check for warnings. TODO: location info.
if id_ == Id.Glob_RBracket:
self.warnings.append('Got unescaped right bracket')
if id_ == Id.Glob_BadBackslash:
self.warnings.append('Got unescaped trailing backslash')
expr_body.append(c)
i += 1
return parts, self.warnings
else:
raise RuntimeError('unclosed CharClassExpr!')
return glob_ast.CharClassExpr(negated, ''.join(expr_body)), i
_REGEX_CHARS_TO_ESCAPE = '.|^$()+*?[]{}\\'
def ASTToExtendedRegex(self, ast):
if not ast:
return None
def _GenerateERE(parts):
out = []
out = []
for part in ast.parts:
if part.tag == glob_part_e.Literal:
if part.s in '.|^$()+*?[]{}\\':
for part in parts:
if part.tag == glob_part_e.GlobLit:
if part.id == Id.Glob_EscapedChar:
assert len(part.s) == 2, part.s
# The user could have escaped a char that doesn't need regex escaping,
# like \b or something.
c = part.s[1]
if c in _REGEX_CHARS_TO_ESCAPE:
out.append('\\')
out.append(part.s)
out.append(c)
elif part.tag == glob_part_e.Star:
out.append('.*')
elif part.tag == glob_part_e.QMark:
out.append('.')
elif part.tag == glob_part_e.CharClassExpr:
out.append('[')
if part.negated:
out.append('^')
out.append(part.body + ']')
return ''.join(out)
def GlobToExtendedRegex(self, glob_pat):
ast, err = self.Parse(glob_pat)
return self.ASTToExtendedRegex(ast), err
elif part.id == Id.Glob_CleanLiterals:
out.append(part.s) # e.g. 'py' doesn't need to be escaped
elif part.id == Id.Glob_OtherLiteral:
assert len(part.s) == 1, part.s
c = part.s
if c in _REGEX_CHARS_TO_ESCAPE:
out.append('\\')
out.append(c)
def _GlobUnescape(s): # used by cmd_exec
"""Remove glob escaping from a string.
Used when there is no glob match.
TODO: Can probably get rid of this, as long as you save the original word.
Complicated example: 'a*b'*.py, which will be escaped to a\*b*.py. So in
word_eval _JoinElideEscape and EvalWordToString you have to build two
'parallel' strings -- one escaped and one not.
"""
unescaped = ''
i = 0
n = len(s)
while i < n:
c = s[i]
if c == '\\':
assert i != n - 1, 'Trailing backslash: %r' % s
i += 1
c2 = s[i]
if c2 in GLOB_META_CHARS:
unescaped += c2
elif part.tag == glob_part_e.GlobOp:
if part.op_id == Id.Glob_QMark:
out.append('.')
elif part.op_id == Id.Glob_Star:
out.append('.*')
else:
raise AssertionError("Unexpected escaped character %r" % c2)
else:
unescaped += c
i += 1
return unescaped
raise AssertionError
elif part.tag == glob_part_e.CharClass:
out.append('[')
if part.negated:
out.append('^')
# Important: the character class is LITERALLY preserved, because we
# assume glob char classes are EXACTLY the same as regex char classes,
# including the escaping rules.
for s in part.strs:
out.append(s)
out.append(']')
return ''.join(out)
def GlobToERE(pat):
lexer = match.GLOB_LEXER.Tokens(pat)
p = _GlobParser(lexer)
parts, warnings = p.Parse()
# If there is nothing like * ? or [abc], then the whole string is a literal,
# and we can use a more efficient mechanism.
is_glob = False
for p in parts:
if p.tag in (glob_part_e.GlobOp, glob_part_e.CharClass):
is_glob = True
if not is_glob:
return None, warnings
if 0:
import sys
from asdl import format as fmt
print('---')
for p in parts:
print(p)
regex = _GenerateERE(parts)
return regex, warnings
class Globber(object):
View
@@ -9,7 +9,7 @@
from asdl import py_meta
from core import glob_
from osh.meta import glob as g
from osh import match
class GlobEscapeTest(unittest.TestCase):
@@ -93,75 +93,70 @@ def testPatSubRegexes(self):
result = r2.sub('X', 'a-b-c', count=1)
self.assertEqual('X-b-c', result)
def assertASTEqual(self, expected_ast, ast):
"""Asserts that 2 ASDL-defined ASTs are equal."""
expected_is_node = isinstance(expected_ast, py_meta.CompoundObj)
given_is_node = isinstance(ast, py_meta.CompoundObj)
if not expected_is_node and not given_is_node:
self.assertEqual(expected_ast, ast)
return
self.assertEqual(expected_ast.tag, ast.tag)
if not hasattr(expected_ast, '__slots__'):
return
self.assertEqual(expected_ast.__slots__, ast.__slots__)
for attr in expected_ast.__slots__:
exp_slot, slot = getattr(expected_ast, attr), getattr(ast, attr)
if isinstance(slot, list):
for exp_elem, elem in zip(exp_slot, slot):
self.assertASTEqual(exp_elem, elem)
else:
self.assertASTEqual(exp_slot, slot)
def _ReadTokens(s):
lex = match.GLOB_LEXER
return list(lex.Tokens(s))
class GlobParserTest(unittest.TestCase):
def testGlobLexer(self):
print(_ReadTokens(''))
print(_ReadTokens('*.py'))
print(_ReadTokens('\*.py'))
print(_ReadTokens('[abc]'))
print(_ReadTokens('\\')) # Enf
print(_ReadTokens('\\x'))
print(_ReadTokens(r'\\'))
print(_ReadTokens(r'[[:alpha:]]'))
print(_ReadTokens(r'[?]'))
def testGlobParser(self):
CASES = [
# (glob input, expected AST, expected extended regexp, has error)
('*.py', [g.Star()] + [g.Literal(c) for c in '.py'], '.*\.py', False),
('*.?', [g.Star(), g.Literal('.'), g.QMark()], '.*\..', False),
('<*>', [g.Literal('<'), g.Star(), g.Literal('>')], '<.*>', False),
('\**+', [g.Literal('*'), g.Star(), g.Literal('+')], '\*.*\+', False),
('\**', [g.Literal('*'), g.Star()], '\*.*', False),
('*.py', '.*\.py', False),
('*.?', '.*\..', False),
('<*>', '<.*>', False),
('\**+', '\*.*\+', False),
('\**', '\*.*', False),
('*.[ch]pp', '.*\.[ch]pp', False),
# not globs
('abc', None, None, False),
('\\*', None, None, False),
('c:\\foo', None, None, False),
('strange]one', None, None, False),
('abc', None, False),
('\\*', None, False),
('c:\\foo', None, False),
('strange]one', None, False),
# character class globs
('[[:space:]abc]', [g.CharClassExpr(False, '[:space:]abc')], '[[:space:]abc]', False),
('[abc]', [g.CharClassExpr(False, 'abc')], '[abc]', False),
('[\a\b\c]', [g.CharClassExpr(False, '\a\b\c')], '[\a\b\c]', False),
('[abc\[]', [g.CharClassExpr(False, 'abc\[')], '[abc\[]', False),
('[!not]', [g.CharClassExpr(True, 'not')], '[^not]', False),
('[^also_not]', [g.CharClassExpr(True, 'also_not')], '[^also_not]', False),
('[]closed_bracket]', [g.CharClassExpr(False, ']closed_bracket')], '[]closed_bracket]', False),
('[!]closed_bracket]', [g.CharClassExpr(True, ']closed_bracket')], '[^]closed_bracket]', False),
('[!*?!\\[]', [g.CharClassExpr(True, '*?!\[')], '[^*?!\\[]', False),
('[!\]foo]', [g.CharClassExpr(True, '\]foo')], '[^\]foo]', False),
('wow[[[[]]]]', ([g.Literal(c) for c in 'wow'] +
[g.CharClassExpr(False, '[[[')] +
[g.Literal(c) for c in ']]']), 'wow[[[[]\]\]\]', False),
('[[:space:]abc]', '[[:space:]abc]', False),
('[abc]', '[abc]', False),
(r'[\a\b\c]', r'[\a\b\c]', False),
('[abc\[]', '[abc\[]', False),
('[!not]', '[^not]', False),
('[^also_not]', '[^also_not]', False),
('[!*?!\\[]', '[^*?!\\[]', False),
('[!\]foo]', '[^\]foo]', False),
# invalid globs
('not_closed[a-z', None, None, True),
('[[:spa[ce:]]', None, None, True),
('not_closed[a-z', None, True),
('[[:spa[ce:]]', None, True),
# Regression test for IndexError.
('[', None, True),
('\\', None, True),
(']', None, False),
]
for glob, expected_parts, expected_ere, expected_err in CASES:
if expected_parts:
expected_ast = g.glob(expected_parts)
else:
expected_ast = None
parser = glob_.GlobParser()
ast, err = parser.Parse(glob)
ere = parser.ASTToExtendedRegex(ast)
self.assertASTEqual(expected_ast, ast)
self.assertEqual(expected_ere, ere)
self.assertEqual(expected_err, err is not None,
'%s: expected %r, got %r' % (glob, expected_err, err))
for glob, expected_ere, expected_err in CASES:
print('===')
print(glob)
regex, warnings = glob_.GlobToERE(glob)
self.assertEqual(
expected_ere, regex,
'Expected %r to translate to %r, got %r' % (glob, expected_ere, regex))
print('regex : %s' % regex)
print('warnings: %s' % warnings)
if __name__ == '__main__':
View
@@ -367,6 +367,15 @@ def AddKinds(spec):
'BadBackslash', # \D or trailing \
])
# For parsing globs and converting them to regexes.
spec.AddKind('Glob', [
'LBracket', 'RBracket',
'Star', 'QMark', 'Bang', 'Caret',
'EscapedChar', 'BadBackslash',
'CleanLiterals', 'OtherLiteral',
'Eof',
])
# Shared between [[ and test/[.
_UNARY_STR_CHARS = 'zn' # -z -n
View
@@ -196,9 +196,9 @@ def TranslateRegex(pat):
# http://re2c.org/examples/example_03.html
def TranslateEcholexer(echo_def):
def TranslateSimpleLexer(func_name, lexer_def):
print(r"""
static inline void MatchEchoToken(unsigned char* line, int line_len,
static inline void %s(unsigned char* line, int line_len,
int start_pos, int* id, int* end_pos) {
assert(start_pos <= line_len); /* caller should have checked */
@@ -208,9 +208,9 @@ def TranslateEcholexer(echo_def):
for (;;) {
/*!re2c
""")
""" % func_name)
for is_regex, pat, token_id in echo_def:
for is_regex, pat, token_id in lexer_def:
if is_regex:
re2c_pat = TranslateRegex(pat)
else:
@@ -354,7 +354,8 @@ def main(argv):
if action == 'c':
# Print code to stdout.
TranslateOshLexer(lex.LEXER_DEF)
TranslateEcholexer(lex.ECHO_E_DEF)
TranslateSimpleLexer('MatchEchoToken', lex.ECHO_E_DEF)
TranslateSimpleLexer('MatchGlobToken', lex.GLOB_DEF)
TranslateRegexToPredicate(lex.VAR_NAME_RE, 'IsValidVarName')
TranslateRegexToPredicate(pretty.PLAIN_WORD_RE, 'IsPlainWord')
View
@@ -132,7 +132,7 @@ def _AllMatchPositions(s, regex):
break
matches.append(m)
start, end = m
log('m = %r, %r' % (start, end))
#log('m = %r, %r' % (start, end))
pos = end # advance position
return matches
@@ -156,9 +156,10 @@ def PatSub(s, op, pat, replace_str):
"""Helper for ${x/pat/replace}."""
#log('PAT %r REPLACE %r', pat, replace_str)
regex, err = glob_.GlobParser().GlobToExtendedRegex(pat)
if err:
e_die("Can't convert glob to regex: %r", pat)
regex, warnings = glob_.GlobToERE(pat)
if warnings:
# TODO: Add strict mode and expose warnings.
pass
if regex is None: # Simple/fast path for fixed strings
if op.do_all:
@@ -190,7 +191,7 @@ def PatSub(s, op, pat, replace_str):
regex = regex + '$'
m = libc.regex_first_group_match(regex, s, 0)
log('regex = %r, s = %r, match = %r', regex, s, m)
#log('regex = %r, s = %r, match = %r', regex, s, m)
if m is None:
return s
start, end = m
View
@@ -37,7 +37,7 @@ fastlex_MatchOshToken(PyObject *self, PyObject *args) {
return NULL;
}
// bounds checking. It's OK to be called with a start_pos looking at \0.
// Bounds checking. It's OK to be called with a start_pos looking at \0.
// Eol_Tok is inserted everywhere.
if (start_pos > line_len) {
PyErr_Format(PyExc_ValueError,
@@ -62,8 +62,7 @@ fastlex_MatchEchoToken(PyObject *self, PyObject *args) {
return NULL;
}
// bounds checking. It's OK to be called with a start_pos looking at \0.
// Eol_Tok is inserted everywhere.
// Bounds checking.
if (start_pos > line_len) {
PyErr_Format(PyExc_ValueError,
"Invalid MatchEchoToken call (start_pos = %d, line_len = %d)",
@@ -77,6 +76,30 @@ fastlex_MatchEchoToken(PyObject *self, PyObject *args) {
return Py_BuildValue("(ii)", id, end_pos);
}
static PyObject *
fastlex_MatchGlobToken(PyObject *self, PyObject *args) {
unsigned char* line;
int line_len;
int start_pos;
if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
return NULL;
}
// Bounds checking.
if (start_pos > line_len) {
PyErr_Format(PyExc_ValueError,
"Invalid MatchGlobToken call (start_pos = %d, line_len = %d)",
start_pos, line_len);
return NULL;
}
int id;
int end_pos;
MatchGlobToken(line, line_len, start_pos, &id, &end_pos);
return Py_BuildValue("(ii)", id, end_pos);
}
static PyObject *
fastlex_IsValidVarName(PyObject *self, PyObject *args) {
const char *name;
@@ -105,6 +128,8 @@ static PyMethodDef methods[] = {
"(lexer mode, line, start_pos) -> (id, end_pos)."},
{"MatchEchoToken", fastlex_MatchEchoToken, METH_VARARGS,
"(line, start_pos) -> (id, end_pos)."},
{"MatchGlobToken", fastlex_MatchGlobToken, METH_VARARGS,
"(line, start_pos) -> (id, end_pos)."},
{"IsValidVarName", fastlex_IsValidVarName, METH_VARARGS,
"Is it a valid var name?"},
{"IsPlainWord", fastlex_IsPlainWord, METH_VARARGS,
View

This file was deleted.

Oops, something went wrong.
View
@@ -514,3 +514,31 @@ def IsKeyword(name):
# them with regcomp. I've only seen constant regexes.
#
# From code: ( | ) are treated special.
# A lexer for the parser that converts globs to extended regexes. Since we're
# only parsing character classes ([^[:space:][:alpha:]]) as opaque blobs, we
# don't need lexer modes here.
GLOB_DEF = [
# These could be operators in the glob, or just literals in a char class,
# e.g. touch '?'; echo [?].
C('*', Id.Glob_Star),
C('?', Id.Glob_QMark),
# For negation.
C('!', Id.Glob_Bang),
C('^', Id.Glob_Caret),
# Character classes.
C('[', Id.Glob_LBracket),
C(']', Id.Glob_RBracket),
R(r'\\[^\0]', Id.Glob_EscapedChar),
C('\\', Id.Glob_BadBackslash), # Trailing single backslash
# For efficiency, combine other characters into a single token, e.g. '.py'
# or ':alpha:'. TODO: re2c has the '*' clause; could we this in Python too?
# Although that only matches on character.
R(r'[a-zA-Z0-9_]+', Id.Glob_CleanLiterals), # no regex escaping
R(r'[^\0]', Id.Glob_OtherLiteral), # anything else -- examine the char
]
View
@@ -74,16 +74,16 @@ def _MatchOshToken_Fast(lex_mode, line, start_pos):
class SimpleLexer(object):
"""Lexer for echo -e, which interprets C-escaped strings.
Based on osh/parse_lib.py MatchOshToken_Slow.
"""
"""Lexer for echo -e, which interprets C-escaped strings."""
def __init__(self, match_func):
self.match_func = match_func
def Tokens(self, line):
"""Yields tokens."""
pos = 0
# NOTE: We're not using Eol_Tok like LineLexer. We probably should. And
# then the consumers of the ECHO_E_DEF and GLOB_DEF should use it. Get rid
# of Glob_Eof.
n = len(line)
while pos != n:
# NOTE: Need longest-match semantics to find \377 vs \.
@@ -92,7 +92,7 @@ def Tokens(self, line):
pos = end_pos
class _MatchEchoToken_Slow(object):
class _MatchTokenSlow(object):
def __init__(self, pat_list):
self.pat_list = _CompileAll(pat_list)
@@ -106,13 +106,21 @@ def _MatchEchoToken_Fast(line, start_pos):
return IdInstance(tok_type), end_pos
def _MatchGlobToken_Fast(line, start_pos):
"""Returns (id, end_pos)."""
tok_type, end_pos = fastlex.MatchGlobToken(line, start_pos)
return IdInstance(tok_type), end_pos
if fastlex:
MATCHER = _MatchOshToken_Fast
ECHO_MATCHER = _MatchEchoToken_Fast
GLOB_MATCHER = _MatchGlobToken_Fast
IsValidVarName = fastlex.IsValidVarName
else:
MATCHER = _MatchOshToken_Slow(lex.LEXER_DEF)
ECHO_MATCHER = _MatchEchoToken_Slow(lex.ECHO_E_DEF)
ECHO_MATCHER = _MatchTokenSlow(lex.ECHO_E_DEF)
GLOB_MATCHER = _MatchTokenSlow(lex.GLOB_DEF)
# Used by osh/cmd_parse.py to validate for loop name. Note it must be
# anchored on the right.
@@ -121,5 +129,5 @@ def _MatchEchoToken_Fast(line, start_pos):
def IsValidVarName(s):
return _VAR_NAME_RE.match(s)
ECHO_LEXER = SimpleLexer(ECHO_MATCHER)
GLOB_LEXER = SimpleLexer(GLOB_MATCHER)
View
@@ -141,26 +141,6 @@ def IdInstance(i):
f.close()
#
# Instantiate osh/glob.asdl
#
f = util.GetResourceLoader().open('osh/glob.asdl')
_asdl_module, _type_lookup = asdl.LoadSchema(f, APP_TYPES)
glob = _AsdlModule()
if 0:
py_meta.MakeTypes(_asdl_module, glob, _type_lookup)
else:
# Exported for the generated code to use
GLOB_TYPE_LOOKUP = _type_lookup
# Get the types from elsewhere
from _devbuild.gen import glob_asdl
py_meta.AssignTypes(glob_asdl, glob)
f.close()
#
# Instantiate core/runtime.asdl
#
View
@@ -216,8 +216,19 @@ module osh
-- In-memory format for all the functions snipped out of a file.
partial_file = (string path, arena* funcs)
--
-- These types are not used in the LST. But they could be statically
-- derived from values in the LST.
--
-- Glob representation, for converting ${x//} to extended regexes.
-- Example: *.[ch] is:
-- GlobOp(<Glob_Star '*'>),
-- GlobLit(Glob_OtherLiteral, '.'),
-- CharClassExpr(False, 'ch') # from Glob_CleanLiterals token
glob_part =
GlobLit(id id, string s)
| GlobOp(id op_id) -- * or ?
| CharClass(bool negated, string* strs)
-- Char classes are opaque for now. If we ever need them:
-- * Collating symbols are [. .]
-- * Equivalence classes are [=
}
View
@@ -187,3 +187,62 @@ f 'void *'
# stdout: void *
# BUG dash stdout-json: ""
# BUG dash status: 2
### Glob of unescaped [[] and []]
touch $TMP/[ $TMP/]
cd $TMP
echo [\[z] [\]z] # the right way to do it
echo [[z] []z] # also accepted
## STDOUT:
[ ]
[ ]
## END
### Glob of negated unescaped [[] and []]
touch $TMP/_G
cd $TMP
echo _[^\[z] _[^\]z] # the right way to do it
echo _[^[z] _[^]z] # also accepted
## STDOUT:
_G _G
_G _G
## END
## BUG mksh STDOUT:
_[^[z] _[^]z]
_[^[z] _[^]z]
## END
### PatSub of unescaped [[] and []]
x='[foo]'
echo ${x//[\[z]/<} # the right way to do it
echo ${x//[\]z]/>}
echo ${x//[[z]/<} # also accepted
echo ${x//[]z]/>}
## STDOUT:
<foo]
[foo>
<foo]
[foo>
## END
## N-I dash stdout-json: ""
## N-I dash status: 2
### PatSub of negated unescaped [[] and []]
x='[foo]'
echo ${x//[^\[z]/<} # the right way to do it
echo ${x//[^\]z]/>}
echo ${x//[^[z]/<} # also accepted
#echo ${x//[^]z]/>} # only busybox ash interprets as ^\]
## STDOUT:
[<<<<
>>>>]
[<<<<
## END
# mksh is doing something very odd, ignoring ^ altogether?
## BUG mksh STDOUT:
<foo]
[foo>
<foo]
## END
## N-I dash stdout-json: ""
## N-I dash status: 2
View
@@ -256,7 +256,6 @@ loop() {
${REF_SHELLS[@]} $OSH "$@"
}
# Not implemented in osh at all. Need glob matching of words.
case_() {
sh-spec spec/case_.test.sh --osh-failures-allowed 2 \
${REF_SHELLS[@]} $OSH "$@"
@@ -337,7 +336,7 @@ func() {
}
glob() {
sh-spec spec/glob.test.sh --osh-failures-allowed 2 \
sh-spec spec/glob.test.sh --osh-failures-allowed 4 \
${REF_SHELLS[@]} $BUSYBOX_ASH $OSH "$@"
}