Permalink
Browse files

Add failing test case for unicode glob.

Bash globbing is unicode-aware when LANG is set appropriately.  No other
shell implements this.

- Changes to the sh_spec.py framework to support unicode!  (including
  invalid unicode)
- Notes in the OSH manual about Unicode operations
- Add awk error checking to spec test html-summary report
  • Loading branch information...
Andy Chu
Andy Chu committed Aug 22, 2017
1 parent 8066fd7 commit 643b825f518d528a4e02e3604b83f23992249820
Showing with 85 additions and 11 deletions.
  1. +27 −0 doc/osh-manual.md
  2. +5 −1 spec/unicode.sh
  3. +9 −0 spec/var-op-strip.test.sh
  4. +27 −7 test/sh_spec.py
  5. +16 −2 test/spec-runner.sh
  6. +1 −1 test/spec.sh
View
@@ -32,3 +32,30 @@ Very good articles on bash errexit:
- http://mywiki.wooledge.org/BashFAQ/105
- http://fvue.nl/wiki/Bash:_Error_handling
## Unicode
Encoding of programs should be utf-8.
But those programs can manipulate data in ANY encoding?
echo $'[\u03bc]' # C-escaped string
The alternatives are a literal unicode character in the source, or echo -e. $'' is preferred because it's statically parsed.
List of operations that are Unicode-aware:
- ${#s} -- number of characters in a string
- slice: ${s:0:1}
- any operation that uses glob, which has '.' and [[:alpha:]] expressions
- case
- [[ $x == . ]]
- ${s/./x}
- ${s#.} # remove one character
- sorting [[ $a < $b ]] -- should use current locale? I guess that is like the
'sort' command.
- prompt string has time, which is locale-specific.
View
@@ -34,7 +34,11 @@ echo-char() {
}
raw-char() {
# Use vim to put utf-8 in this source file.
# Use vim to put utf-8 in this source file:
# 1. i to enter Insert mode
# 2. Ctrl-V
# 3. u
# 4. 03bc -- 4 digits of hex
echo [μ]
}
@@ -58,3 +58,12 @@ v=abc
echo ${v%[[:alpha:]]}
# stdout: ab
# N-I mksh stdout: abc
### Strip unicode prefix
# NOTE: LANG is set to utf-8. Problem: there is no way to represent the
# invalid character! Instead of stdout-json, how about stdout-bytes?
v='μ-'
echo ${v#?} # ? is a glob that stands for one character
# stdout: -
# BUG dash/mksh stdout-repr: '\xbc-\n'
# BUG zsh stdout-repr: '\n'
View
@@ -255,6 +255,14 @@ def CreateStringAssertion(d, key, assertions, qualifier=False):
assertions.append(a)
found = True
# For testing invalid unicode
exp_repr = d.get(key + '-repr')
if exp_repr is not None:
exp = eval(exp_repr)
a = EqualAssertion(key, exp, qualifier=qualifier)
assertions.append(a)
found = True
return found
@@ -329,10 +337,7 @@ class EqualAssertion(object):
"""An expected value in a record."""
def __init__(self, key, expected, qualifier=None):
self.key = key
if isinstance(expected, str):
self.expected = expected.encode('utf-8')
else:
self.expected = expected # expected value
self.expected = expected # expected value
self.qualifier = qualifier # whether this was a special case?
def __repr__(self):
@@ -597,9 +602,15 @@ def _WriteDetailsAsText(self, details):
for m in messages:
print(m, file=self.f)
print('%s stdout:' % shell, file=self.f)
print(stdout.decode('utf-8'), file=self.f)
try:
print(stdout.decode('utf-8'), file=self.f)
except UnicodeDecodeError:
print(stdout, file=self.f)
print('%s stderr:' % shell, file=self.f)
print(stderr.decode('utf-8'), file=self.f)
try:
print(stderr.decode('utf-8'), file=self.f)
except UnicodeDecodeError:
print(stderr, file=self.f)
print('', file=self.f)
def _WriteStats(self, stats):
@@ -676,7 +687,13 @@ def _WriteDetails(self):
def _WriteRaw(s):
self.f.write('<pre>')
self.f.write(cgi.escape(s.decode('utf-8')))
# We output utf-8-encoded HTML. If we get invalid utf-8 as stdout
# (which is very possible), then show the ASCII repr().
try:
s2 = s.decode('utf-8')
except UnicodeDecodeError:
s2 = repr(s) # ASCII representation
self.f.write(cgi.escape(s2))
self.f.write('</pre>')
self.f.write('<i>stdout:</i> <br/>\n')
@@ -858,6 +875,9 @@ def main(argv):
env = {
'TMP': os.path.normpath(opts.tmp_env), # no .. or .
'PATH': opts.path_env,
# Copied from my own environment. For now, we want to test bash and other
# shells in utf-8 mode.
'LANG': 'en_US.UTF-8',
}
stats = RunCases(cases, case_predicate, shell_pairs, env, out)
out.EndCases(stats)
View
@@ -115,15 +115,29 @@ EOF
# - Lack of string interpolation is very annoying
head -n $NUM_TASKS _tmp/spec/MANIFEST.txt | awk '
# Awk problem: getline errors are ignored by default!
function error(path) {
print "Error reading line from file: " path > "/dev/stderr"
exit(1)
}
{
spec_name = $0
# Read from the task files
getline < ( "_tmp/spec/" spec_name ".task.txt" )
path = ( "_tmp/spec/" spec_name ".task.txt" )
n = getline < path
if (n != 1) {
error(path)
}
status = $1
wall_secs = $2
getline < ( "_tmp/spec/" spec_name ".stats.txt" )
path = ( "_tmp/spec/" spec_name ".stats.txt" )
n = getline < path
if (n != 1) {
error(path)
}
num_cases = $1
osh_num_passed = $2
osh_num_failed = $3
View
@@ -354,7 +354,7 @@ var-op-other() {
}
var-op-strip() {
sh-spec spec/var-op-strip.test.sh --osh-failures-allowed 1 \
sh-spec spec/var-op-strip.test.sh --osh-failures-allowed 2 \
${REF_SHELLS[@]} $ZSH $OSH "$@"
}

0 comments on commit 643b825

Please sign in to comment.