Permalink
Browse files

Properly handle invalid UTF-8 when taking the length ${#s}.

If set -o strict-word-eval is on, then it's a fatal error.

If not, we print a warning and return -1 as the length.
  • Loading branch information...
Andy Chu
Andy Chu committed Aug 17, 2018
1 parent 132cee4 commit 9148d8631b5bf7955b2788b9fc9a35f73a85b9ee
Showing with 61 additions and 23 deletions.
  1. +2 −0 core/libstr.py
  2. +8 −2 core/word_eval.py
  3. +51 −21 spec/var-op-other.test.sh
View
@@ -56,6 +56,8 @@ def Utf8Encode(code):
return ''.join(chr(b & 0xFF) for b in bytes_)
# TODO: Add details of the invalid character/byte here?
INCOMPLETE_CHAR = 'Incomplete UTF-8 character'
INVALID_CONT = 'Invalid UTF-8 continuation byte'
INVALID_START = 'Invalid start of UTF-8 character'
View
@@ -307,8 +307,14 @@ def _ApplyPrefixOp(self, val, op_id):
try:
length = libstr.CountUtf8Chars(val.s)
except util.InvalidUtf8 as e:
# EARLY RETURN. TODO: Should print to stderr!
return runtime.Str(str(e.msg))
# TODO: Add location info from 'part'? Only the caller has it.
if self.exec_opts.strict_word_eval:
raise
else:
# NOTE: Doesn't make the command exit with 1; it just returns a
# length of -1.
util.warn(e.UserErrorString())
return runtime.Str('-1')
elif val.tag == value_e.StrArray:
# There can be empty placeholder values in the array.
View
@@ -32,19 +32,28 @@ done
0
1
2
Incomplete UTF-8 character
-1
3
4
Incomplete UTF-8 character
Incomplete UTF-8 character
-1
-1
5
6
Incomplete UTF-8 character
Incomplete UTF-8 character
Incomplete UTF-8 character
-1
-1
-1
7
## END
## STDERR:
osh warning: Incomplete UTF-8 character
osh warning: Incomplete UTF-8 character
osh warning: Incomplete UTF-8 character
osh warning: Incomplete UTF-8 character
osh warning: Incomplete UTF-8 character
osh warning: Incomplete UTF-8 character
## END
# zsh behavior actually matches bash!
## BUG bash/zsh stderr-json: ""
## BUG bash/zsh STDOUT:
0
1
@@ -61,6 +70,7 @@ Incomplete UTF-8 character
9
7
## END
## BUG dash/mksh stderr-json: ""
## N-I dash/mksh STDOUT:
0
1
@@ -84,22 +94,40 @@ for num_bytes in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do
echo ${#s}
done
## STDOUT:
Invalid start of UTF-8 character
Invalid start of UTF-8 character
Invalid start of UTF-8 character
Invalid UTF-8 continuation byte
Invalid start of UTF-8 character
Invalid start of UTF-8 character
Invalid UTF-8 continuation byte
Invalid UTF-8 continuation byte
Invalid start of UTF-8 character
Invalid start of UTF-8 character
Invalid UTF-8 continuation byte
Invalid UTF-8 continuation byte
Invalid UTF-8 continuation byte
Invalid start of UTF-8 character
Invalid start of UTF-8 character
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
## END
## STDERR:
osh warning: Invalid start of UTF-8 character
osh warning: Invalid start of UTF-8 character
osh warning: Invalid start of UTF-8 character
osh warning: Invalid UTF-8 continuation byte
osh warning: Invalid start of UTF-8 character
osh warning: Invalid start of UTF-8 character
osh warning: Invalid UTF-8 continuation byte
osh warning: Invalid UTF-8 continuation byte
osh warning: Invalid start of UTF-8 character
osh warning: Invalid start of UTF-8 character
osh warning: Invalid UTF-8 continuation byte
osh warning: Invalid UTF-8 continuation byte
osh warning: Invalid UTF-8 continuation byte
osh warning: Invalid start of UTF-8 character
osh warning: Invalid start of UTF-8 character
## END
## BUG bash/zsh stderr-json: ""
## BUG bash/zsh STDOUT:
1
2
@@ -116,6 +144,7 @@ Invalid start of UTF-8 character
10
8
8
## N-I dash stderr-json: ""
## N-I dash STDOUT:
7
8
@@ -133,6 +162,7 @@ Invalid start of UTF-8 character
20
20
## END
## N-I mksh stderr-json: ""
## N-I mksh STDOUT:
1
2

0 comments on commit 9148d86

Please sign in to comment.