Permalink
Browse files
A couple of comments on UTF-8 string length.
- Loading branch information...
Showing
with
16 additions
and
1 deletion.
-
+10
−1
core/libstr.py
-
+6
−0
core/word_eval.py
|
|
@@ -67,7 +67,16 @@ def _CheckContinuationByte(byte): |
|
|
|
|
|
|
|
|
def NumOfUtf8Chars(bytes):
|
|
|
"""Returns the number of utf-8 characters in the byte string 's'."""
|
|
|
"""Returns the number of utf-8 characters in the byte string 's'.
|
|
|
|
|
|
TODO: Raise exception rather than returning a string, so we can set the exit
|
|
|
code of the command to 1?
|
|
|
|
|
|
$ echo ${#bad}
|
|
|
Invalid utf-8 at index 3 of string 'bad': 'ab\xffd'
|
|
|
$ echo $?
|
|
|
1
|
|
|
"""
|
|
|
num_of_utf8_chars = 0
|
|
|
|
|
|
num_bytes = len(bytes)
|
|
|
|
|
|
@@ -298,6 +298,12 @@ def _ApplyPrefixOp(self, val, op_id): |
|
|
|
|
|
if op_id == Id.VSub_Pound: # LENGTH
|
|
|
if val.tag == value_e.Str:
|
|
|
# NOTE: Whether bash counts bytes or chars is affected by LANG
|
|
|
# environment variables.
|
|
|
# Should we respect that, or offer another way to select, e.g. set -o
|
|
|
# count-bytes?
|
|
|
|
|
|
# https://stackoverflow.com/questions/17368067/length-of-string-in-bash
|
|
|
length = libstr.NumOfUtf8Chars(val.s)
|
|
|
elif val.tag == value_e.StrArray:
|
|
|
# There can be empty placeholder values in the array.
|
|
|
|
0 comments on commit
68dc292