Permalink
Browse files

A couple of comments on UTF-8 string length.

  • Loading branch information...
Andy Chu
Andy Chu committed Jun 26, 2018
1 parent cae0d5b commit 68dc2924be1dc98fdfebf43d4232e69324744fad
Showing with 16 additions and 1 deletion.
  1. +10 −1 core/libstr.py
  2. +6 −0 core/word_eval.py
View
@@ -67,7 +67,16 @@ def _CheckContinuationByte(byte):
def NumOfUtf8Chars(bytes):
"""Returns the number of utf-8 characters in the byte string 's'."""
"""Returns the number of utf-8 characters in the byte string 's'.
TODO: Raise an exception rather than returning a string, so we can set the
exit code of the command to 1?
$ echo ${#bad}
Invalid utf-8 at index 3 of string 'bad': 'ab\xffd'
$ echo $?
1
"""
num_of_utf8_chars = 0
num_bytes = len(bytes)
View
@@ -298,6 +298,12 @@ def _ApplyPrefixOp(self, val, op_id):
if op_id == Id.VSub_Pound: # LENGTH
if val.tag == value_e.Str:
# NOTE: Whether bash counts bytes or chars is affected by LANG
# environment variables.
# Should we respect that, or provide another way to select, e.g.
# set -o count-bytes?
# https://stackoverflow.com/questions/17368067/length-of-string-in-bash
length = libstr.NumOfUtf8Chars(val.s)
elif val.tag == value_e.StrArray:
# There can be empty placeholder values in the array.

0 comments on commit 68dc292

Please sign in to comment.