Skip to content
Permalink
Browse files

utf8bytelength: count UTF8 string bytelength

[Builtin name changed, and it only works on string inputs. -Nico]
  • Loading branch information...
jnothman authored and nicowilliams committed Sep 8, 2015
1 parent 63dd033 commit 83e8ec587f56a88980c55204ee8433e2f50419cc
Showing with 28 additions and 0 deletions.
  1. +12 −0 docs/content/3.manual/manual.yml
  2. +7 −0 src/builtin.c
  3. +9 −0 tests/jq.test
@@ -662,6 +662,18 @@ sections:
input: '[[1,2], "string", {"a":2}, null]'
output: [2, 6, 1, 0]


- title: "`utf8bytelength`"
body: |
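The builtin function `utf8bytelength` outputs the number of
bytes used to encode a string in UTF-8. Unlike `length`, which
counts a string's codepoints, it counts encoded bytes. It only
accepts strings; any other input type produces an error.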
examples:
- program: 'utf8bytelength'
input: '"\u03bc"'
output: [2]

- title: "`keys`, `keys_unsorted`"
body: |
@@ -367,6 +367,12 @@ static jv f_tostring(jq_state *jq, jv input) {
}
}

/*
 * Builtin `utf8bytelength`: yields the byte length of a string input as a
 * number, as reported by jv_string_length_bytes().
 *
 * Any input whose kind is not JV_KIND_STRING results in a type error
 * carrying the message "only strings have UTF-8 byte length".
 *
 * The `jq` state argument is not used by this function.
 */
static jv f_utf8bytelength(jq_state *jq, jv input) {
  if (jv_get_kind(input) == JV_KIND_STRING) {
    return jv_number(jv_string_length_bytes(input));
  }

  return type_error(input, "only strings have UTF-8 byte length");
}

/* The ASCII uppercase letters, lowercase letters and decimal digits, as one string literal. */
#define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"

static jv escape_string(jv input, const char* escapings) {
@@ -1273,6 +1279,7 @@ static const struct cfunction function_list[] = {
{(cfunction_ptr)f_greatereq, "_greatereq", 3},
{(cfunction_ptr)f_contains, "contains", 2},
{(cfunction_ptr)f_length, "length", 1},
{(cfunction_ptr)f_utf8bytelength, "utf8bytelength", 1},
{(cfunction_ptr)f_type, "type", 1},
{(cfunction_ptr)f_isinfinite, "isinfinite", 1},
{(cfunction_ptr)f_isnan, "isnan", 1},
@@ -525,6 +525,15 @@ null
[[], {}, [1,2], {"a":42}, "asdf", "\u03bc"]
[0, 0, 2, 1, 4, 1]

utf8bytelength
"asdf\u03bc"
6

[.[] | try utf8bytelength catch .]
[[], {}, [1,2], 55, true, false]
["array ([]) only strings have UTF-8 byte length","object ({}) only strings have UTF-8 byte length","array ([1,2]) only strings have UTF-8 byte length","number (55) only strings have UTF-8 byte length","boolean (true) only strings have UTF-8 byte length","boolean (false) only strings have UTF-8 byte length"]


map(keys)
[{}, {"abcd":1,"abc":2,"abcde":3}, {"x":1, "z": 3, "y":2}]
[[], ["abc","abcd","abcde"], ["x","y","z"]]

0 comments on commit 83e8ec5

Please sign in to comment.
You can’t perform that action at this time.