From 6f81c4a620fbb8035c8c06072d6752582b6e9dd6 Mon Sep 17 00:00:00 2001 From: Kristian Svalland <54534849+kristiansvalland@users.noreply.github.com> Date: Mon, 27 Dec 2021 12:47:39 +0100 Subject: [PATCH] Add `array.reverse(array)` and `strings.reverse(string)` built-in functions. (#4161) The function `array.reverse` takes an array as an argument, and returns an array with a reversed order of elements. The function `strings.reverse` takes a string as an argument, and returns a string with a reversed order of unicode code points. WASM support is included for both built-ins. Fixes #3736 Signed-off-by: Kristian Svalland --- ast/builtins.go | 24 ++++++ capabilities.json | 34 +++++++++ docs/content/policy-reference.md | 2 + internal/compiler/wasm/wasm.go | 2 + .../cases/testdata/array/test-array-0052.yaml | 43 +++++++++++ .../testdata/strings/test-strings-0924.yaml | 73 +++++++++++++++++++ topdown/array.go | 17 +++++ topdown/strings.go | 20 +++++ wasm/src/array.c | 21 ++++++ wasm/src/array.h | 1 + wasm/src/strings.c | 27 +++++++ wasm/src/strings.h | 1 + wasm/tests/test.c | 15 ++++ 13 files changed, 280 insertions(+) create mode 100644 test/cases/testdata/array/test-array-0052.yaml create mode 100644 test/cases/testdata/strings/test-strings-0924.yaml diff --git a/ast/builtins.go b/ast/builtins.go index ee57ed4413..4afa98f381 100644 --- a/ast/builtins.go +++ b/ast/builtins.go @@ -80,6 +80,7 @@ var DefaultBuiltins = [...]*Builtin{ // Arrays ArrayConcat, ArraySlice, + ArrayReverse, // Conversions ToNumber, @@ -127,6 +128,7 @@ var DefaultBuiltins = [...]*Builtin{ TrimSuffix, TrimSpace, Sprintf, + StringReverse, // Numbers NumbersRange, @@ -717,6 +719,17 @@ var ArraySlice = &Builtin{ ), } +// ArrayReverse returns a given array, reversed +var ArrayReverse = &Builtin{ + Name: "array.reverse", + Decl: types.NewFunction( + types.Args( + types.NewArray(nil, types.A), + ), + types.NewArray(nil, types.A), + ), +} + /** * Conversions */ @@ -1080,6 +1093,17 @@ var Sprintf = &Builtin{ ), } +// StringReverse returns the given string, reversed. +var StringReverse = &Builtin{ + Name: "strings.reverse", + Decl: types.NewFunction( + types.Args( + types.S, + ), + types.S, + ), +} + /** * Numbers */ diff --git a/capabilities.json b/capabilities.json index d9a04bf152..bedf071665 100644 --- a/capabilities.json +++ b/capabilities.json @@ -123,6 +123,26 @@ "type": "function" } }, + { + "name": "array.reverse", + "decl": { + "args": [ + { + "dynamic": { + "type": "any" + }, + "type": "array" + } + ], + "result": { + "dynamic": { + "type": "any" + }, + "type": "array" + }, + "type": "function" + } + }, { "name": "array.slice", "decl": { @@ -2979,6 +2999,20 @@ "type": "function" } }, + { + "name": "strings.reverse", + "decl": { + "args": [ + { + "type": "string" + } + ], + "result": { + "type": "string" + }, + "type": "function" + } + }, { "name": "substring", "decl": { diff --git a/docs/content/policy-reference.md b/docs/content/policy-reference.md index f134f00d1d..ede64378fa 100644 --- a/docs/content/policy-reference.md +++ b/docs/content/policy-reference.md @@ -324,6 +324,7 @@ complex types. | Built-in | Description | Wasm Support | | ------- |-------------|---------------| | ``output := array.concat(array, array)`` | ``output`` is the result of concatenating the two input arrays together. | ✅ | +| ``output := array.reverse(array)`` | ``output`` is the result of reversing the order of the elements in ``array``. | ✅ | ``output := array.slice(array, startIndex, stopIndex)`` | ``output`` is the part of the ``array`` from ``startIndex`` to ``stopIndex`` including the first but excluding the last. If `startIndex >= stopIndex` then `output == []`. If both `startIndex` and `stopIndex` are less than zero, `output == []`. Otherwise, `startIndex` and `stopIndex` are clamped to 0 and `count(array)` respectively. | ✅ | ### Sets @@ -379,6 +380,7 @@ complex types. | ``output := indexof(string, search)`` | ``output`` is the index inside ``string`` where ``search`` first occurs, or -1 if ``search`` does not exist | ✅ | | ``output := lower(string)`` | ``output`` is ``string`` after converting to lower case | ✅ | | ``output := replace(string, old, new)`` | ``output`` is a ``string`` representing ``string`` with all instances of ``old`` replaced by ``new`` | ✅ | +| ``output := strings.reverse(string)`` | ``output`` is ``string`` reversed | ✅ | | ``output := strings.replace_n(patterns, string)`` | ``patterns`` is an object with old, new string key value pairs (e.g. ``{"old1": "new1", "old2": "new2", ...}``). ``output`` is a ``string`` with all old strings inside ``patterns`` replaced by the new strings | ✅ | | ``output := split(string, delimiter)`` | ``output`` is ``array[string]`` representing elements of ``string`` separated by ``delimiter`` | ✅ | | ``output := sprintf(string, values)`` | ``output`` is a ``string`` representing ``string`` formatted by the values in the ``array`` ``values``. | ``SDK-dependent`` | diff --git a/internal/compiler/wasm/wasm.go b/internal/compiler/wasm/wasm.go index 57357f1abf..77b0150af4 100644 --- a/internal/compiler/wasm/wasm.go +++ b/internal/compiler/wasm/wasm.go @@ -91,6 +91,7 @@ var builtinsFunctions = map[string]string{ ast.Floor.Name: "opa_arith_floor", ast.Rem.Name: "opa_arith_rem", ast.ArrayConcat.Name: "opa_array_concat", + ast.ArrayReverse.Name: "opa_array_reverse", ast.ArraySlice.Name: "opa_array_slice", ast.SetDiff.Name: "opa_set_diff", ast.And.Name: "opa_set_intersection", @@ -149,6 +150,7 @@ var builtinsFunctions = map[string]string{ ast.Contains.Name: "opa_strings_contains", ast.StartsWith.Name: "opa_strings_startswith", ast.EndsWith.Name: "opa_strings_endswith", + ast.StringReverse.Name: "opa_strings_reverse", ast.Split.Name: "opa_strings_split", ast.Replace.Name: "opa_strings_replace", ast.ReplaceN.Name: "opa_strings_replace_n", diff --git a/test/cases/testdata/array/test-array-0052.yaml b/test/cases/testdata/array/test-array-0052.yaml new file mode 100644 index 0000000000..b06a867e46 --- /dev/null +++ b/test/cases/testdata/array/test-array-0052.yaml @@ -0,0 +1,43 @@ +cases: +- note: 'array/reverse_123' + query: data.test.p = x + data: + foo: + - 1 + - 2 + - 3 + modules: + - | + package test + + p := array.reverse(data.foo) + want_result: + - x: + - 3 + - 2 + - 1 +- note: 'array/reverse_empty' + query: data.test.p = x + data: + foo: [] + modules: + - | + package test + + p := array.reverse(data.foo) + want_result: + - x: [] +- note: 'array/reverse_object_error' + query: data.test.p = x + data: + foo: + bar: baz + baz: bar + modules: + - | + package test + + p := array.reverse(data.foo) + want_error: "array.reverse: operand 1 must be array but got object" + want_error_code: eval_type_error + strict_error: true \ No newline at end of file diff --git a/test/cases/testdata/strings/test-strings-0924.yaml b/test/cases/testdata/strings/test-strings-0924.yaml new file mode 100644 index 0000000000..6f306d2451 --- /dev/null +++ b/test/cases/testdata/strings/test-strings-0924.yaml @@ -0,0 +1,73 @@ +cases: +- note: 'strings/reverse_bar' + query: data.test.p = x + data: + foo: "bar" + modules: + - | + package test + + p := strings.reverse(data.foo) + want_result: + - x: "rab" +- note: 'strings/reverse_unicode_multi_char_emojii' + query: data.test.p = x + data: + # The "Keycap Digit Two" consists of three codepoints: [2 U+32, U+FE0F, U+20E3] + # Other examples of such emojies are smileys with defined skin-color + # In the current implementation, we reverse strings at the level of codepoints, not Glyphs + foo: "2️⃣" + modules: + - | + package test + + p := strings.reverse(data.foo) + want_result: + - x: "⃣️2" +- note: 'strings/reverse_unicode' + query: data.test.p = x + data: + foo: "1😀𝛾" + modules: + - | + package test + + p := strings.reverse(data.foo) + want_result: + - x: "𝛾😀1" +- note: 'strings/reverse_empty' + query: data.test.p = x + data: + foo: "" + modules: + - | + package test + + p := strings.reverse(data.foo) + want_result: + - x: "" +- note: 'strings/reverse_number_error' + query: data.test.p = x + data: + foo: 123 + modules: + - | + package test + + p := strings.reverse(data.foo) + want_error: "reverse: operand 1 must be string but got number" + want_error_code: eval_type_error + strict_error: true +- note: 'strings/reverse_object_error' + query: data.test.p = x + data: + foo: + bar: baz + modules: + - | + package test + + p := strings.reverse(data.foo) + want_error: "reverse: operand 1 must be string but got object" + want_error_code: eval_type_error + strict_error: true diff --git a/topdown/array.go b/topdown/array.go index ca3e0d9fae..939de49920 100644 --- a/topdown/array.go +++ b/topdown/array.go @@ -71,7 +71,24 @@ func builtinArraySlice(a, i, j ast.Value) (ast.Value, error) { return arr.Slice(startIndex, stopIndex), nil } +func builtinArrayReverse(bctx BuiltinContext, operands []*ast.Term, iter func(*ast.Term) error) error { + arr, err := builtins.ArrayOperand(operands[0].Value, 1) + if err != nil { + return err + } + + length := arr.Len() + reversedArr := make([]*ast.Term, length) + + for index := 0; index < length; index++ { + reversedArr[index] = arr.Elem(length - index - 1) + } + + return iter(ast.ArrayTerm(reversedArr...)) +} + func init() { RegisterFunctionalBuiltin2(ast.ArrayConcat.Name, builtinArrayConcat) RegisterFunctionalBuiltin3(ast.ArraySlice.Name, builtinArraySlice) + RegisterBuiltinFunc(ast.ArrayReverse.Name, builtinArrayReverse) } diff --git a/topdown/strings.go b/topdown/strings.go index 1fe2806c1b..e5118b7780 100644 --- a/topdown/strings.go +++ b/topdown/strings.go @@ -412,6 +412,25 @@ func builtinSprintf(a, b ast.Value) (ast.Value, error) { return ast.String(fmt.Sprintf(string(s), args...)), nil } +func builtinReverse(bctx BuiltinContext, operands []*ast.Term, iter func(*ast.Term) error) error { + s, err := builtins.StringOperand(operands[0].Value, 1) + if err != nil { + return err + } + + sRunes := []rune(string(s)) + length := len(sRunes) + reversedRunes := make([]rune, length) + + for index, r := range sRunes { + reversedRunes[length-index-1] = r + } + + reversedString := string(reversedRunes) + + return iter(ast.StringTerm(reversedString)) +} + func init() { RegisterFunctionalBuiltin2(ast.FormatInt.Name, builtinFormatInt) RegisterFunctionalBuiltin2(ast.Concat.Name, builtinConcat) @@ -432,4 +451,5 @@ func init() { RegisterFunctionalBuiltin2(ast.TrimSuffix.Name, builtinTrimSuffix) RegisterFunctionalBuiltin1(ast.TrimSpace.Name, builtinTrimSpace) RegisterFunctionalBuiltin2(ast.Sprintf.Name, builtinSprintf) + RegisterBuiltinFunc(ast.StringReverse.Name, builtinReverse) } diff --git a/wasm/src/array.c b/wasm/src/array.c index e95d0af5e5..595fb988db 100644 --- a/wasm/src/array.c +++ b/wasm/src/array.c @@ -66,3 +66,24 @@ opa_value *opa_array_slice(opa_value *a, opa_value *i, opa_value *j) return &r->hdr; } + +OPA_BUILTIN +opa_value *opa_array_reverse(opa_value *a) +{ + if (opa_value_type(a) != OPA_ARRAY) + { + return NULL; + } + + opa_array_t *arr = opa_cast_array(a); + + opa_array_t *reversed = opa_cast_array(opa_array_with_cap(arr->len)); + + int n = arr->len; + + for (int i = 0; i < n; i++) { + opa_array_append(reversed, arr->elems[n - 1 - i].v); + } + + return &reversed->hdr; +} diff --git a/wasm/src/array.h b/wasm/src/array.h index 298a8f5b23..e1bcdce904 100644 --- a/wasm/src/array.h +++ b/wasm/src/array.h @@ -3,5 +3,6 @@ opa_value *opa_array_concat(opa_value *a, opa_value *b); opa_value *opa_array_slice(opa_value *a, opa_value *i, opa_value *j); +opa_value *opa_array_reverse(opa_value *a); #endif diff --git a/wasm/src/strings.c b/wasm/src/strings.c index 06efba9517..7c9f869cdf 100644 --- a/wasm/src/strings.c +++ b/wasm/src/strings.c @@ -351,6 +351,33 @@ opa_value *opa_strings_replace_n(opa_value *a, opa_value *b) return result; } +OPA_BUILTIN +opa_value *opa_strings_reverse(opa_value *a) +{ + if (opa_value_type(a) != OPA_STRING) + { + return NULL; + } + + opa_string_t *s = opa_cast_string(a); + + char *reversed = opa_malloc(s->len + 1); + + for (int i = 0; i < s->len; ) + { + int len = 0; + if (opa_unicode_decode_utf8(s->v, i, s->len, &len) == -1) + { + opa_abort("string: invalid unicode"); + } + memcpy(&reversed[s->len - i - len], &s->v[i], len); + i += len; + } + reversed[s->len] = '\0'; + + return opa_string_allocated(reversed, s->len); +} + OPA_BUILTIN opa_value *opa_strings_split(opa_value *a, opa_value *b) { diff --git a/wasm/src/strings.h b/wasm/src/strings.h index 74474aa0b7..bae03ffd38 100644 --- a/wasm/src/strings.h +++ b/wasm/src/strings.h @@ -11,6 +11,7 @@ opa_value *opa_strings_indexof(opa_value *a, opa_value *b); opa_value *opa_strings_lower(opa_value *a); opa_value *opa_strings_replace(opa_value *a, opa_value *b, opa_value *c); opa_value *opa_strings_replace_n(opa_value *a, opa_value *b); +opa_value *opa_strings_reverse(opa_value *a); opa_value *opa_strings_split(opa_value *a, opa_value *b); opa_value *opa_strings_startswith(opa_value *a, opa_value *b); opa_value *opa_strings_substring(opa_value *a, opa_value *b, opa_value *c); diff --git a/wasm/tests/test.c b/wasm/tests/test.c index 60033bbb41..c3932d2fe2 100644 --- a/wasm/tests/test.c +++ b/wasm/tests/test.c @@ -1454,6 +1454,17 @@ void test_array(void) test("array_slice", r->len == 2 && opa_value_compare(r->elems[0].v, opa_number_int(1)) == 0 && opa_value_compare(r->elems[1].v, opa_number_int(2)) == 0); + + opa_array_t *arr3 = opa_cast_array(opa_array()); + opa_array_append(arr3, opa_number_int(0)); + opa_array_append(arr3, opa_number_int(1)); + opa_array_append(arr3, opa_number_int(2)); + + r = opa_cast_array(opa_array_reverse(&arr3->hdr)); + test("array_reverse", r->len == 3 && + opa_value_compare(r->elems[0].v, opa_number_int(2)) == 0 && + opa_value_compare(r->elems[1].v, opa_number_int(1)) == 0 && + opa_value_compare(r->elems[2].v, opa_number_int(0)) == 0); } WASM_EXPORT(test_types) @@ -2679,6 +2690,10 @@ void test_strings(void) test("replace/cacab", opa_value_compare(opa_strings_replace(opa_string_terminated("cac"), opa_string_terminated("a"), opa_string_terminated("b")), opa_string_terminated("cbc")) == 0); test("replace/cacabd", opa_value_compare(opa_strings_replace(opa_string_terminated("cac"), opa_string_terminated("a"), opa_string_terminated("bd")), opa_string_terminated("cbdc")) == 0); + test("reverse/abc", opa_value_compare(opa_strings_reverse(opa_string_terminated("abc")), opa_string_terminated("cba")) == 0); + test("reverse/unicode", opa_value_compare(opa_strings_reverse(opa_string_terminated("1😀𝛾")), opa_string_terminated("𝛾😀1"))== 0); + test("reverse/___", opa_value_compare(opa_strings_reverse(opa_string_terminated("")), opa_string_terminated("")) == 0); + opa_object_t *obj2 = opa_cast_object(opa_object()); opa_object_insert(obj2, opa_string_terminated("a"), opa_string_terminated("b")); opa_object_insert(obj2, opa_string_terminated("c"), opa_string_terminated("d"));