From 686f7a55d122513eed7bc6e50968953dc7cb2107 Mon Sep 17 00:00:00 2001 From: shuheiktgw Date: Tue, 28 Dec 2021 07:06:33 +0900 Subject: [PATCH 1/4] Support indexof_n built-in function Fixes https://github.com/open-policy-agent/opa/issues/4155 Signed-off-by: shuheiktgw --- ast/builtins.go | 13 +++++ capabilities.json | 20 ++++++++ docs/content/policy-reference.md | 45 ++++++++-------- .../testdata/strings/test-strings-0925.yaml | 25 +++++++++ topdown/strings.go | 51 +++++++++++++++---- 5 files changed, 123 insertions(+), 31 deletions(-) create mode 100644 test/cases/testdata/strings/test-strings-0925.yaml diff --git a/ast/builtins.go b/ast/builtins.go index 4afa98f381..b6ecf00b1b 100644 --- a/ast/builtins.go +++ b/ast/builtins.go @@ -112,6 +112,7 @@ var DefaultBuiltins = [...]*Builtin{ Concat, FormatInt, IndexOf, + IndexOfN, Substring, Lower, Upper, @@ -895,6 +896,18 @@ var IndexOf = &Builtin{ ), } +// IndexOfN returns a list of all the indexes of a substring contained inside a string +var IndexOfN = &Builtin{ + Name: "indexof_n", + Decl: types.NewFunction( + types.Args( + types.S, + types.S, + ), + types.NewArray(nil, types.N), + ), +} + // Substring returns the portion of a string for a given start index and a length. // If the length is less than zero, then substring returns the remainder of the string. var Substring = &Builtin{ diff --git a/capabilities.json b/capabilities.json index bedf071665..1e2b33a6d4 100644 --- a/capabilities.json +++ b/capabilities.json @@ -1090,6 +1090,26 @@ "type": "function" } }, + { + "name": "indexof_n", + "decl": { + "args": [ + { + "type": "string" + }, + { + "type": "string" + } + ], + "result": { + "dynamic": { + "type": "number" + }, + "type": "array" + }, + "type": "function" + } + }, { "name": "internal.member_2", "decl": { diff --git a/docs/content/policy-reference.md b/docs/content/policy-reference.md index ede64378fa..899433353b 100644 --- a/docs/content/policy-reference.md +++ b/docs/content/policy-reference.md @@ -371,28 +371,29 @@ complex types. ### Strings -| Built-in | Description | Wasm Support | -| ------- |-------------|---------------| -| ``output := concat(delimiter, array_or_set)`` | ``output`` is the result of joining together the elements of ``array_or_set`` with the string ``delimiter`` | ✅ | -| ``contains(string, search)`` | true if ``string`` contains ``search`` | ✅ | -| ``endswith(string, search)`` | true if ``string`` ends with ``search`` | ✅ | -| ``output := format_int(number, base)`` | ``output`` is string representation of ``number`` in the given ``base`` | ✅ | -| ``output := indexof(string, search)`` | ``output`` is the index inside ``string`` where ``search`` first occurs, or -1 if ``search`` does not exist | ✅ | -| ``output := lower(string)`` | ``output`` is ``string`` after converting to lower case | ✅ | -| ``output := replace(string, old, new)`` | ``output`` is a ``string`` representing ``string`` with all instances of ``old`` replaced by ``new`` | ✅ | -| ``output := strings.reverse(string)`` | ``output`` is ``string`` reversed | ✅ | -| ``output := strings.replace_n(patterns, string)`` | ``patterns`` is an object with old, new string key value pairs (e.g. ``{"old1": "new1", "old2": "new2", ...}``). ``output`` is a ``string`` with all old strings inside ``patterns`` replaced by the new strings | ✅ | -| ``output := split(string, delimiter)`` | ``output`` is ``array[string]`` representing elements of ``string`` separated by ``delimiter`` | ✅ | -| ``output := sprintf(string, values)`` | ``output`` is a ``string`` representing ``string`` formatted by the values in the ``array`` ``values``. | ``SDK-dependent`` | -| ``startswith(string, search)`` | true if ``string`` begins with ``search`` | ✅ | -| ``output := substring(string, start, length)`` | ``output`` is the portion of ``string`` from index ``start`` and having a length of ``length``. If ``length`` is less than zero, ``length`` is the remainder of the ``string``. If ``start`` is greater than the length of the string, ``output`` is empty. It is invalid to pass a negative offset to this function. | ✅ | -| ``output := trim(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all leading and trailing instances of the characters in ``cutset`` removed. | ✅ | -| ``output := trim_left(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all leading instances of the characters in ``cutset`` removed. | ✅ | -| ``output := trim_prefix(string, prefix)`` | ``output`` is a ``string`` representing ``string`` with leading instance of ``prefix`` removed. If ``string`` doesn't start with prefix, ``string`` is returned unchanged.| ✅ | -| ``output := trim_right(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all trailing instances of the characters in ``cutset`` removed. | ✅ | -| ``output := trim_suffix(string, suffix)`` | ``output`` is a ``string`` representing ``string`` with trailing instance of ``suffix`` removed. If ``string`` doesn't end with suffix, ``string`` is returned unchanged.| ✅ | -| ``output := trim_space(string)`` | ``output`` is a ``string`` representing ``string`` with all leading and trailing white space removed.| ✅ | -| ``output := upper(string)`` | ``output`` is ``string`` after converting to upper case | ✅ | +| Built-in | Description | Wasm Support | +|---------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------| +| ``output := concat(delimiter, array_or_set)`` | ``output`` is the result of joining together the elements of ``array_or_set`` with the string ``delimiter`` | ✅ | +| ``contains(string, search)`` | true if ``string`` contains ``search`` | ✅ | +| ``endswith(string, search)`` | true if ``string`` ends with ``search`` | ✅ | +| ``output := format_int(number, base)`` | ``output`` is string representation of ``number`` in the given ``base`` | ✅ | +| ``output := indexof(string, search)`` | ``output`` is the index inside ``string`` where ``search`` first occurs, or -1 if ``search`` does not exist | ✅ | +| ``output := indexof_n(string, search)`` | ``output`` is ``array[number]`` representing the indexes inside ``string`` where ``search`` occurs | ``SDK-dependent`` | +| ``output := lower(string)`` | ``output`` is ``string`` after converting to lower case | ✅ | +| ``output := replace(string, old, new)`` | ``output`` is a ``string`` representing ``string`` with all instances of ``old`` replaced by ``new`` | ✅ | +| ``output := strings.reverse(string)`` | ``output`` is ``string`` reversed | ✅ | +| ``output := strings.replace_n(patterns, string)`` | ``patterns`` is an object with old, new string key value pairs (e.g. ``{"old1": "new1", "old2": "new2", ...}``). ``output`` is a ``string`` with all old strings inside ``patterns`` replaced by the new strings | ✅ | +| ``output := split(string, delimiter)`` | ``output`` is ``array[string]`` representing elements of ``string`` separated by ``delimiter`` | ✅ | +| ``output := sprintf(string, values)`` | ``output`` is a ``string`` representing ``string`` formatted by the values in the ``array`` ``values``. | ``SDK-dependent`` | +| ``startswith(string, search)`` | true if ``string`` begins with ``search`` | ✅ | +| ``output := substring(string, start, length)`` | ``output`` is the portion of ``string`` from index ``start`` and having a length of ``length``. If ``length`` is less than zero, ``length`` is the remainder of the ``string``. If ``start`` is greater than the length of the string, ``output`` is empty. It is invalid to pass a negative offset to this function. | ✅ | +| ``output := trim(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all leading and trailing instances of the characters in ``cutset`` removed. | ✅ | +| ``output := trim_left(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all leading instances of the characters in ``cutset`` removed. | ✅ | +| ``output := trim_prefix(string, prefix)`` | ``output`` is a ``string`` representing ``string`` with leading instance of ``prefix`` removed. If ``string`` doesn't start with prefix, ``string`` is returned unchanged. | ✅ | +| ``output := trim_right(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all trailing instances of the characters in ``cutset`` removed. | ✅ | +| ``output := trim_suffix(string, suffix)`` | ``output`` is a ``string`` representing ``string`` with trailing instance of ``suffix`` removed. If ``string`` doesn't end with suffix, ``string`` is returned unchanged. | ✅ | +| ``output := trim_space(string)`` | ``output`` is a ``string`` representing ``string`` with all leading and trailing white space removed. | ✅ | +| ``output := upper(string)`` | ``output`` is ``string`` after converting to upper case | ✅ | ### Regex | Built-in | Description | Wasm Support | diff --git a/test/cases/testdata/strings/test-strings-0925.yaml b/test/cases/testdata/strings/test-strings-0925.yaml new file mode 100644 index 0000000000..4f0888e253 --- /dev/null +++ b/test/cases/testdata/strings/test-strings-0925.yaml @@ -0,0 +1,25 @@ +cases: + - note: 'strings/indexof_n_single_match' + query: data.test.p = x + modules: + - | + package test + p := indexof_n("dogcat", "cat") + want_result: + - x: [3] + - note: 'strings/indexof_n_multiple_matches' + query: data.test.p = x + modules: + - | + package test + p := indexof_n("dogcatdogcat", "cat") + want_result: + - x: [3, 9] + - note: 'strings/indexof_n_no_match' + query: data.test.p = x + modules: + - | + package test + p := indexof_n("dogcat", "rabbit") + want_result: + - x: [] \ No newline at end of file diff --git a/topdown/strings.go b/topdown/strings.go index e5118b7780..294e678dbd 100644 --- a/topdown/strings.go +++ b/topdown/strings.go @@ -87,19 +87,19 @@ func builtinConcat(a, b ast.Value) (ast.Value, error) { return ast.String(strings.Join(strs, string(join))), nil } -func builtinIndexOf(a, b ast.Value) (ast.Value, error) { - runesEqual := func(a, b []rune) bool { - if len(a) != len(b) { +var runesEqual = func(a, b []rune) bool { + if len(a) != len(b) { + return false + } + for i, v := range a { + if v != b[i] { return false } - for i, v := range a { - if v != b[i] { - return false - } - } - return true } + return true +} +func builtinIndexOf(a, b ast.Value) (ast.Value, error) { base, err := builtins.StringOperand(a, 1) if err != nil { return nil, err @@ -130,6 +130,38 @@ func builtinIndexOf(a, b ast.Value) (ast.Value, error) { return ast.IntNumberTerm(-1).Value, nil } +func builtinIndexOfN(a, b ast.Value) (ast.Value, error) { + base, err := builtins.StringOperand(a, 1) + if err != nil { + return nil, err + } + + search, err := builtins.StringOperand(b, 2) + if err != nil { + return nil, err + } + if len(string(search)) == 0 { + return nil, fmt.Errorf("empty search character") + } + + baseRunes := []rune(string(base)) + searchRunes := []rune(string(search)) + searchLen := len(searchRunes) + + var arr []*ast.Term + for i, r := range baseRunes { + if len(baseRunes) >= i+searchLen { + if r == searchRunes[0] && runesEqual(baseRunes[i:i+searchLen], searchRunes) { + arr = append(arr, ast.IntNumberTerm(i)) + } + } else { + break + } + } + + return ast.NewArray(arr...), nil +} + func builtinSubstring(a, b, c ast.Value) (ast.Value, error) { base, err := builtins.StringOperand(a, 1) @@ -435,6 +467,7 @@ func init() { RegisterFunctionalBuiltin2(ast.FormatInt.Name, builtinFormatInt) RegisterFunctionalBuiltin2(ast.Concat.Name, builtinConcat) RegisterFunctionalBuiltin2(ast.IndexOf.Name, builtinIndexOf) + RegisterFunctionalBuiltin2(ast.IndexOfN.Name, builtinIndexOfN) RegisterFunctionalBuiltin3(ast.Substring.Name, builtinSubstring) RegisterFunctionalBuiltin2(ast.Contains.Name, builtinContains) RegisterFunctionalBuiltin2(ast.StartsWith.Name, builtinStartsWith) From cd46890da8417865744200c4386037340054920e Mon Sep 17 00:00:00 2001 From: shuheiktgw Date: Wed, 29 Dec 2021 09:34:37 +0900 Subject: [PATCH 2/4] Use a function instead of a variable Signed-off-by: shuheiktgw --- topdown/strings.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topdown/strings.go b/topdown/strings.go index 294e678dbd..ef7c8d7eaf 100644 --- a/topdown/strings.go +++ b/topdown/strings.go @@ -87,7 +87,7 @@ func builtinConcat(a, b ast.Value) (ast.Value, error) { return ast.String(strings.Join(strs, string(join))), nil } -var runesEqual = func(a, b []rune) bool { +func runesEqual(a, b []rune) bool { if len(a) != len(b) { return false } From df3979c40029c9541b52a5c317548e788e34ed57 Mon Sep 17 00:00:00 2001 From: shuheiktgw Date: Fri, 31 Dec 2021 06:37:58 +0900 Subject: [PATCH 3/4] Add unicode tests Signed-off-by: shuheiktgw --- .../testdata/strings/test-strings-0925.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/cases/testdata/strings/test-strings-0925.yaml b/test/cases/testdata/strings/test-strings-0925.yaml index 4f0888e253..a5d21524e9 100644 --- a/test/cases/testdata/strings/test-strings-0925.yaml +++ b/test/cases/testdata/strings/test-strings-0925.yaml @@ -21,5 +21,21 @@ cases: - | package test p := indexof_n("dogcat", "rabbit") + want_result: + - x: [] + - note: 'strings/indexof_n_unicode_matches' + query: data.test.p = x + modules: + - | + package test + p := indexof_n("😇😀😇😀😇😀", "😀") + want_result: + - x: [1, 3, 5] + - note: 'strings/indexof_n_unicode_no_match' + query: data.test.p = x + modules: + - | + package test + p := indexof_n("😇😀😇😀😇😀", "😂") want_result: - x: [] \ No newline at end of file From cc41dc25e4fc05644062b44a467f3d1b28878661 Mon Sep 17 00:00:00 2001 From: shuheiktgw Date: Fri, 31 Dec 2021 06:38:26 +0900 Subject: [PATCH 4/4] Remove markdown table format Signed-off-by: shuheiktgw --- docs/content/policy-reference.md | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/content/policy-reference.md b/docs/content/policy-reference.md index 899433353b..14f862cd29 100644 --- a/docs/content/policy-reference.md +++ b/docs/content/policy-reference.md @@ -371,29 +371,29 @@ complex types. ### Strings -| Built-in | Description | Wasm Support | -|---------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------| -| ``output := concat(delimiter, array_or_set)`` | ``output`` is the result of joining together the elements of ``array_or_set`` with the string ``delimiter`` | ✅ | -| ``contains(string, search)`` | true if ``string`` contains ``search`` | ✅ | -| ``endswith(string, search)`` | true if ``string`` ends with ``search`` | ✅ | -| ``output := format_int(number, base)`` | ``output`` is string representation of ``number`` in the given ``base`` | ✅ | -| ``output := indexof(string, search)`` | ``output`` is the index inside ``string`` where ``search`` first occurs, or -1 if ``search`` does not exist | ✅ | -| ``output := indexof_n(string, search)`` | ``output`` is ``array[number]`` representing the indexes inside ``string`` where ``search`` occurs | ``SDK-dependent`` | -| ``output := lower(string)`` | ``output`` is ``string`` after converting to lower case | ✅ | -| ``output := replace(string, old, new)`` | ``output`` is a ``string`` representing ``string`` with all instances of ``old`` replaced by ``new`` | ✅ | -| ``output := strings.reverse(string)`` | ``output`` is ``string`` reversed | ✅ | -| ``output := strings.replace_n(patterns, string)`` | ``patterns`` is an object with old, new string key value pairs (e.g. ``{"old1": "new1", "old2": "new2", ...}``). ``output`` is a ``string`` with all old strings inside ``patterns`` replaced by the new strings | ✅ | -| ``output := split(string, delimiter)`` | ``output`` is ``array[string]`` representing elements of ``string`` separated by ``delimiter`` | ✅ | -| ``output := sprintf(string, values)`` | ``output`` is a ``string`` representing ``string`` formatted by the values in the ``array`` ``values``. | ``SDK-dependent`` | -| ``startswith(string, search)`` | true if ``string`` begins with ``search`` | ✅ | -| ``output := substring(string, start, length)`` | ``output`` is the portion of ``string`` from index ``start`` and having a length of ``length``. If ``length`` is less than zero, ``length`` is the remainder of the ``string``. If ``start`` is greater than the length of the string, ``output`` is empty. It is invalid to pass a negative offset to this function. | ✅ | -| ``output := trim(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all leading and trailing instances of the characters in ``cutset`` removed. | ✅ | -| ``output := trim_left(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all leading instances of the characters in ``cutset`` removed. | ✅ | -| ``output := trim_prefix(string, prefix)`` | ``output`` is a ``string`` representing ``string`` with leading instance of ``prefix`` removed. If ``string`` doesn't start with prefix, ``string`` is returned unchanged. | ✅ | -| ``output := trim_right(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all trailing instances of the characters in ``cutset`` removed. | ✅ | -| ``output := trim_suffix(string, suffix)`` | ``output`` is a ``string`` representing ``string`` with trailing instance of ``suffix`` removed. If ``string`` doesn't end with suffix, ``string`` is returned unchanged. | ✅ | -| ``output := trim_space(string)`` | ``output`` is a ``string`` representing ``string`` with all leading and trailing white space removed. | ✅ | -| ``output := upper(string)`` | ``output`` is ``string`` after converting to upper case | ✅ | +| Built-in | Description | Wasm Support | +| ------- |-------------|---------------| +| ``output := concat(delimiter, array_or_set)`` | ``output`` is the result of joining together the elements of ``array_or_set`` with the string ``delimiter`` | ✅ | +| ``contains(string, search)`` | true if ``string`` contains ``search`` | ✅ | +| ``endswith(string, search)`` | true if ``string`` ends with ``search`` | ✅ | +| ``output := format_int(number, base)`` | ``output`` is string representation of ``number`` in the given ``base`` | ✅ | +| ``output := indexof(string, search)`` | ``output`` is the index inside ``string`` where ``search`` first occurs, or -1 if ``search`` does not exist | ✅ | +| ``output := indexof_n(string, search)`` | ``output`` is ``array[number]`` representing the indexes inside ``string`` where ``search`` occurs | ``SDK-dependent`` | +| ``output := lower(string)`` | ``output`` is ``string`` after converting to lower case | ✅ | +| ``output := replace(string, old, new)`` | ``output`` is a ``string`` representing ``string`` with all instances of ``old`` replaced by ``new`` | ✅ | +| ``output := strings.reverse(string)`` | ``output`` is ``string`` reversed | ✅ | +| ``output := strings.replace_n(patterns, string)`` | ``patterns`` is an object with old, new string key value pairs (e.g. ``{"old1": "new1", "old2": "new2", ...}``). ``output`` is a ``string`` with all old strings inside ``patterns`` replaced by the new strings | ✅ | +| ``output := split(string, delimiter)`` | ``output`` is ``array[string]`` representing elements of ``string`` separated by ``delimiter`` | ✅ | +| ``output := sprintf(string, values)`` | ``output`` is a ``string`` representing ``string`` formatted by the values in the ``array`` ``values``. | ``SDK-dependent`` | +| ``startswith(string, search)`` | true if ``string`` begins with ``search`` | ✅ | +| ``output := substring(string, start, length)`` | ``output`` is the portion of ``string`` from index ``start`` and having a length of ``length``. If ``length`` is less than zero, ``length`` is the remainder of the ``string``. If ``start`` is greater than the length of the string, ``output`` is empty. It is invalid to pass a negative offset to this function. | ✅ | +| ``output := trim(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all leading and trailing instances of the characters in ``cutset`` removed. | ✅ | +| ``output := trim_left(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all leading instances of the characters in ``cutset`` removed. | ✅ | +| ``output := trim_prefix(string, prefix)`` | ``output`` is a ``string`` representing ``string`` with leading instance of ``prefix`` removed. If ``string`` doesn't start with prefix, ``string`` is returned unchanged.| ✅ | +| ``output := trim_right(string, cutset)`` | ``output`` is a ``string`` representing ``string`` with all trailing instances of the characters in ``cutset`` removed. | ✅ | +| ``output := trim_suffix(string, suffix)`` | ``output`` is a ``string`` representing ``string`` with trailing instance of ``suffix`` removed. If ``string`` doesn't end with suffix, ``string`` is returned unchanged.| ✅ | +| ``output := trim_space(string)`` | ``output`` is a ``string`` representing ``string`` with all leading and trailing white space removed.| ✅ | +| ``output := upper(string)`` | ``output`` is ``string`` after converting to upper case | ✅ | ### Regex | Built-in | Description | Wasm Support |