From afbe526e73c5f094404fe7790d46f19c8b1e9752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Romain=20Tarti=C3=A8re?= Date: Tue, 21 Nov 2023 16:42:41 -1000 Subject: [PATCH] patterndb: Add support for nested quoted string in @QSTRING@ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using 2 chars to match quoted strings (e.g. `()`, `[]`, `{}`), we want to detect nesting to capture the whole containing expression. Matching `(foo (bar (baz) qux)) quux` against `@QSTRING::()@` previously captured `(foo (bar (baz)` and now captures `(foo (bar (baz) qux))`. Fixes: #4716 Signed-off-by: Romain Tartière --- modules/correlation/radix.c | 67 ++++++++++++++++++++++---- modules/correlation/tests/test_radix.c | 15 ++++++ 2 files changed, 72 insertions(+), 10 deletions(-) diff --git a/modules/correlation/radix.c b/modules/correlation/radix.c index 12763659a9..0fe66bb25c 100644 --- a/modules/correlation/radix.c +++ b/modules/correlation/radix.c @@ -49,12 +49,12 @@ r_parser_string(gchar *str, gint *len, const gchar *param, gpointer state, RPars return FALSE; } -gboolean -r_parser_qstring(gchar *str, gint *len, const gchar *param, gpointer state, RParserMatch *match) +static gboolean +r_parser_single_delimiter_qstring(gchar *str, gint *len, gchar stop_char, gpointer state, RParserMatch *match) { gchar *end; - if ((end = strchr(str + 1, ((gchar *)&state)[0])) != NULL) + if ((end = strchr(str + 1, stop_char)) != NULL) { *len = (end - str) + 1; @@ -71,6 +71,60 @@ r_parser_qstring(gchar *str, gint *len, const gchar *param, gpointer state, RPar return FALSE; } +static gboolean +r_parser_open_close_delimiter_qstring(gchar *str, gint *len, gchar start_char, gchar stop_char, gpointer state, + RParserMatch *match) +{ + int nesting_level = 0; + + gchar *end = str; + + while (*end) + { + if (*end == stop_char) + { + nesting_level--; + + if (nesting_level < 0) + return FALSE; + + if (nesting_level == 0) + { + *len = (end - str) + 1; + + if (match) + { + /* 
skip starting and ending quote */ + match->ofs = 1; + match->len = -2; + } + + return TRUE; + } + } + else if (*end == start_char) + { + nesting_level++; + } + + end++; + } + + return FALSE; +} + +gboolean +r_parser_qstring(gchar *str, gint *len, const gchar *param, gpointer state, RParserMatch *match) +{ + gchar start_char = param[0]; + gchar stop_char = param[1] ? param[1] : param[0]; + + if (start_char == stop_char) + return r_parser_single_delimiter_qstring(str, len, stop_char, state, match); + else + return r_parser_open_close_delimiter_qstring(str, len, start_char, stop_char, state, match); +} + gboolean r_parser_estring_c(gchar *str, gint *len, const gchar *param, gpointer state, RParserMatch *match) { @@ -761,17 +815,10 @@ r_new_pnode(gchar *key, const gchar *capture_prefix) { if (params_len == 3) { - gchar *state = (gchar *) &(parser_node->state); - parser_node->parse = r_parser_qstring; parser_node->parser_type = RPT_QSTRING; parser_node->first = params[2][0]; parser_node->last = params[2][0]; - - if (params_len >= 2 && params[2] && strlen(params[2]) == 2) - state[0] = params[2][1]; - else - state[0] = params[2][0]; } else { diff --git a/modules/correlation/tests/test_radix.c b/modules/correlation/tests/test_radix.c index 6e95e82b8a..a1fabaca65 100644 --- a/modules/correlation/tests/test_radix.c +++ b/modules/correlation/tests/test_radix.c @@ -730,6 +730,21 @@ ParameterizedTestParameters(dbparser, test_radix_search_matches) .key = "'quoted string' hehehe", .expected_pattern = {"qstring", "quoted string", NULL} }, + { + .node_to_insert = {"@QSTRING:qstring:()@", NULL}, + .key = "(quoted string) hehehe", + .expected_pattern = {"qstring", "quoted string", NULL} + }, + { + .node_to_insert = {"@QSTRING:qstring:()@", NULL}, + .key = "(nested (quoted string())) hehehe", + .expected_pattern = {"qstring", "nested (quoted string())", NULL} + }, + { + .node_to_insert = {"@QSTRING:qstring:()@", NULL}, + .key = "(unbalanced (nested (quoted string())) hehehe", + 
.expected_pattern = {NULL} + }, { .node_to_insert = {"@QSTRING:qstring:'@", NULL}, .key = "v12345",