Skip to content

Commit

Permalink
[Fix] Fix trie code when there are regexps and Hyperscan is absent
Browse files: browse the repository at this point in the history
  • Loading branch information
vstakhov committed Sep 10, 2019
1 parent eeb429b commit d0974f0
Showing 1 changed file with 63 additions and 11 deletions.
74 changes: 63 additions & 11 deletions src/libutil/multipattern.c
Expand Up @@ -25,6 +25,7 @@
#include "hs.h"
#endif
#include "acism.h"
#include "libutil/regexp.h"
#include <stdalign.h>

#define MAX_SCRATCH 4
Expand All @@ -51,6 +52,7 @@ struct RSPAMD_ALIGNED(64) rspamd_multipattern {
#endif
ac_trie_t *t;
GArray *pats;
GArray *res;

gboolean compiled;
guint cnt;
Expand Down Expand Up @@ -192,14 +194,14 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
gsize *dst_len)
{
gchar *ret = NULL;
#ifdef WITH_HYPERSCAN
if (rspamd_hs_check ()) {
gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;

if (flags & RSPAMD_MULTIPATTERN_UTF8) {
gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
}
if (flags & RSPAMD_MULTIPATTERN_UTF8) {
gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
}

#ifdef WITH_HYPERSCAN
if (rspamd_hs_check ()) {
if (flags & RSPAMD_MULTIPATTERN_TLD) {
gchar *tmp;
gsize tlen;
Expand Down Expand Up @@ -228,6 +230,14 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
if (flags & RSPAMD_MULTIPATTERN_TLD) {
ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len);
}
else if (flags & RSPAMD_MULTIPATTERN_RE) {
ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags |
RSPAMD_REGEXP_ESCAPE_RE);
}
else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
ret = rspamd_str_regexp_escape (pattern, len, dst_len,
gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
}
else {
ret = malloc (len + 1);
*dst_len = rspamd_strlcpy (ret, pattern, len + 1);
Expand Down Expand Up @@ -496,7 +506,30 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
#endif

if (mp->cnt > 0) {
mp->t = acism_create ((const ac_trie_pat_t *)mp->pats->data, mp->cnt);

if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
/* Fallback to pcre... */
rspamd_regexp_t *re;
mp->res = g_array_sized_new (FALSE, TRUE,
sizeof (rspamd_regexp_t *), mp->cnt);

for (guint i = 0; i < mp->cnt; i ++) {
const ac_trie_pat_t *pat;

pat = &g_array_index (mp->pats, ac_trie_pat_t, i);

re = rspamd_regexp_new (pat->ptr, NULL, err);

if (re == NULL) {
return FALSE;
}

g_array_append_val (mp->res, re);
}
}
else {
mp->t = acism_create ((const ac_trie_pat_t *) mp->pats->data, mp->cnt);
}
}

mp->compiled = TRUE;
Expand Down Expand Up @@ -617,11 +650,30 @@ rspamd_multipattern_lookup (struct rspamd_multipattern *mp,

gint state = 0;

ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
&state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
/* Terribly inefficient, but who cares - just use hyperscan */
for (guint i = 0; i < mp->cnt; i ++) {
rspamd_regexp_t *re = g_array_index (mp->res, rspamd_regexp_t *, i);
const gchar *start = NULL, *end = NULL;

while (rspamd_regexp_search (re,
in,
len,
&start,
&end,
TRUE,
NULL)) {
ret = rspamd_multipattern_acism_cb (i, end - in, &cbd);
}
}
}
else {
ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
&state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);

if (pnfound) {
*pnfound = cbd.nfound;
if (pnfound) {
*pnfound = cbd.nfound;
}
}

return ret;
Expand Down

0 comments on commit d0974f0

Please sign in to comment.