Skip to content

Commit

Permalink
[Minor] Fix `www.` prefix matcher
Browse files: browse the repository at this point in the history
  • Loading branch information
vstakhov committed Aug 24, 2023
1 parent e42de23 commit 8a9452e
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 16 deletions.
1 change: 1 addition & 0 deletions src/libmime/scan_result.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ rspamd_scan_result_dtor(gpointer d)
kh_destroy(rspamd_options_hash, sres->options);
}
});

kh_destroy(rspamd_symbols_hash, r->symbols);
kh_destroy(rspamd_symbols_group_hash, r->sym_groups);
}
Expand Down
18 changes: 17 additions & 1 deletion src/libmime/scan_result.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
/*
* Copyright 2023 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @file scan_result.h
* Scan result holder
Expand Down Expand Up @@ -43,7 +59,7 @@ struct rspamd_symbol_result {
gssize opts_len; /**< total size of all options (negative if truncated option is added) */
guint nshots;
int flags;
struct rspamd_symbol_result *next;
struct rspamd_symbol_result *next; /**< for shadow results */
};


Expand Down
30 changes: 15 additions & 15 deletions src/libserver/url.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ typedef struct url_match_s {
gchar st;
} url_match_t;

#define URL_FLAG_NOHTML (1u << 0u)
#define URL_FLAG_TLD_MATCH (1u << 1u)
#define URL_FLAG_STAR_MATCH (1u << 2u)
#define URL_FLAG_REGEXP (1u << 3u)
#define URL_MATCHER_FLAG_NOHTML (1u << 0u)
#define URL_MATCHER_FLAG_TLD_MATCH (1u << 1u)
#define URL_MATCHER_FLAG_STAR_MATCH (1u << 2u)
#define URL_MATCHER_FLAG_REGEXP (1u << 3u)

struct url_callback_data;

Expand Down Expand Up @@ -163,8 +163,8 @@ struct url_matcher static_matchers[] = {
0},
{"sip:", "", url_web_start, url_web_end,
0},
{"www.", "http://", url_web_start, url_web_end,
0},
{"www\\.[0-9a-z]", "http://", url_web_start, url_web_end,
URL_MATCHER_FLAG_REGEXP},
{"ftp.", "ftp://", url_web_start, url_web_end,
0},
/* Likely emails */
Expand Down Expand Up @@ -449,10 +449,10 @@ rspamd_url_parse_tld_file(const gchar *fname,
continue;
}

flags = URL_FLAG_NOHTML | URL_FLAG_TLD_MATCH;
flags = URL_MATCHER_FLAG_NOHTML | URL_MATCHER_FLAG_TLD_MATCH;

if (linebuf[0] == '*') {
flags |= URL_FLAG_STAR_MATCH;
flags |= URL_MATCHER_FLAG_STAR_MATCH;
p = strchr(linebuf, '.');

if (p == NULL) {
Expand Down Expand Up @@ -486,7 +486,7 @@ rspamd_url_add_static_matchers(struct url_match_scanner *sc)
gint n = G_N_ELEMENTS(static_matchers), i;

for (i = 0; i < n; i++) {
if (static_matchers[i].flags & URL_FLAG_REGEXP) {
if (static_matchers[i].flags & URL_MATCHER_FLAG_REGEXP) {
rspamd_multipattern_add_pattern(url_scanner->search_trie_strict,
static_matchers[i].pattern,
RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8 |
Expand All @@ -503,7 +503,7 @@ rspamd_url_add_static_matchers(struct url_match_scanner *sc)

if (sc->matchers_full) {
for (i = 0; i < n; i++) {
if (static_matchers[i].flags & URL_FLAG_REGEXP) {
if (static_matchers[i].flags & URL_MATCHER_FLAG_REGEXP) {
rspamd_multipattern_add_pattern(url_scanner->search_trie_full,
static_matchers[i].pattern,
RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8 |
Expand Down Expand Up @@ -1664,7 +1664,7 @@ rspamd_tld_trie_callback(struct rspamd_multipattern *mp,
strnum);
ndots = 1;

if (matcher->flags & URL_FLAG_STAR_MATCH) {
if (matcher->flags & URL_MATCHER_FLAG_STAR_MATCH) {
/* Skip one more tld component */
ndots++;
}
Expand Down Expand Up @@ -2595,7 +2595,7 @@ rspamd_tld_trie_find_callback(struct rspamd_multipattern *mp,
matcher = &g_array_index(url_scanner->matchers_full, struct url_matcher,
strnum);

if (matcher->flags & URL_FLAG_STAR_MATCH) {
if (matcher->flags & URL_MATCHER_FLAG_STAR_MATCH) {
/* Skip one more tld component */
ndots = 2;
}
Expand Down Expand Up @@ -3107,7 +3107,7 @@ static gboolean
rspamd_url_trie_is_match(struct url_matcher *matcher, const gchar *pos,
const gchar *end, const gchar *newline_pos)
{
if (matcher->flags & URL_FLAG_TLD_MATCH) {
if (matcher->flags & URL_MATCHER_FLAG_TLD_MATCH) {
/* Immediately check pos for valid chars */
if (pos < end) {
if (pos != newline_pos && !g_ascii_isspace(*pos) && *pos != '/' && *pos != '?' &&
Expand Down Expand Up @@ -3156,7 +3156,7 @@ rspamd_url_trie_callback(struct rspamd_multipattern *mp,
matcher = &g_array_index(cb->matchers, struct url_matcher,
strnum);

if ((matcher->flags & URL_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
if ((matcher->flags & URL_MATCHER_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
/* Do not try to match non-html like urls in html texts */
return 0;
}
Expand Down Expand Up @@ -3313,7 +3313,7 @@ rspamd_url_trie_generic_callback_common(struct rspamd_multipattern *mp,
strnum);
pool = cb->pool;

if ((matcher->flags & URL_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
if ((matcher->flags & URL_MATCHER_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
/* Do not try to match non-html like urls in html texts, continue matching */
return 0;
}
Expand Down

0 comments on commit 8a9452e

Please sign in to comment.