Skip to content

Commit

Permalink
[Feature] Reorganise struct rspamd_url to be 64 bytes size
Browse files Browse the repository at this point in the history
  • Loading branch information
vstakhov committed Jul 23, 2023
1 parent 80e8a76 commit eef2f3c
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 48 deletions.
21 changes: 14 additions & 7 deletions src/libserver/html/html_url.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,12 @@ html_url_is_phished(rspamd_mempool_t *pool,

if (!rspamd_url_is_subdomain(disp_tok, href_tok)) {
href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
href_url->linked_url = text_url;
text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;

if (href_url->ext == nullptr) {
href_url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
}
href_url->ext->linked_url = text_url;
}
}
}
Expand Down Expand Up @@ -241,18 +245,21 @@ html_check_displayed_url(rspamd_mempool_t *pool,
return;
}

url->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
rspamd_strlcpy(url->visible_part,
if (url->ext == nullptr) {
url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
}
url->ext->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
rspamd_strlcpy(url->ext->visible_part,
visible_part.data(),
visible_part.size() + 1);
dlen = visible_part.size();

/* Strip unicode spaces from the start and the end */
url->visible_part = const_cast<char *>(
rspamd_string_unicode_trim_inplace(url->visible_part,
url->ext->visible_part = const_cast<char *>(
rspamd_string_unicode_trim_inplace(url->ext->visible_part,
&dlen));
auto maybe_url = html_url_is_phished(pool, url,
{url->visible_part, dlen});
{url->ext->visible_part, dlen});

if (maybe_url) {
url->flags |= saved_flags;
Expand Down Expand Up @@ -300,7 +307,7 @@ html_check_displayed_url(rspamd_mempool_t *pool,
}
}

rspamd_normalise_unicode_inplace(url->visible_part, &dlen);
rspamd_normalise_unicode_inplace(url->ext->visible_part, &dlen);
}

auto
Expand Down
6 changes: 3 additions & 3 deletions src/libserver/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -909,9 +909,9 @@ rspamd_protocol_extended_url (struct rspamd_task *task,

ucl_object_insert_key (obj, flags, "flags", 0, false);

if (url->linked_url) {
encoded = rspamd_url_encode (url->linked_url, &enclen, task->task_pool);
elt = rspamd_protocol_extended_url (task, url->linked_url, encoded,
if (url->ext && url->ext->linked_url) {
encoded = rspamd_url_encode (url->ext->linked_url, &enclen, task->task_pool);
elt = rspamd_protocol_extended_url (task, url->ext->linked_url, encoded,
enclen);
ucl_object_insert_key (obj, elt, "linked_url", 0, false);
}
Expand Down
5 changes: 2 additions & 3 deletions src/libserver/url.c
Original file line number Diff line number Diff line change
Expand Up @@ -1797,11 +1797,11 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a
uri->flags |= RSPAMD_URL_FLAG_NUMERIC;

/* Reconstruct URL */
if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT) {
if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT && uri->ext) {
p = strbuf + r;
start_offset = p + 1;
r += rspamd_snprintf (strbuf + r, slen - r, ":%ud",
(unsigned int)uri->port);
(unsigned int)uri->ext->port);
}
if (uri->datalen > 0) {
p = strbuf + r;
Expand Down Expand Up @@ -2351,7 +2351,6 @@ rspamd_url_parse (struct rspamd_url *uri,
}
}

uri->port = u.port;
uri->flags = flags;

if (!uri->hostlen) {
Expand Down
73 changes: 50 additions & 23 deletions src/libserver/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,35 +52,46 @@ struct rspamd_url_tag {
struct rspamd_url_tag *prev, *next;
};


struct rspamd_url_ext;
/**
* URL structure
*/
struct rspamd_url {
gchar *string;
gchar *raw;
char *string;
char *raw;
struct rspamd_url_ext *ext;

gchar *visible_part;
struct rspamd_url *linked_url;
uint32_t flags;

guint32 flags;
uint8_t protocol;
uint8_t protocollen;

guint8 protocol;
guint8 protocollen;

guint16 port;
uint16_t hostshift;
uint16_t datashift;
uint16_t queryshift;
uint16_t fragmentshift;
uint16_t tldshift;
guint16 usershift;
guint16 hostshift;
guint16 datashift;
guint16 queryshift;
guint16 fragmentshift;
guint16 tldshift;
guint16 userlen;
guint16 hostlen;
guint16 datalen;
guint16 querylen;
guint16 fragmentlen;
guint16 tldlen;
guint16 count;
guint16 urllen;
guint16 rawlen;

uint16_t hostlen;
uint16_t datalen;
uint16_t querylen;
uint16_t fragmentlen;
uint16_t tldlen;
uint16_t count;
uint16_t urllen;
uint16_t rawlen;
};

/**
 * Rarely used url fields.
 *
 * Kept out of struct rspamd_url and reached through its `ext` pointer.
 * Writers allocate the extension lazily (only when `ext` is still NULL),
 * so readers must check `ext` before dereferencing it.
 *
 * Uses plain C / <stdint.h> types to match struct rspamd_url.
 */
struct rspamd_url_ext {
	/* Text displayed for the link; filled in by html_check_displayed_url */
	char *visible_part;
	/* Related url: the displayed url for phished links, the target for redirected ones */
	struct rspamd_url *linked_url;

	/* Explicit port; rspamd_url_get_port reads it only when RSPAMD_URL_FLAG_HAS_PORT is set */
	uint16_t port;
};

#define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL)
Expand Down Expand Up @@ -350,6 +361,22 @@ int rspamd_url_cmp(const struct rspamd_url *u1, const struct rspamd_url *u2);
*/
int rspamd_url_cmp_qsort(const void *u1, const void *u2);

/**
 * Get the port to use for an url
 * @param u url to inspect (dereferenced unconditionally)
 * @return the port stored in the url extension when RSPAMD_URL_FLAG_HAS_PORT
 * is set and an extension is attached; otherwise 443 for PROTOCOL_HTTPS and
 * 80 for every other protocol
 */
static inline uint16_t rspamd_url_get_port(struct rspamd_url *u)
{
	if (!(u->flags & RSPAMD_URL_FLAG_HAS_PORT) || !u->ext) {
		/* No explicit port is available: assume the standard one */
		return (u->protocol == PROTOCOL_HTTPS) ? 443 : 80;
	}

	return u->ext->port;
}

/**
* Normalize unicode input and set out url flags as appropriate
* @param pool
Expand Down
30 changes: 18 additions & 12 deletions src/lua/lua_url.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ lua_url_get_port (lua_State *L)
struct rspamd_lua_url *url = lua_check_url (L, 1);

if (url != NULL) {
lua_pushinteger (L, url->url->port);
lua_pushinteger (L, rspamd_url_get_port(url->url));
}
else {
lua_pushnil (L);
Expand Down Expand Up @@ -475,12 +475,13 @@ lua_url_get_phished (lua_State *L)
struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);

if (url) {
if (url->url->linked_url != NULL) {
if (url->url->ext && url->url->ext->linked_url != NULL) {
/* XXX: in fact, this is the only possible combination of flags, so this check is redundant */
if (url->url->flags &
(RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
rspamd_lua_setclass (L, "rspamd{url}", -1);
purl->url = url->url->linked_url;
purl->url = url->url->ext->linked_url;

return 1;
}
Expand Down Expand Up @@ -535,7 +536,11 @@ lua_url_set_redirected (lua_State *L)
redir = lua_check_url (L, -1);

url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
url->url->linked_url = redir->url;

if (url->url->ext == NULL) {
url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
}
url->url->ext->linked_url = redir->url;
}
}
else {
Expand All @@ -546,7 +551,10 @@ lua_url_set_redirected (lua_State *L)
}

url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
url->url->linked_url = redir->url;
if (url->url->ext == NULL) {
url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
}
url->url->ext->linked_url = redir->url;

/* Push back on stack */
lua_pushvalue (L, 2);
Expand Down Expand Up @@ -629,8 +637,8 @@ lua_url_get_visible (lua_State *L)
LUA_TRACE_POINT;
struct rspamd_lua_url *url = lua_check_url (L, 1);

if (url != NULL && url->url->visible_part) {
lua_pushstring (L, url->url->visible_part);
if (url != NULL && url->url->ext && url->url->ext->visible_part) {
lua_pushstring (L, url->url->ext->visible_part);
}
else {
lua_pushnil (L);
Expand Down Expand Up @@ -671,11 +679,9 @@ lua_url_to_table (lua_State *L)
lua_settable (L, -3);
}

if (u->port != 0) {
lua_pushstring (L, "port");
lua_pushinteger (L, u->port);
lua_settable (L, -3);
}
lua_pushstring (L, "port");
lua_pushinteger (L, rspamd_url_get_port(u));
lua_settable (L, -3);

if (u->tldlen > 0) {
lua_pushstring (L, "tld");
Expand Down

0 comments on commit eef2f3c

Please sign in to comment.