Skip to content

Commit

Permalink
[Feature] Core: Add libmagic detection for all parts
Browse files Browse the repository at this point in the history
  • Loading branch information
vstakhov committed Dec 11, 2018
1 parent 8c2a9cf commit 1912eac
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 30 deletions.
4 changes: 2 additions & 2 deletions src/libmime/archives.c
Expand Up @@ -1509,8 +1509,8 @@ rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str,
}

if (magic_start != NULL) {
if (part->parsed_data.len > magic_len && memcmp (part->parsed_data.begin,
magic_start, magic_len) == 0) {
if (part->parsed_data.len > magic_len &&
memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
return TRUE;
}
}
Expand Down
51 changes: 39 additions & 12 deletions src/libmime/message.c
Expand Up @@ -703,7 +703,7 @@ rspamd_message_process_html_text_part (struct rspamd_task *task,
return TRUE;
}

static void
static gboolean
rspamd_message_process_text_part_maybe (struct rspamd_task *task,
struct rspamd_mime_part *mime_part)
{
Expand Down Expand Up @@ -812,11 +812,11 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT &&
(task->cfg && !task->cfg->check_text_attachements)) {
debug_task ("skip attachments for checking as text parts");
return;
return TRUE;
}
else if (!(found_txt || found_html)) {
/* Not a text part */
return;
return FALSE;
}

text_part = rspamd_mempool_alloc0 (task->task_pool,
Expand All @@ -830,12 +830,12 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,

if (found_html) {
if (!rspamd_message_process_html_text_part (task, text_part)) {
return;
return FALSE;
}
}
else {
if (!rspamd_message_process_plain_text_part (task, text_part)) {
return;
return FALSE;
}
}

Expand Down Expand Up @@ -866,7 +866,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,

rspamd_task_insert_result (task, GTUBE_SYMBOL, 0, NULL);

return;
return TRUE;
}

/* Post process part */
Expand All @@ -885,6 +885,8 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
}

rspamd_mime_part_create_words (task, text_part);

return TRUE;
}

/* Creates message from various data using libmagic to detect type */
Expand All @@ -900,15 +902,18 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,

g_assert (start != NULL);

part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));

tok = rspamd_task_get_request_header (task, "Content-Type");

if (tok) {
/* We have Content-Type defined */
ct = rspamd_content_type_parse (tok->begin, tok->len,
task->task_pool);
part->ct = ct;
}
else if (task->cfg && task->cfg->libs_ctx) {
/* Try to predict it by content (slow) */

if (task->cfg && task->cfg->libs_ctx) {
mb = magic_buffer (task->cfg->libs_ctx->libmagic,
start,
len);
Expand All @@ -918,12 +923,16 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
srch.len = strlen (mb);
ct = rspamd_content_type_parse (srch.begin, srch.len,
task->task_pool);
msg_warn_task ("construct fake mime of type: %s", mb);

if (!part->ct) {
part->ct = ct;
}

part->detected_ct = ct;
}
}

msg_warn_task ("construct fake mime of type: %s", mb);
part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));
part->ct = ct;
part->raw_data.begin = start;
part->raw_data.len = len;
part->parsed_data.begin = start;
Expand Down Expand Up @@ -1189,7 +1198,25 @@ rspamd_message_process (struct rspamd_task *task)
struct rspamd_mime_part *part;

part = g_ptr_array_index (task->parts, i);
rspamd_message_process_text_part_maybe (task, part);


if (!rspamd_message_process_text_part_maybe (task, part) &&
part->parsed_data.len > 0) {
const gchar *mb = magic_buffer (task->cfg->libs_ctx->libmagic,
part->parsed_data.begin,
part->parsed_data.len);

if (mb) {
rspamd_ftok_t srch;

srch.begin = mb;
srch.len = strlen (mb);
part->detected_ct = rspamd_content_type_parse (srch.begin,
srch.len,
task->task_pool);
}

}
}

rspamd_images_process (task);
Expand Down
1 change: 1 addition & 0 deletions src/libmime/message.h
Expand Up @@ -47,6 +47,7 @@ struct rspamd_mime_multipart {

struct rspamd_mime_part {
struct rspamd_content_type *ct;
struct rspamd_content_type *detected_ct;
struct rspamd_content_disposition *cd;
rspamd_ftok_t raw_data;
rspamd_ftok_t parsed_data;
Expand Down
87 changes: 71 additions & 16 deletions src/lua/lua_mimepart.c
Expand Up @@ -333,6 +333,20 @@ LUA_FUNCTION_DEF (mimepart, get_type);
*/
LUA_FUNCTION_DEF (mimepart, get_type_full);

/***
* @method mime_part:get_detected_type()
* Extract the content type that libmagic detected from the part's content,
* as opposed to the declared type returned by `get_type`.
* @return {string,string} content type in form 'type','subtype', or nil,nil if no detected type is stored for this part
*/
LUA_FUNCTION_DEF (mimepart, get_detected_type);

/***
* @method mime_part:get_detected_type_full()
* Extract the content type that libmagic detected from the part's content,
* together with all of its attributes, as opposed to the declared type
* returned by `get_type_full`.
* @return {string,string,table} content type in form 'type','subtype', {attrs}, or nil,nil if no detected type is stored for this part
*/
LUA_FUNCTION_DEF (mimepart, get_detected_type_full);

/***
* @method mime_part:get_cte()
* Extract content-transfer-encoding for a part
Expand Down Expand Up @@ -457,6 +471,8 @@ static const struct luaL_reg mimepartlib_m[] = {
LUA_INTERFACE_DEF (mimepart, get_length),
LUA_INTERFACE_DEF (mimepart, get_type),
LUA_INTERFACE_DEF (mimepart, get_type_full),
LUA_INTERFACE_DEF (mimepart, get_detected_type),
LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
LUA_INTERFACE_DEF (mimepart, get_cte),
LUA_INTERFACE_DEF (mimepart, get_filename),
LUA_INTERFACE_DEF (mimepart, get_header),
Expand Down Expand Up @@ -1189,48 +1205,49 @@ lua_mimepart_get_length (lua_State * L)
}

static gint
lua_mimepart_get_type_common (lua_State * L, gboolean full)
lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct,
gboolean full)
{
struct rspamd_mime_part *part = lua_check_mimepart (L);

GHashTableIter it;
gpointer k, v;
struct rspamd_content_type_param *param;

if (part == NULL) {
if (ct == NULL) {
lua_pushnil (L);
lua_pushnil (L);
return 2;
}

lua_pushlstring (L, part->ct->type.begin, part->ct->type.len);
lua_pushlstring (L, part->ct->subtype.begin, part->ct->subtype.len);
lua_pushlstring (L, ct->type.begin, ct->type.len);
lua_pushlstring (L, ct->subtype.begin, ct->subtype.len);

if (!full) {
return 2;
}

lua_createtable (L, 0, 2 + (part->ct->attrs ?
g_hash_table_size (part->ct->attrs) : 0));
lua_createtable (L, 0, 2 + (ct->attrs ?
g_hash_table_size (ct->attrs) : 0));

if (part->ct->charset.len > 0) {
if (ct->charset.len > 0) {
lua_pushstring (L, "charset");
lua_pushlstring (L, part->ct->charset.begin, part->ct->charset.len);
lua_pushlstring (L, ct->charset.begin, ct->charset.len);
lua_settable (L, -3);
}

if (part->ct->boundary.len > 0) {
if (ct->boundary.len > 0) {
lua_pushstring (L, "charset");
lua_pushlstring (L, part->ct->boundary.begin, part->ct->boundary.len);
lua_pushlstring (L, ct->boundary.begin, ct->boundary.len);
lua_settable (L, -3);
}

if (part->ct->attrs) {
g_hash_table_iter_init (&it, part->ct->attrs);
if (ct->attrs) {
g_hash_table_iter_init (&it, ct->attrs);

while (g_hash_table_iter_next (&it, &k, &v)) {
param = v;

if (param->name.len > 0 && param->name.len > 0) {
if (param->name.len > 0 && param->value.len > 0) {
/* TODO: think about multiple values here */
lua_pushlstring (L, param->name.begin, param->name.len);
lua_pushlstring (L, param->value.begin, param->value.len);
Expand All @@ -1246,14 +1263,52 @@ static gint
lua_mimepart_get_type (lua_State * L)
{
LUA_TRACE_POINT;
return lua_mimepart_get_type_common (L, FALSE);
struct rspamd_mime_part *part = lua_check_mimepart (L);

if (part == NULL) {
return luaL_error (L, "invalid arguments");
}

return lua_mimepart_get_type_common (L, part->ct, FALSE);
}

static gint
lua_mimepart_get_type_full (lua_State * L)
{
LUA_TRACE_POINT;
return lua_mimepart_get_type_common (L, TRUE);
struct rspamd_mime_part *part = lua_check_mimepart (L);

if (part == NULL) {
return luaL_error (L, "invalid arguments");
}

return lua_mimepart_get_type_common (L, part->ct, TRUE);
}

/*
 * Lua method mime_part:get_detected_type().
 * Forwards the part's detected_ct to the shared content-type pusher in
 * short form (full == FALSE). Raises a Lua error when the argument is not
 * a valid mime part.
 */
static gint
lua_mimepart_get_detected_type (lua_State * L)
{
	LUA_TRACE_POINT;
	struct rspamd_mime_part *mime_part = lua_check_mimepart (L);

	if (mime_part != NULL) {
		/* detected_ct may be NULL; the common helper handles that case */
		return lua_mimepart_get_type_common (L, mime_part->detected_ct, FALSE);
	}

	return luaL_error (L, "invalid arguments");
}

/*
 * Lua method mime_part:get_detected_type_full().
 * Forwards the part's detected_ct to the shared content-type pusher in
 * full form (full == TRUE). Raises a Lua error when the argument is not
 * a valid mime part.
 */
static gint
lua_mimepart_get_detected_type_full (lua_State * L)
{
	LUA_TRACE_POINT;
	struct rspamd_mime_part *mime_part = lua_check_mimepart (L);

	if (mime_part != NULL) {
		/* detected_ct may be NULL; the common helper handles that case */
		return lua_mimepart_get_type_common (L, mime_part->detected_ct, TRUE);
	}

	return luaL_error (L, "invalid arguments");
}

static gint
Expand Down

0 comments on commit 1912eac

Please sign in to comment.