Skip to content

Commit

Permalink
[Minor] Fix performance issue with is_utf_outside_range
Browse files Browse the repository at this point in the history
Fix performance issue, add some checking and add a few tests
  • Loading branch information
miecio45 committed Mar 18, 2019
1 parent 7160b4a commit cd08c88
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 18 deletions.
67 changes: 49 additions & 18 deletions src/lua/lua_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "libmime/email_addr.h"
#include "libmime/content_type.h"
#include "libmime/mime_headers.h"
#include "libutil/hash.h"
#include "linenoise.h"
#include <math.h>
#include <glob.h>
Expand Down Expand Up @@ -2458,6 +2459,12 @@ lua_util_is_utf_spoofed (lua_State *L)
uspoof_setChecks (spc_sgl,
USPOOF_INVISIBLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
&uc_err);
if (uc_err != U_ZERO_ERROR) {
msg_err ("Cannot set proper checks for uspoof: %s", u_errorName (uc_err));
lua_pushboolean (L, false);
uspoof_close(spc);
return 1;
}
}

ret = uspoof_checkUTF8 (spc_sgl, s1, l1, NULL, &uc_err);
Expand Down Expand Up @@ -2533,28 +2540,52 @@ lua_util_is_utf_outside_range(lua_State *L)
guint32 range_start = lua_tointeger (L, 2);
guint32 range_end = lua_tointeger (L, 3);

USpoofChecker *spc_sgl;
USet * allowed_chars;
UErrorCode uc_err = U_ZERO_ERROR;
static rspamd_lru_hash_t *validators;

if (validators == NULL) {
validators = rspamd_lru_hash_new(16, g_free, (GDestroyNotify)uspoof_close);
}

if (string_to_check) {
spc_sgl = uspoof_open (&uc_err);
if (uc_err != U_ZERO_ERROR) {
msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
lua_pushboolean (L, false);
uspoof_close(spc_sgl);
return 1;
}
guint64 hash_key = (guint64)range_end << 32 || range_start;

USpoofChecker *validator = rspamd_lru_hash_lookup(validators, &hash_key, time(NULL));

UErrorCode uc_err = U_ZERO_ERROR;

if (validator == NULL) {
USet * allowed_chars;
guint64 * creation_hash_key = g_malloc(sizeof(guint64));
*creation_hash_key = hash_key;

validator = uspoof_open (&uc_err);
if (uc_err != U_ZERO_ERROR) {
msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
lua_pushboolean (L, false);
uspoof_close(validator);
return 1;
}

allowed_chars = uset_openEmpty();
uset_addRange(allowed_chars, range_start, range_end);
uspoof_setAllowedChars(validator, allowed_chars, &uc_err);

uspoof_setChecks (validator,
USPOOF_CHAR_LIMIT | USPOOF_ANY_CASE, &uc_err);

allowed_chars = uset_openEmpty();
uset_addRange(allowed_chars, range_start, range_end);
uspoof_setAllowedChars(spc_sgl, allowed_chars, &uc_err);
uset_close(allowed_chars);

if (uc_err != U_ZERO_ERROR) {
msg_err ("Cannot configure uspoof: %s", u_errorName (uc_err));
lua_pushboolean (L, false);
uspoof_close(validator);
return 1;
}

rspamd_lru_hash_insert(validators, creation_hash_key, validator, time(NULL), 0);
}

uspoof_setChecks (spc_sgl,
USPOOF_CHAR_LIMIT | USPOOF_ANY_CASE, &uc_err);
ret = uspoof_checkUTF8 (spc_sgl, string_to_check, len_of_string, NULL, &uc_err);
uset_close(allowed_chars);
uspoof_close(spc_sgl);
ret = uspoof_checkUTF8 (validator, string_to_check, len_of_string, NULL, &uc_err);
}
else {
return luaL_error (L, "invalid arguments");
Expand Down
67 changes: 67 additions & 0 deletions test/lua/unit/rspamd_util.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
-- Unit tests for generic helpers exported by the `rspamd_util` Lua module:
-- `is_utf_outside_range` and `get_string_stats`.
context("Rspamd util for lua - check generic functions", function()
  local util = require 'rspamd_util'

  -- Table-driven cases for util.is_utf_outside_range(input, range_start, range_end).
  -- Every case uses the code point range U+0000..U+017F (Basic Latin through
  -- Latin Extended-A). `result` is the expected return value: the cases expect
  -- `true` when the input contains Cyrillic letters (which lie outside that
  -- range) and `false` for plain ASCII or Polish diacritics (which lie inside).
  local cases = {
    {
      input = "test1",
      result = false,
      range_start = 0x0000,
      range_end = 0x017f
    },
    {
      input = "test test xxx",
      result = false,
      range_start = 0x0000,
      range_end = 0x017f
    },
    {
      input = "АбЫрвАлг",
      result = true,
      range_start = 0x0000,
      range_end = 0x017f
    },
    {
      input = "АбЫрвАлг example",
      result = true,
      range_start = 0x0000,
      range_end = 0x017f
    },
    {
      input = "example ąłśćżłóę",
      result = false,
      range_start = 0x0000,
      range_end = 0x017f
    },
    {
      input = "ąłśćżłóę АбЫрвАлг",
      result = true,
      range_start = 0x0000,
      range_end = 0x017f
    },
  }

  -- One named test per case so a failure points at the offending case index.
  for i, c in ipairs(cases) do
    test("is_utf_outside_range, test case #" .. i, function()
      local actual = util.is_utf_outside_range(c.input, c.range_start, c.range_end)

      assert_equal(c.result, actual)
    end)
  end

  -- Smoke test: call the function with many distinct ranges in a row. Nothing
  -- is asserted on the return values; the test only has to complete without
  -- raising. NOTE(review): 20 is presumably chosen to exceed the size of the
  -- per-range validator cache on the C side -- confirm against lua_util.c.
  test("is_utf_outside_range, check cache", function ()
    -- Must be local: a bare assignment would create a global that leaks into
    -- every other test sharing this Lua state.
    local cache_size = 20
    for i = 1, cache_size do
      util.is_utf_outside_range("a", 0x0000, 0x0000 + i)
    end
  end)

  -- assert_error is handed the function itself, so it is invoked with no
  -- arguments at all; the test expects that call to raise an error.
  test("is_utf_outside_range, check empty string", function ()
    assert_error(util.is_utf_outside_range)
  end)

  -- "this is test 99" contains 10 letters and 2 digits.
  test("get_string_stats, test case", function()
    local res = util.get_string_stats("this is test 99")
    -- Expected value first, matching the table-driven test above.
    assert_equal(10, res.letters)
    assert_equal(2, res.digits)
  end)
end)

0 comments on commit cd08c88

Please sign in to comment.