Skip to content

Commit

Permalink
[Project] Show fasttext info
Browse files Browse the repository at this point in the history
  • Loading branch information
vstakhov committed Apr 29, 2023
1 parent 4627303 commit 2426e04
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/libmime/lang_detection.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "lang_detection.h"
#include "lang_detection_fasttext.h"
#include "libserver/logger.h"
#include "libcryptobox/cryptobox.h"
#include "libutil/multipattern.h"
Expand Down Expand Up @@ -181,6 +182,7 @@ struct rspamd_lang_detector {
UConverter *uchar_converter;
gsize short_text_limit;
gsize total_occurrences; /* number of all languages found */
gpointer fasttext_detector;
ref_entry_t ref;
};

Expand Down Expand Up @@ -766,6 +768,7 @@ rspamd_language_detector_dtor (struct rspamd_lang_detector *d)
}

kh_destroy (rspamd_stopwords_hash, d->stop_words_norm);
rspamd_lang_detection_fasttext_destroy(d->fasttext_detector);
}
}

Expand Down Expand Up @@ -886,10 +889,14 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
total += kh_size (ret->trigrams[i]);
}

ret->fasttext_detector = rspamd_lang_detection_fasttext_init(cfg);
char *fasttext_status = rspamd_lang_detection_fasttext_show_info(ret->fasttext_detector);

msg_info_config ("loaded %d languages, "
"%d trigrams",
"%d trigrams; %s",
(gint)ret->languages->len,
(gint)total);
(gint)total, fasttext_status);
g_free (fasttext_status);

if (stop_words) {
ucl_object_unref (stop_words);
Expand Down
23 changes: 23 additions & 0 deletions src/libmime/lang_detection_fasttext.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ namespace rspamd::langdet {
class fasttext_langdet {
private:
fasttext::FastText ft;
std::string model_fname;
bool loaded;

struct one_shot_buf : public std::streambuf {
Expand All @@ -53,6 +54,7 @@ class fasttext_langdet {
try {
ft.loadModel(ucl_object_tostring(model));
loaded = true;
model_fname = std::string{ucl_object_tostring(model)};
}
catch (std::exception &e) {
auto err_message = fmt::format("cannot load fasttext model: {}", e.what());
Expand Down Expand Up @@ -93,6 +95,16 @@ class fasttext_langdet {

return nullptr;
}

/**
 * Produce a one-line, human readable summary of the detector state.
 * @return a fixed notice when no model has been loaded; otherwise the model
 * file name together with the label and token counts of its dictionary
 */
auto model_info(void) const -> std::string {
	if (loaded) {
		/* Grab the dictionary handle once and query both counters through it */
		const auto dict = ft.getDictionary();

		return fmt::format("fasttext model {}: {} languages, {} tokens", model_fname,
			dict->nlabels(), dict->ntokens());
	}

	return "fasttext model is not loaded";
}
};
}
#endif
Expand All @@ -112,6 +124,17 @@ void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg)
#endif
}

/**
 * C entry point that describes the fasttext detector as a text line.
 * @param ud opaque detector handle; it is only dereferenced when fasttext
 * support is compiled in
 * @return string allocated with g_strdup; ownership passes to the caller
 */
char *rspamd_lang_detection_fasttext_show_info(void *ud)
{
#ifdef WITH_FASTTEXT
	/* Ask the C++ object for its summary, then copy it into a C string */
	const std::string description = FASTTEXT_MODEL_TO_C_API(ud)->model_info();

	return g_strdup(description.c_str());
#else
	return g_strdup("fasttext is not compiled in");
#endif
}

rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
const char *in, size_t len, int k)
{
Expand Down
7 changes: 7 additions & 0 deletions src/libmime/lang_detection_fasttext.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ struct rspamd_config;
*/
void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg);

/**
 * Show info about fasttext language detector
 * @param ud opaque detector handle, as returned by rspamd_lang_detection_fasttext_init
 * @return newly allocated, NUL-terminated description string (created with
 * g_strdup); the caller owns it and must release it with g_free
 */
char *rspamd_lang_detection_fasttext_show_info(void *ud);


typedef void * rspamd_fasttext_predict_result_t;
/**
Expand Down

0 comments on commit 2426e04

Please sign in to comment.