Skip to content

Commit

Permalink
Merge pull request #3028 from systemcrash/patch-2
Browse files (browse the repository at this point in the history)
Update bayes.c
  • Loading branch information
vstakhov committed Sep 2, 2019
2 parents 3b02843 + 217642d commit ab807c1
Showing 1 changed file with 17 additions and 17 deletions.
34 changes: 17 additions & 17 deletions src/libstat/classifiers/bayes.c
Expand Up @@ -80,7 +80,7 @@ inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg)

sum = prob;

msg_debug_bayes ("m: %f, prob: %g", m, prob);
msg_debug_bayes ("m: %f, probability: %g", m, prob);

/*
* m is our confidence in class
Expand All @@ -91,7 +91,7 @@ inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg)
for (i = 1; i < freedom_deg; i++) {
prob *= m / (gdouble)i;
sum += prob;
msg_debug_bayes ("i=%d, prob: %g, sum: %g", i, prob, sum);
msg_debug_bayes ("i=%d, probability: %g, sum: %g", i, prob, sum);
}

return MIN (1.0, sum);
Expand Down Expand Up @@ -197,7 +197,7 @@ bayes_classify_token (struct rspamd_classifier *ctx,
if ((bayes_spam_prob > 0.5 && bayes_spam_prob < 0.5 + ctx->cfg->min_prob_strength) ||
(bayes_spam_prob < 0.5 && bayes_spam_prob > 0.5 - ctx->cfg->min_prob_strength)) {
msg_debug_bayes (
"token %uL <%*s:%*s> skipped, prob not in range: %f",
"token %uL <%*s:%*s> skipped, probability not in range: %f",
tok->data,
(int) tok->t1->stemmed.len, tok->t1->stemmed.begin,
(int) tok->t2->stemmed.len, tok->t2->stemmed.begin,
Expand Down Expand Up @@ -225,7 +225,7 @@ bayes_classify_token (struct rspamd_classifier *ctx,
"spam_count: %ud, ham_count: %ud,"
"spam_prob: %.3f, ham_prob: %.3f, "
"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
"current spam prob: %.3f, current ham prob: %.3f",
"current spam probability: %.3f, current ham probability: %.3f",
token_type,
tok->data,
(int) tok->t1->stemmed.len, tok->t1->stemmed.begin,
Expand All @@ -241,7 +241,7 @@ bayes_classify_token (struct rspamd_classifier *ctx,
"spam_count: %ud, ham_count: %ud,"
"spam_prob: %.3f, ham_prob: %.3f, "
"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
"current spam prob: %.3f, current ham prob: %.3f",
"current spam probability: %.3f, current ham probability: %.3f",
token_type,
tok->data,
fw, w, total_count, spam_count, ham_count,
Expand Down Expand Up @@ -291,15 +291,15 @@ bayes_classify (struct rspamd_classifier * ctx,
/* Check min learns */
if (ctx->cfg->min_learns > 0) {
if (ctx->ham_learns < ctx->cfg->min_learns) {
msg_info_task ("skip classification as ham class has not enough "
"learns: %ul, %ud required",
msg_info_task ("not classified as ham. The ham class needs more "
"training samples. Currently: %ul; minimum %ud required",
ctx->ham_learns, ctx->cfg->min_learns);

return TRUE;
}
if (ctx->spam_learns < ctx->cfg->min_learns) {
msg_info_task ("skip classification as spam class has not enough "
"learns: %ul, %ud required",
msg_info_task ("not classified as spam. The spam class needs more "
"training samples. Currently: %ul; minimum %ud required",
ctx->spam_learns, ctx->cfg->min_learns);

return TRUE;
Expand All @@ -314,8 +314,8 @@ bayes_classify (struct rspamd_classifier * ctx,
}

if (text_tokens == 0) {
msg_info_task ("skip classification as there are no text tokens, "
"%ud total tokens",
msg_info_task ("skipped classification as there are no text tokens. "
"Total tokens: %ud",
tokens->len);

return TRUE;
Expand Down Expand Up @@ -349,7 +349,7 @@ bayes_classify (struct rspamd_classifier * ctx,
cl.text_tokens < (gint)(ctx->cfg->min_tokens * 0.1)) {
msg_info_bayes ("ignore bayes probability since we have "
"found too few text tokens: %uL (of %ud checked), "
"at least %d is required",
"at least %d required",
cl.text_tokens,
text_tokens,
(gint)(ctx->cfg->min_tokens * 0.1));
Expand Down Expand Up @@ -379,7 +379,7 @@ bayes_classify (struct rspamd_classifier * ctx,
if (isfinite (s) && isfinite (h)) {
final_prob = (s + 1.0 - h) / 2.;
msg_debug_bayes (
"got ham prob %.2f -> %.2f and spam prob %.2f -> %.2f,"
"got ham probability %.2f -> %.2f and spam probability %.2f -> %.2f,"
" %L tokens processed of %ud total tokens;"
" %uL text tokens found of %ud text tokens)",
cl.ham_prob,
Expand All @@ -398,17 +398,17 @@ bayes_classify (struct rspamd_classifier * ctx,
*/
if (isfinite (h)) {
final_prob = 1.0;
msg_debug_bayes ("spam class is overflowed, as we have no"
msg_debug_bayes ("spam class is full: no"
" ham samples");
}
else if (isfinite (s)) {
final_prob = 0.0;
msg_debug_bayes ("ham class is overflowed, as we have no"
msg_debug_bayes ("ham class is full: no"
" spam samples");
}
else {
final_prob = 0.5;
msg_warn_bayes ("spam and ham classes are both overflowed");
msg_warn_bayes ("spam and ham classes are both full");
}
}

Expand Down Expand Up @@ -553,4 +553,4 @@ bayes_learn_spam (struct rspamd_classifier * ctx,
}

return TRUE;
}
}

0 comments on commit ab807c1

Please sign in to comment.