Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,341 @@ | ||
#!/usr/bin/perl | ||
|
||
=head1 NAME | ||
dspam - dspam integration for qpsmtpd | ||
=head1 DESCRIPTION | ||
qpsmtpd plugin that uses dspam to classify messages. Can use SpamAssassin to | ||
train dspam. | ||
Adds the X-DSPAM-Result and X-DSPAM-Signature headers to messages. The latter is essential for | ||
training dspam and the former is useful to MDAs, MUAs, and humans. | ||
=head1 TRAINING DSPAM | ||
To get dspam into a useful state, it must be trained. The best method way to | ||
train dspam is to feed it two large equal sized corpuses of spam and ham from | ||
your mail server. The dspam authors suggest avoiding public corpuses. I do | ||
this as follows: | ||
=over 4 | ||
=item learn from SpamAssassin | ||
See the docs on the learn_from_sa feature in the CONFIG section. | ||
=item daily training | ||
I have a script that crawls the contents of every users maildir each night. | ||
The script builds two lists of messages: ham and spam. | ||
The spam message list consists of all read messages in folders named Spam | ||
that have changed since the last spam learning run (normally 1 day). | ||
The ham message list consists of read messages in any folder not named like | ||
Spam, Junk, Trash, or Deleted. This catches messages that users have read | ||
and left in their inbox, filed away into subfolders, and | ||
=item on-the-fly training | ||
=back | ||
=head1 CONFIG | ||
=over 4 | ||
=item dspam_bin | ||
The path to the dspam binary. If yours is installed somewhere other | ||
than /usr/local/bin/dspam, you'll need to set this. | ||
=item learn_from_sa | ||
Dspam can be trained by SpamAssassin. This relationship between them requires | ||
attention to several important details: | ||
=over 4 | ||
=item 1 | ||
dspam must be listed B<after> spamassassin in the config/plugins file. | ||
Because SA runs first, I crank the SA reject_threshold up above 100 so that | ||
all spam messages will be used to train dspam. | ||
Once dspam is trained and errors are rare, I plan to run dspam first and | ||
reduce the SA load. | ||
=item 2 | ||
Autolearn must be enabled and configured in SpamAssassin. SA autolearn | ||
preferences will determine whether a message is learned as spam or innocent | ||
by dspam. The settings to pay careful attention to in your SA local.cf file | ||
are bayes_auto_learn_threshold_spam and bayes_auto_learn_threshold_nonspam. | ||
Make sure they are both set to conservative values that are certain to | ||
yield no false positives. | ||
If you are using learn_from_sa and reject, then messages that exceed the SA | ||
threshholds will cause dspam to reject them. Again I say, make sure them SA | ||
autolearn threshholds are set high enough to avoid false positives. | ||
=item 3 | ||
dspam must be configured and working properly. I have modified the following | ||
dspam values on my system: | ||
=over 4 | ||
=item mysql storage | ||
=item Trust smtpd | ||
=item TrainingMode tum | ||
=item Tokenizer osb | ||
=item Preference "trainingMode=TOE" | ||
=item Preference "spamAction=deliver" | ||
=item Preference "signatureLocation=headers" | ||
=item TrainPristine off | ||
=item ParseToHeaders off | ||
=back | ||
Of those changes, the most important is the signature location. This plugin | ||
only supports storing the signature in the headers. If you want to train dspam | ||
after delivery (ie, users moving messages to/from spam folders), then the | ||
dspam signature must be in the headers. | ||
=back | ||
=item reject | ||
Set to a floating point value between 0 and 1.00 where 0 is no confidence | ||
and 1.0 is 100% confidence. | ||
If dspam's confidence is greater than or equal to this threshold, the | ||
message will be rejected. | ||
=back | ||
=head1 MULTIPLE RECIPIENT BEHAVIOR | ||
For messages with multiple recipients, the user that dspam is running as will | ||
be the dspam username. | ||
When messages have a single recipient, the recipient address is used as the | ||
dspam username. For dspam to trust qpsmtpd with modifying the username, you | ||
B<must> add the username that qpsmtpd is running to to the dspamd.conf file. | ||
ie, (Trust smtpd). | ||
=head1 CHANGES | ||
=cut | ||
|
||
use strict; | ||
|
||
use Qpsmtpd::Constants; | ||
use Qpsmtpd::DSN; | ||
use IO::Handle; | ||
use Socket qw(:DEFAULT :crlf); | ||
|
||
sub register { | ||
my ($self, $qp, @args) = @_; | ||
|
||
$self->log(LOGERROR, "Bad parameters for the dspam plugin") if @_ % 2; | ||
|
||
%{$self->{_args}} = @args; | ||
|
||
$self->register_hook('data_post', 'dspam_reject') | ||
if $self->{_args}->{reject}; | ||
} | ||
|
||
sub hook_data_post { | ||
my ($self, $transaction) = @_; | ||
|
||
$self->log(LOGDEBUG, "check_dspam"); | ||
return (DECLINED) if $transaction->data_size > 500_000; | ||
|
||
my $username = $self->select_username( $transaction ); | ||
my $message = $self->assemble_message($transaction); | ||
my $filtercmd = $self->get_filter_cmd( $transaction, $username ); | ||
$self->log(LOGWARN, $filtercmd); | ||
|
||
my $response = $self->dspam_process( $filtercmd, $message ); | ||
if ( ! $response ) { | ||
$self->log(LOGWARN, "No response received from dspam. Check your logs for errors."); | ||
return (DECLINED); | ||
}; | ||
$self->log(LOGWARN, $response); | ||
|
||
# X-DSPAM-Result: alan@alannowell.com; result="Spam"; class="Spam"; probability=1.0000; confidence=1.00; signature=N/A | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
msimerson
Author
Owner
|
||
# X-DSPAM-Result: smtpd; result="Innocent"; class="Innocent"; probability=0.0023; confidence=1.00; signature=4f8dae6a446008399211546 | ||
my ($result,$prob,$conf,$sig) = $response =~ /result=\"(Spam|Innocent)\";.*?probability=([\d\.]+); confidence=([\d\.]+); signature=(.*)/; | ||
my $header_str = "$result, probability=$prob, confidence=$conf"; | ||
$self->log(LOGWARN, $header_str); | ||
$transaction->header->add('X-DSPAM-Result', $header_str, 0); | ||
|
||
# the signature header is required if you intend to train dspam later | ||
# you must set Preference "signatureLocation=headers" in dspam.conf | ||
$transaction->header->add('X-DSPAM-Signature', $sig, 0); | ||
|
||
return (DECLINED); | ||
}; | ||
|
||
sub select_username { | ||
my ($self, $transaction) = @_; | ||
|
||
my $recipient_count = scalar $transaction->recipients; | ||
$self->log(LOGDEBUG, "Message has $recipient_count recipients"); | ||
|
||
if ( $recipient_count > 1 ) { | ||
$self->log(LOGINFO, "skipping user prefs, $recipient_count recipients detected."); | ||
return getpwuid($>); | ||
}; | ||
|
||
# use the recipients email address as username. This enables user prefs | ||
my $username = ($transaction->recipients)[0]->address; | ||
return lc($username); | ||
}; | ||
|
||
sub assemble_message { | ||
my ($self, $transaction) = @_; | ||
|
||
$transaction->body_resetpos; | ||
|
||
my $message = "X-Envelope-From: " | ||
. $transaction->sender->format . "\n" | ||
. $transaction->header->as_string . "\n\n"; | ||
|
||
while (my $line = $transaction->body_getline) { $message .= $line; }; | ||
|
||
$message = join(CRLF, split/\n/, $message); | ||
return $message . CRLF; | ||
}; | ||
|
||
sub dspam_process { | ||
my ( $self, $filtercmd, $message ) = @_; | ||
|
||
#return $self->dspam_process_open2( $filtercmd, $message ); | ||
|
||
my ($in_fh, $out_fh); | ||
if (! open($in_fh, "-|")) { | ||
open($out_fh, "|$filtercmd") or die "Can't run $filtercmd: $!\n"; | ||
print $out_fh $message; | ||
close $out_fh; | ||
exit(0); | ||
}; | ||
my $response = join('', <$in_fh>); | ||
close $in_fh; | ||
chomp $response; | ||
|
||
return $response; | ||
}; | ||
|
||
sub dspam_process_open2 { | ||
my ( $self, $filtercmd, $message ) = @_; | ||
|
||
# not sure why, but this is not as reliable as I'd like. What's a dspam | ||
# error -5 mean anyway? | ||
use FileHandle; | ||
use IPC::Open2; | ||
my ($dspam_in, $dspam_out); | ||
my $pid = open2($dspam_out, $dspam_in, $filtercmd); | ||
print $dspam_in $message; | ||
close $dspam_in; | ||
my $response = join('', <$dspam_out>); | ||
waitpid $pid, 0; | ||
chomp $response; | ||
return $response; | ||
}; | ||
|
||
sub dspam_reject { | ||
my ($self, $transaction) = @_; | ||
|
||
return (DECLINED) if ! $self->{_args}->{reject}; | ||
|
||
my $status = $transaction->header->get('X-DSPAM-Result') or do { | ||
$self->log(LOGWARN, "dspam_reject: failed to find the dspam header"); | ||
return (DECLINED); | ||
}; | ||
my ($clas,$probability,$confidence) = $status =~ m/^(Spam|Innocent), probability=([\d\.]+), confidence=([\d\.]+)/i; | ||
|
||
$self->log(LOGDEBUG, "dspam $clas, prob: $probability, conf: $confidence"); | ||
|
||
if ( $clas eq 'Spam' && $probability == 1 && $confidence == 1 ) { | ||
# default of media_unsupported is DENY, so just change the message | ||
if ( $self->qp->connection->relay_client ) { | ||
$self->log(LOGWARN, "allowing spam since user authenticated"); | ||
return DECLINED; | ||
}; | ||
return Qpsmtpd::DSN->media_unsupported('dspam says, no spam please'); | ||
}; | ||
|
||
return DECLINED; | ||
} | ||
|
||
sub get_filter_cmd { | ||
my ($self, $transaction, $user) = @_; | ||
|
||
my $dspam_bin = $self->{_args}->{dspam_bin} || '/usr/local/bin/dspam'; | ||
my $default = "$dspam_bin --user $user --mode=tum --process --deliver=summary --stdout"; | ||
my $min_score = $self->{_args}->{learn_from_sa} or return $default; | ||
|
||
#$self->log(LOGDEBUG, "attempting to learn from SA"); | ||
my $sa_status = $transaction->header->get('X-Spam-Status'); | ||
|
||
if ( ! $sa_status ) { | ||
$self->log(LOGERROR, "dspam learn_from_sa was set but no X-Spam-Status header detected"); | ||
return $default; | ||
}; | ||
chomp $sa_status; | ||
|
||
my ($is_spam,$score,$autolearn) = $sa_status =~ /^(yes|no), score=([\d\.\-]+)\s.*?autolearn=([\w]+)/i; | ||
$self->log(LOGINFO, "sa_status: $sa_status; $is_spam; $autolearn"); | ||
|
||
$is_spam = lc($is_spam); | ||
$autolearn = lc($autolearn); | ||
|
||
if ( $is_spam eq 'yes' && $score < $min_score ) { | ||
$self->log(LOGWARN, "SA spam score of $score is less than $min_score, skipping autolearn"); | ||
return $default; | ||
}; | ||
|
||
if ( $is_spam eq 'yes' && $autolearn eq 'spam' ) { | ||
return "$dspam_bin --user $user --mode=tum --source=corpus --class=spam --deliver=summary --stdout"; | ||
} | ||
elsif ( $is_spam eq 'no' && $autolearn eq 'ham' ) { | ||
return "$dspam_bin --user $user --mode=tum --source=corpus --class=innocent --deliver=summary --stdout"; | ||
}; | ||
|
||
return $default; | ||
}; | ||
|
||
sub _cleanup_spam_header { | ||
my ($self, $transaction, $header_name) = @_; | ||
|
||
my $action = 'rename'; | ||
if ( $self->{_args}->{leave_old_headers} ) { | ||
$action = lc($self->{_args}->{leave_old_headers}); | ||
}; | ||
|
||
return unless $action eq 'drop' || $action eq 'rename'; | ||
|
||
my $old_header_name = $header_name; | ||
$old_header_name = ($old_header_name =~ s/^X-//) ? "X-Old-$old_header_name" : "Old-$old_header_name"; | ||
|
||
for my $header ( $transaction->header->get($header_name) ) { | ||
$transaction->header->add($old_header_name, $header) if $action eq 'rename'; | ||
$transaction->header->delete($header_name); | ||
} | ||
} | ||
|
I don't know who Alan is, but lets use a fake address?