Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
ldidry committed Nov 16, 2018
1 parent 2779fac commit ea40312
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 1 deletion.
1 change: 1 addition & 0 deletions default/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ nobase_default_DATA = \
charset.conf \
crawlers_detection.conf \
create_list.conf \
domain_correction \
edit_list.conf \
ldap_alias_entry.tt2 \
mhonarc-ressources.tt2 \
Expand Down
63 changes: 63 additions & 0 deletions default/domain_correction
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# vim:set ft=perl:

=head1 Domain correction
Users make common mistakes and typos on email addresses, like C<foo@gmail.fr>
instead of C<foo@gmail.com>.
You can define here some mistakes and typos that you want to automatically
correct before using them in list subscriptions.
To use that feature, set C<use_domain_correction> to 1 in C<sympa.conf>, copy
this file in Sympa's C<etc> directory then modify that copy.
Please note that this feature can only be use to correct the domains of the
email addresses, you can't change the user part of the email address.
=head2 Configuration
This file contains a Perl hash table containing two keys. Any other keys that
you could add won't be used, but you can delete one if you don't need it.
=over 1
=item B<substitutions>
Its value is a hash table, which keys will be the correct domains and the
values will be an array of mistyped domains.
Example:
substitutions => {
'hotmail.com' => ['hotamil.com', 'htmail.com']
},
In this example, C<foo@hotamil.com> will be corrected in C<foo@hotmail.com>
=item B<globbing>
Its value is a hash table, which keys will be a mistyped domain pattern and
the values will be the correct domains.
B<Warning>: the pattern is not a regex, it's similar to shell globbing.
C<gmail.*> would match C<gmail.fr> but not C<gmailbar.fr>
See
https://www.oreilly.com/library/view/perl-cookbook/1565922433/ch06s10.html
Example:
globbing => {
'gmail.*' => 'gmail.com',
}
In this example, C<foo@gmail.fr> and C<bar@gmail.de> will be corrected in
C<foo@gmail.com> and C<bar@gmail.com>
=back
=cut

{
substitutions => { },
globbing => { }
};
2 changes: 1 addition & 1 deletion src/lib/Conf.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2291,7 +2291,7 @@ sub _store_source_file_name {
$param->{'config_hash'}{'source_file'} = $param->{'config_file'};
}

# FXIME:Use Sympa::search_fullpath().
# FIXME:Use Sympa::search_fullpath().
sub _get_config_file_name {
my $param = shift;
my $config_file;
Expand Down
7 changes: 7 additions & 0 deletions src/lib/Sympa/ConfDef.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2123,6 +2123,13 @@ our @params = (
'file' => 'wwsympa.conf',
'optional' => 1,
},
{ 'name' => 'use_domain_correction',
'default' => '0',
'gettext_id' => 'Enable domain correction',
'gettext_comment' => 'See default/domain_correction file for details',
'file' => 'sympa.conf',
'optional' => 1,
},

## Not implemented yet.
## {
Expand Down
1 change: 1 addition & 0 deletions src/lib/Sympa/List.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3821,6 +3821,7 @@ sub add_list_member {

foreach my $new_user (@new_users) {
my $who = Sympa::Tools::Text::canonic_email($new_user->{'email'});
$who = Sympa::Tools::Text::domain_correction($who);
unless (defined $who) {
$log->syslog('err', 'Ignoring %s which is not a valid email',
$new_user->{'email'});
Expand Down
1 change: 1 addition & 0 deletions src/lib/Sympa/Request/Handler/add.pm
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ sub _twist {

$language->set_lang($list->{'admin'}{'lang'});

$email = Sympa::Tools::Text::domain_correction($email);
unless (Sympa::Tools::Text::valid_email($email)) {
$self->add_stash($request, 'user', 'incorrect_email',
{'email' => $email});
Expand Down
2 changes: 2 additions & 0 deletions src/lib/Sympa/Request/Handler/subscribe.pm
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ sub _twist {
undef $comment;
}

$email = Sympa::Tools::Text::domain_correction($email);

# Unless rejected by scenario, don't go further if the user is subscribed
# already.
my $user_entry = $list->get_list_member($email);
Expand Down
81 changes: 81 additions & 0 deletions src/lib/Sympa/Tools/Text.pm
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,17 @@ use MIME::EncWords;
use Text::LineFold;
use Unicode::GCString;
use URI::Escape qw();
use Sympa;
use Conf;
use if (5.008 < $] && $] < 5.016), qw(Unicode::CaseFold fc);
use if (5.016 <= $]), qw(feature fc);
BEGIN { eval 'use Unicode::Normalize qw()'; }

use Sympa::Language;
use Sympa::Regexps;

my $domains_corrections;

# Old name: tools::addrencode().
sub addrencode {
my $addr = shift;
Expand Down Expand Up @@ -474,6 +478,83 @@ sub weburl {
$qstring, $fstring;
}

sub domain_correction {
my $email = shift;

if (!$Conf::Conf{'use_domain_correction'}) {
return $email;
}

if (!defined($domains_corrections)) {
$domains_corrections = _get_domains_corrections();
}

my @parts = split '@', $email;

if (defined($domains_corrections->{substitutions}->{$parts[1]})) {
return $parts[0] . '@'
. $domains_corrections->{substitutions}->{$parts[1]};
} else {
for my $pattern (keys %{$domains_corrections->{globbing}}) {
my $replacement = $domains_corrections->{globbing}->{$pattern};
if ($parts[1] =~ m/$pattern/) {
return $parts[0] . '@' . $replacement;
}
}
return $email;
}
}

sub _get_domains_corrections {
if (!$Conf::Conf{'use_domain_correction'}) {
return {substitutions => {}, globbing => {}};
}

my $hash = do {
my $file = Sympa::search_fullpath('*', 'domain_correction');
open my $fh, '<', $file or die "Could not open $file for reading";
local $/;
eval <$fh>;
};

my ($substitutions, $globbing) = ({}, {});

# Transform { a => [b, c, d] } to { b => a, c => a, d => a }
if (defined($hash->{substitutions})) {
for my $domain (keys %{$hash->{substitutions}}) {
for my $substitution (@{$hash->{substitutions}->{$domain}}) {
$substitutions->{$substitution} = $domain;
}
}
}

# Transform shell glob patterns to regexes
if (defined($hash->{globbing})) {
for my $key (keys %{$hash->{globbing}}) {
my $glob = _glob2pat($key);
$globbing->{$glob} = $hash->{globbing}->{$key};
}
}

return {
substitutions => $substitutions,
globbing => $globbing
};
}

# Comes from https://www.oreilly.com/library/view/perl-cookbook/1565922433/ch06s10.html
sub _glob2pat {
my $globstr = shift;
my %patmap = (
'*' => '.*',
'?' => '.',
'[' => '[',
']' => ']',
);
$globstr =~ s{(.)} { $patmap{$1} || "\Q$1" }ge;
return '^' . $globstr . '$';
}

1;
__END__
Expand Down

0 comments on commit ea40312

Please sign in to comment.