-
-
Notifications
You must be signed in to change notification settings - Fork 331
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add email address normalisation (#1206)
* feat: add email address normalisation
  - normalise Gmail addresses:
    - remove subaddresses.
    - remove dots/periods.
    - lower-case usernames.
    - standardise on `gmail.com`.
  - include this in `syntax.normalized_email` in the output.

  relates #952
* test: add check for idempotency

  verify that normalisation is idempotent (i.e. normalising an already-normalised email results in no further changes).
* fix: normalize by username/domain

  update the `normalize_email()` signature to accept `username` and `domain` separately: these have been split in an earlier stage.
- Loading branch information
1 parent
fcec5e7
commit f8ec348
Showing
5 changed files
with
92 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,6 +65,7 @@ | |
pub mod gravatar; | ||
pub mod misc; | ||
pub mod mx; | ||
mod normalize; | ||
pub mod smtp; | ||
pub mod syntax; | ||
mod util; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
pub fn normalize_email(username: &str, domain: &str) -> String { | ||
match domain { | ||
"gmail.com" | "googlemail.com" => normalize_gmail(username), | ||
_ => format!("{}@{}", username, domain), | ||
} | ||
} | ||
|
||
/// Canonicalize a Gmail username and return the full `…@gmail.com` address.
///
/// Gmail's username rules are described in Google's
/// [help pages](https://support.google.com/mail/answer/9211434?hl=en-GB).
///
/// The transformation:
///
/// - drops the
///   [sub-address](https://support.google.com/a/users/answer/9282734?hl=en#zippy=%2Clearn-how),
///   i.e. everything from the first `+` character onwards;
/// - strips every [dot](https://support.google.com/mail/answer/7436150);
/// - lower-cases ASCII letters;
/// - always emits `gmail.com` as the domain, which also
///   [replaces](https://support.google.com/mail/answer/10313?hl=en-GB#zippy=%2Cgetting-messages-sent-to-an-googlemailcom-address)
///   `googlemail.com`.
fn normalize_gmail(username: &str) -> String {
    const SUFFIX: &str = "@gmail.com";

    // `split` always yields at least one piece, so the fallback is only a
    // formality; the first piece is everything before the first `+`.
    let mailbox = username.split('+').next().unwrap_or(username);

    let mut canonical = String::with_capacity(mailbox.len() + SUFFIX.len());
    for c in mailbox.chars() {
        if c != '.' {
            canonical.push(c.to_ascii_lowercase());
        }
    }
    canonical.push_str(SUFFIX);
    canonical
}
|
||
#[cfg(test)]
mod tests {
    use super::normalize_email;

    /// Dots in a Gmail username are dropped.
    #[test]
    fn test_gmail_removes_periods() {
        let normalized = normalize_email("a.b.c", "gmail.com");
        assert_eq!(normalized, "abc@gmail.com");
    }

    /// Everything from the `+` onwards is dropped.
    #[test]
    fn test_gmail_removes_subaddress() {
        let normalized = normalize_email("abc+123", "gmail.com");
        assert_eq!(normalized, "abc@gmail.com");
    }

    /// `googlemail.com` is rewritten to `gmail.com`.
    #[test]
    fn test_gmail_uses_gmail_com() {
        let normalized = normalize_email("abc", "googlemail.com");
        assert_eq!(normalized, "abc@gmail.com");
    }

    /// All Gmail rules applied together.
    #[test]
    fn test_gmail() {
        let normalized = normalize_email("A.B.C+123", "googlemail.com");
        assert_eq!(normalized, "abc@gmail.com");
    }

    /// Normalizing an already-normalized address changes nothing.
    #[test]
    fn test_gmail_idempotent() {
        let first_pass = normalize_email("A.B.C+123", "googlemail.com");

        // Split on the last `@` to recover the parts for the second pass.
        let (local, host) = first_pass.rsplit_once('@').unwrap();
        let second_pass = normalize_email(local, host);

        assert_eq!(second_pass, first_pass);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters