Skip to content

Commit

Permalink
[idna] Add unit tests for punycode prefix edge cases
Browse files Browse the repository at this point in the history
Helps answer the question asked in #373.

The test invariants here are based on and checked against:

* Processing Step 4 of UTS#46
  (<http://www.unicode.org/reports/tr46/#ProcessingStepPunycode>)

* Section 6.2 (Decoding procedure) of the Punycode RFC, RFC 3492.
  (<https://ietf.org/rfc/rfc3492.txt>)
  • Loading branch information
behnam authored and djc committed Aug 25, 2020
1 parent 8dedda4 commit 4c1be65
Showing 1 changed file with 55 additions and 16 deletions.
71 changes: 55 additions & 16 deletions idna/tests/unit.rs
Original file line number Diff line number Diff line change
@@ -1,34 +1,73 @@
use unicode_normalization::char::is_combining_mark;

fn _to_ascii(domain: &str) -> Result<String, idna::Errors> {
idna::Config::default()
/// https://github.com/servo/rust-url/issues/373
#[test]
fn test_punycode_prefix_with_length_check() {
let config = idna::Config::default()
.verify_dns_length(true)
.use_std3_ascii_rules(true)
.to_ascii(domain)
.check_hyphens(true)
.use_std3_ascii_rules(true);

assert!(config.to_ascii("xn--").is_err());
assert!(config.to_ascii("xn---").is_err());
assert!(config.to_ascii("xn-----").is_err());
assert!(config.to_ascii("xn--.").is_err());
assert!(config.to_ascii("xn--...").is_err());
assert!(config.to_ascii(".xn--").is_err());
assert!(config.to_ascii("...xn--").is_err());
assert!(config.to_ascii("xn--.xn--").is_err());
assert!(config.to_ascii("xn--.example.org").is_err());
}

/// With DNS length verification disabled, a bare `xn--` prefix label converts
/// to an empty label, while a prefix followed only by hyphens is still an error.
///
/// <https://github.com/servo/rust-url/issues/373>
#[test]
fn test_punycode_prefix_without_length_check() {
    let config = idna::Config::default()
        .verify_dns_length(false)
        .check_hyphens(true)
        .use_std3_ascii_rules(true);

    // Each entry is (input, expected): `Some(ascii)` for a successful
    // conversion yielding `ascii`, `None` when the conversion must fail.
    let cases: [(&str, Option<&str>); 9] = [
        ("xn--", Some("")),
        ("xn---", None),
        ("xn-----", None),
        ("xn--.", Some(".")),
        ("xn--...", Some("...")),
        (".xn--", Some(".")),
        ("...xn--", Some("...")),
        ("xn--.xn--", Some(".")),
        ("xn--.example.org", Some(".example.org")),
    ];

    for &(input, expected) in cases.iter() {
        match expected {
            Some(ascii) => assert_eq!(
                config.to_ascii(input).unwrap(),
                ascii,
                "input: {:?}",
                input
            ),
            None => assert!(config.to_ascii(input).is_err(), "input: {:?}", input),
        }
    }
}

/// Domains containing a label that consists of the combining mark U+11C3A
/// must fail conversion to ASCII.
// NOTE(review): the name suggests UTS #46 validity criterion V5 — confirm.
#[test]
fn test_v5() {
    let config = idna::Config::default()
        .verify_dns_length(true)
        .use_std3_ascii_rules(true);

    // IdnaTest:784 蔏。𑰺
    // Sanity-check the premise before exercising the conversion.
    assert!(is_combining_mark('\u{11C3A}'));
    assert!(config.to_ascii("\u{11C3A}").is_err());
    assert!(config.to_ascii("\u{850f}.\u{11C3A}").is_err());
    // Same two labels, separated by U+FF61 instead of an ASCII dot.
    assert!(config.to_ascii("\u{850f}\u{ff61}\u{11C3A}").is_err());
}

/// Exercises conversion of left-to-right, right-to-left and mixed domain
/// names, including inputs that must be rejected.
// NOTE(review): the name suggests UTS #46 validity criterion V8 (Bidi rule) — confirm.
#[test]
fn test_v8_bidi_rules() {
    let config = idna::Config::default()
        .verify_dns_length(true)
        .use_std3_ascii_rules(true);

    // Pure ASCII labels pass through unchanged; Hebrew and Arabic labels are
    // punycode-encoded label by label.
    assert_eq!(config.to_ascii("abc").unwrap(), "abc");
    assert_eq!(config.to_ascii("123").unwrap(), "123");
    assert_eq!(config.to_ascii("אבּג").unwrap(), "xn--kdb3bdf");
    assert_eq!(config.to_ascii("ابج").unwrap(), "xn--mgbcm");
    assert_eq!(config.to_ascii("abc.ابج").unwrap(), "abc.xn--mgbcm");
    assert_eq!(config.to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm");

    // Bidi domain names cannot start with digits
    assert!(config.to_ascii("0a.\u{05D0}").is_err());
    assert!(config.to_ascii("0à.\u{05D0}").is_err());

    // Bidi chars may be punycode-encoded
    assert!(config.to_ascii("xn--0ca24w").is_err());
}

0 comments on commit 4c1be65

Please sign in to comment.