Skip to content

Commit

Permalink
Add prevailing_rule/2 and matches_explicit_rule?/2 and add tests for the
Browse files Browse the repository at this point in the history
same; make find_prevailing_rule return actual matching rule; update README
  • Loading branch information
andersju committed Jun 2, 2016
1 parent 887d7e0 commit 7520462
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 7 deletions.
8 changes: 8 additions & 0 deletions README.md
Expand Up @@ -20,6 +20,14 @@ iex(3)> PublicSuffix.public_suffix("mysite.foo.bar.com")
"com"
iex(4)> PublicSuffix.public_suffix("mysite.foo.bar.co.uk")
"co.uk"
iex(5)> PublicSuffix.prevailing_rule("mysite.foo.bar.com")
"com"
iex(6)> PublicSuffix.prevailing_rule("mysite.example")
"*"
iex(7)> PublicSuffix.matches_explicit_rule?("mysite.foo.bar.com")
true
iex(8)> PublicSuffix.matches_explicit_rule?("mysite.example")
false
```

The publicsuffix.org data file contains both official ICANN records
Expand Down
77 changes: 70 additions & 7 deletions lib/public_suffix.ex
Expand Up @@ -26,7 +26,6 @@ defmodule PublicSuffix do
iex> public_suffix("foo.github.io")
"github.io"
"""
@spec public_suffix(String.t) :: nil | String.t
@spec public_suffix(String.t, options) :: nil | String.t
def public_suffix(domain, options \\ []) when is_binary(domain) do
parse_domain(domain, options, 0)
Expand Down Expand Up @@ -55,13 +54,73 @@ defmodule PublicSuffix do
iex> registrable_domain("foo.github.io")
"foo.github.io"
"""
@spec registrable_domain(String.t) :: nil | String.t
@spec registrable_domain(String.t, options) :: nil | String.t
def registrable_domain(domain, options \\ []) when is_binary(domain) do
  # Quoting the publicsuffix.org algorithm:
  # "The registered or registrable domain is the public suffix plus one additional label."
  # Hence exactly one label beyond the public suffix is requested.
  extra_label_parts = 1
  parse_domain(domain, options, extra_label_parts)
end

@doc """
Returns the publicsuffix.org rule that prevails for the given domain.

The domain is lowercased and split into labels on `"."` before the rules
are consulted. The result is the rule itself, written the way it appears in
the list: wildcard rules keep their leading `*` label and exception rules are
prefixed with `!`. When no rule matches, the prevailing rule is `"*"`.

A domain with a leading dot is invalid input and yields `nil`.

## Examples

    iex> prevailing_rule("foo.bar.com")
    "com"
    iex> prevailing_rule("co.uk")
    "co.uk"
    iex> prevailing_rule("foo.ck")
    "*.ck"
    iex> prevailing_rule("www.ck")
    "!www.ck"
    iex> prevailing_rule("foobar.example")
    "*"

You can use the `ignore_private` keyword to exclude private (non-ICANN) domains.

    iex> prevailing_rule("foo.github.io", ignore_private: false)
    "github.io"
    iex> prevailing_rule("foo.github.io", ignore_private: true)
    "io"
    iex> prevailing_rule("foo.github.io")
    "github.io"

"""
@spec prevailing_rule(String.t, options) :: nil | String.t
def prevailing_rule(domain, options \\ [])
def prevailing_rule("." <> _rest, _options), do: nil
def prevailing_rule(domain, options) when is_binary(domain) do
  labels = String.split(String.downcase(domain), ".")

  case find_prevailing_rule(labels, options) do
    # Exception rules are rendered with the "!" marker in front of the labels.
    {:exception, rule_labels} -> "!" <> Enum.join(rule_labels, ".")
    {_rule_type, rule_labels} -> Enum.join(rule_labels, ".")
  end
end

@doc """
Checks whether the provided domain matches an existing rule in the
publicsuffix.org rules.

A domain matches an explicit rule when `prevailing_rule/2` returns something
other than `nil` (invalid input with a leading dot) or `"*"` (the implicit
rule used when nothing in the list matches). `nil` input never matches,
with or without options.

## Examples

    iex> matches_explicit_rule?("foo.bar.com")
    true
    iex> matches_explicit_rule?("com")
    true
    iex> matches_explicit_rule?("foobar.example")
    false
    iex> matches_explicit_rule?(nil)
    false

You can use the `ignore_private` keyword to exclude private (non-ICANN) domains.
"""
@spec matches_explicit_rule?(String.t | nil) :: boolean
@spec matches_explicit_rule?(String.t | nil, options) :: boolean
# The default lives on a single bodiless head so that every clause has the same
# arity: the `nil` clause then also covers calls that pass options, and it no
# longer overlaps with the arity-1 function generated for the default argument.
def matches_explicit_rule?(domain, options \\ [])
def matches_explicit_rule?(nil, _options), do: false
def matches_explicit_rule?(domain, options) when is_binary(domain) do
  !(prevailing_rule(domain, options) in [nil, "*"])
end

# Inputs with a leading dot should be treated as a special case.
# see https://github.com/publicsuffix/list/issues/208
defp parse_domain("." <> _domain, _, _), do: nil
Expand All @@ -82,6 +141,11 @@ defmodule PublicSuffix do
num_labels =
labels
|> find_prevailing_rule(options)
|> case do
# "If the prevailing rule is a exception rule, modify it by removing the leftmost label."
{:exception, labels} -> tl(labels)
{_, labels} -> labels
end
|> length
|> +(extra_label_parts)

Expand All @@ -99,7 +163,7 @@ defmodule PublicSuffix do
find_prevailing_exception_rule(labels, allowed_rule_types) ||
find_prevailing_normal_rule(labels, allowed_rule_types) ||
# "If no rules match, the prevailing rule is "*"."
["*"]
{:normal, ["*"]}
end

data_file = Path.expand("../data/public_suffix_list.dat", __DIR__)
Expand Down Expand Up @@ -128,8 +192,7 @@ defmodule PublicSuffix do
defp find_prevailing_exception_rule([], _allowed_rule_types), do: nil
defp find_prevailing_exception_rule([_ | suffix] = domain_labels, allowed_rule_types) do
if @exception_rules[domain_labels] in allowed_rule_types do
# "If the prevailing rule is a exception rule, modify it by removing the leftmost label."
suffix
{:exception, domain_labels}
else
find_prevailing_exception_rule(suffix, allowed_rule_types)
end
Expand All @@ -140,9 +203,9 @@ defmodule PublicSuffix do
# Finds the prevailing non-exception rule for a list of domain labels.
#
# The labels are checked from the full list down to shorter and shorter
# suffixes (the leftmost label is dropped on each step). At each step an exact
# rule for the current labels is tried first, then a wildcard rule whose
# leftmost label is "*". The first hit whose rule type is in
# `allowed_rule_types` wins and is returned as `{:normal, rule_labels}`, the
# tagged shape the callers of `find_prevailing_rule` pattern-match on.
# Returns `nil` once the labels are exhausted without a match.
defp find_prevailing_normal_rule([], _allowed_rule_types), do: nil
defp find_prevailing_normal_rule([_ | suffix] = domain_labels, allowed_rule_types) do
  # Built once: it is both the lookup key and the rule that gets reported.
  wild_card_labels = ["*" | suffix]

  cond do
    @exact_match_rules[domain_labels] in allowed_rule_types ->
      {:normal, domain_labels}

    # TODO: "Wildcards are not restricted to appear only in the leftmost position"
    @wild_card_rules[wild_card_labels] in allowed_rule_types ->
      {:normal, wild_card_labels}

    true ->
      find_prevailing_normal_rule(suffix, allowed_rule_types)
  end
end
Expand Down
81 changes: 81 additions & 0 deletions test/public_suffix_test.exs
Expand Up @@ -28,6 +28,87 @@ defmodule PublicSuffix.PublicSuffixTest do
end
end

# Each case is {rule kind, domain, expected rule when private rules are
# considered, expected rule when private rules are ignored}.
private_rule_cases = [
  {"exact match", "foo.github.io", "github.io", "io"},
  {"wildcard", "foo.bar.api.githubcloud.com", "*.api.githubcloud.com", "com"},
]

for {kind, domain, rule_with_private, rule_without_private} <- private_rule_cases do
  # Module attributes carry the loop values into the generated test bodies.
  @domain domain
  @rule_with_private rule_with_private
  @rule_without_private rule_without_private

  test "`prevailing_rule` includes private domains by default (#{kind})" do
    assert prevailing_rule(@domain) == @rule_with_private
  end

  test "`prevailing_rule` includes private domains if passed `ignore_private: false` (#{kind})" do
    assert prevailing_rule(@domain, ignore_private: false) == @rule_with_private
  end

  test "`prevailing_rule` excludes private domains if passed `ignore_private: true` (#{kind})" do
    assert prevailing_rule(@domain, ignore_private: true) == @rule_without_private
  end
end

# Each case is {description of the kind of TLD/input, domain, expected
# prevailing rule}. `nil` is expected for invalid input (leading dot).
test_cases_prevailing = [
  {"leading dot", ".com", nil},
  {"unlisted TLD", "example", "*"},
  {"unlisted TLD", "example.example", "*"},
  {"TLD with only 1 rule", "biz", "biz"},
  {"TLD with only 1 rule", "domain.biz", "biz"},
  {"TLD with some 2-level rules", "uk.com", "uk.com"},
  {"TLD with some 2-level rules", "example.uk.com", "uk.com"},
  {"TLD with only 1 (wildcard) rule", "mm", "*"},
  {"TLD with only 1 (wildcard) rule", "c.mm", "*.mm"},
  {"TLD with only 1 (wildcard) rule", "b.c.mm", "*.mm"},
  {"more complex TLD", "kyoto.jp", "kyoto.jp"},
  {"more complex TLD", "test.kyoto.jp", "kyoto.jp"},
  {"more complex TLD", "ide.kyoto.jp", "ide.kyoto.jp"},
  {"more complex TLD", "b.ide.kyoto.jp", "ide.kyoto.jp"},
  {"more complex TLD", "a.b.ide.kyoto.jp", "ide.kyoto.jp"},
  {"more complex TLD", "c.kobe.jp", "*.kobe.jp"},
  {"more complex TLD", "b.c.kobe.jp", "*.kobe.jp"},
  {"more complex TLD", "city.kobe.jp", "!city.kobe.jp"},
  {"more complex TLD", "www.city.kobe.jp", "!city.kobe.jp"},
  {"TLD with a wildcard rule and exceptions", "ck", "*"},
  {"TLD with a wildcard rule and exceptions", "test.ck", "*.ck"},
  {"TLD with a wildcard rule and exceptions", "b.test.ck", "*.ck"},
  {"TLD with a wildcard rule and exceptions", "www.ck", "!www.ck"},
  {"TLD with a wildcard rule and exceptions", "www.www.ck", "!www.ck"},
]

for {rule_type, input, expected_output} <- test_cases_prevailing do
  # Module attributes carry the loop values into the generated test body.
  @input input
  @expected_output expected_output

  # `inspect/1` rather than `to_string/1`: a `nil` expectation would otherwise
  # be interpolated as an empty string and vanish from the test name.
  test "`prevailing_rule` returns `#{inspect(expected_output)}` if passed `#{input}` (#{rule_type})" do
    assert prevailing_rule(@input) == @expected_output
  end
end

# Each case is {description of the kind of input, input, expected result}.
test_cases_matches_explicit = [
  {"listed TLD only", "com", true},
  {"TLD with only 1 (wildcard) rule", "mm", false},
  {"TLD with only 1 (wildcard) rule", "b.mm", true},
  {"TLD with a wildcard rule and exceptions", "ck", false},
  {"TLD with a wildcard rule and exceptions", "b.ck", true},
  {"TLD with a wildcard rule and exceptions", "www.ck", true},
  {"domain with leading dot", ".com", false},
  {"unlisted TLD", "example", false},
  {"empty string", "", false},
  {"nil", nil, false},
]

for {rule_type, input, expected_output} <- test_cases_matches_explicit do
  # Module attributes carry the loop values into the generated test body.
  @input input
  @expected_output expected_output

  # `inspect/1` keeps the `nil` and `""` inputs distinguishable in the test
  # name; plain interpolation renders both as an empty string.
  test "`matches_explicit_rule?` returns `#{inspect(expected_output)}` if passed `#{inspect(input)}` (#{rule_type})" do
    assert matches_explicit_rule?(@input) == @expected_output
  end
end

test "unicode domains are correctly NFKC normalized when punycoding them" do
# Both of these strings are different unicode forms of "ábc.co.uk".
# The example came from:
Expand Down

0 comments on commit 7520462

Please sign in to comment.