From e9bb24fbc0971e9153b44aa162919c1c4f3b1e9f Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 21 Jan 2019 17:43:29 -0500 Subject: [PATCH] New regex to exclude leading dashes per #62 --- HostsTools/hosts_tools.py | 5 +++-- ads-and-tracking-extended.txt | 5 ++--- ads-and-tracking.txt | 2 +- tests/unit/test_hosts_tools.py | 8 ++++++++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/HostsTools/hosts_tools.py b/HostsTools/hosts_tools.py index 60bef475fd1..19bfdda122b 100644 --- a/HostsTools/hosts_tools.py +++ b/HostsTools/hosts_tools.py @@ -10,7 +10,8 @@ import time STRIP_COMMENTS_PATTERN = re.compile(r"^([^#]+)") -ALLOWED_DOMAIN_PATTERN = re.compile("^[^\*\?\[\]{}\|\\\/&^%$#@!+=~`\s\.<>,\"']+$", re.IGNORECASE) +EXCLUDE_DOMAIN_PATTERN = re.compile(r"^[-]", re.IGNORECASE) +ALLOWED_DOMAIN_PATTERN = re.compile(r"^[^\*\?\[\]{}\|\\/&^%$#@!+=~`\s\.<>,\"']+$", re.IGNORECASE) FILE_HEADER = """ # Collection of Analytics, Ads, and tracking hosts to block. # @@ -129,7 +130,7 @@ def is_valid_domain(domain: str) -> bool: return False if domain[-1] == ".": domain = domain[:-1] # strip exactly one dot from the right, if present - return all(ALLOWED_DOMAIN_PATTERN.match(x) for x in domain.split(".")) + return all(ALLOWED_DOMAIN_PATTERN.match(x) and not EXCLUDE_DOMAIN_PATTERN.match(x) for x in domain.split(".")) def filter_whitelist(domains: Set[str], whitelist: Set[Pattern] = {}): diff --git a/ads-and-tracking-extended.txt b/ads-and-tracking-extended.txt index 943e75d9559..2d6217239f5 100644 --- a/ads-and-tracking-extended.txt +++ b/ads-and-tracking-extended.txt @@ -1,8 +1,8 @@ # Collection of Analytics, Ads, and tracking hosts to block. # # Title: Lightswitch05's ads-and-tracking-extended.txt -# Released: 2019-01-21T13:42:14+00:00 -# Count: 32130 domains +# Released: 2019-01-21T22:42:38+00:00 +# Count: 32129 domains # Details: https://github.com/lightswitch05/hosts # Issues: https://github.com/lightswitch05/hosts/issues # Source: https://raw.githubusercontent.com/lightswitch05/hosts/master/ads-and-tracking-extended.txt @@ -20509,7 +20509,6 @@ 0.0.0.0 www.vilynx.com 0.0.0.0 www2.vilynx.com 0.0.0.0 vindicosuite.com -0.0.0.0 -x3.vindicosuite.com 0.0.0.0 1.vindicosuite.com 0.0.0.0 adcreative.vindicosuite.com 0.0.0.0 addirector.vindicosuite.com diff --git a/ads-and-tracking.txt b/ads-and-tracking.txt index db7fa92cbf4..7ffbca07f1b 100644 --- a/ads-and-tracking.txt +++ b/ads-and-tracking.txt @@ -1,7 +1,7 @@ # Collection of Analytics, Ads, and tracking hosts to block. # # Title: Lightswitch05's ads-and-tracking.txt -# Released: 2019-01-21T13:42:14+00:00 +# Released: 2019-01-21T22:42:37+00:00 # Count: 2183 domains # Details: https://github.com/lightswitch05/hosts # Issues: https://github.com/lightswitch05/hosts/issues diff --git a/tests/unit/test_hosts_tools.py b/tests/unit/test_hosts_tools.py index 858377cca31..4d70a8a48fd 100644 --- a/tests/unit/test_hosts_tools.py +++ b/tests/unit/test_hosts_tools.py @@ -64,6 +64,14 @@ def test_trailing_dot_is_a_valid_domain(self): is_valid = hosts_tools.is_valid_domain("www.example.com.") assert is_valid + def test_leading_dash_is_not_a_valid_domain(self): + is_valid = hosts_tools.is_valid_domain("-example.com") + assert not is_valid + + def test_middle_dash_is_a_valid_domain(self): + is_valid = hosts_tools.is_valid_domain("my-example.com") + assert is_valid + def test_extract_basic(self): extracted = hosts_tools.extract_domain("0.0.0.0 example.com") assert extracted == "example.com"