Skip to content
This repository has been archived by the owner on May 17, 2024. It is now read-only.

Commit

Permalink
New regex to exclude leading dashes per #62
Browse files Browse the repository at this point in the history
  • Loading branch information
lightswitch05 committed Jan 21, 2019
1 parent a01a183 commit e9bb24f
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 6 deletions.
5 changes: 3 additions & 2 deletions HostsTools/hosts_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import time

STRIP_COMMENTS_PATTERN = re.compile(r"^([^#]+)")
ALLOWED_DOMAIN_PATTERN = re.compile("^[^\*\?\[\]{}\|\\\/&^%$#@!+=~`\s\.<>,\"']+$", re.IGNORECASE)
EXCLUDE_DOMAIN_PATTERN = re.compile(r"^[-]", re.IGNORECASE)
ALLOWED_DOMAIN_PATTERN = re.compile(r"^[^\*\?\[\]{}\|\\/&^%$#@!+=~`\s\.<>,\"']+$", re.IGNORECASE)
FILE_HEADER = """
# Collection of Analytics, Ads, and tracking hosts to block.
#
Expand Down Expand Up @@ -129,7 +130,7 @@ def is_valid_domain(domain: str) -> bool:
return False
if domain[-1] == ".":
domain = domain[:-1] # strip exactly one dot from the right, if present
return all(ALLOWED_DOMAIN_PATTERN.match(x) for x in domain.split("."))
return all(ALLOWED_DOMAIN_PATTERN.match(x) and not EXCLUDE_DOMAIN_PATTERN.match(x) for x in domain.split("."))


def filter_whitelist(domains: Set[str], whitelist: Set[Pattern] = {}):
Expand Down
5 changes: 2 additions & 3 deletions ads-and-tracking-extended.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Collection of Analytics, Ads, and tracking hosts to block.
#
# Title: Lightswitch05's ads-and-tracking-extended.txt
# Released: 2019-01-21T13:42:14+00:00
# Count: 32130 domains
# Released: 2019-01-21T22:42:38+00:00
# Count: 32129 domains
# Details: https://github.com/lightswitch05/hosts
# Issues: https://github.com/lightswitch05/hosts/issues
# Source: https://raw.githubusercontent.com/lightswitch05/hosts/master/ads-and-tracking-extended.txt
Expand Down Expand Up @@ -20509,7 +20509,6 @@
0.0.0.0 www.vilynx.com
0.0.0.0 www2.vilynx.com
0.0.0.0 vindicosuite.com
0.0.0.0 -x3.vindicosuite.com
0.0.0.0 1.vindicosuite.com
0.0.0.0 adcreative.vindicosuite.com
0.0.0.0 addirector.vindicosuite.com
Expand Down
2 changes: 1 addition & 1 deletion ads-and-tracking.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Collection of Analytics, Ads, and tracking hosts to block.
#
# Title: Lightswitch05's ads-and-tracking.txt
# Released: 2019-01-21T13:42:14+00:00
# Released: 2019-01-21T22:42:37+00:00
# Count: 2183 domains
# Details: https://github.com/lightswitch05/hosts
# Issues: https://github.com/lightswitch05/hosts/issues
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/test_hosts_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ def test_trailing_dot_is_a_valid_domain(self):
is_valid = hosts_tools.is_valid_domain("www.example.com.")
assert is_valid

def test_leading_dash_is_not_a_valid_domain(self):
    """is_valid_domain must reject a domain whose first label starts with a dash."""
    assert not hosts_tools.is_valid_domain("-example.com")

def test_middle_dash_is_a_valid_domain(self):
    """is_valid_domain must accept a domain with a dash in the interior of a label."""
    assert hosts_tools.is_valid_domain("my-example.com")

def test_extract_basic(self):
extracted = hosts_tools.extract_domain("0.0.0.0 example.com")
assert extracted == "example.com"
Expand Down

0 comments on commit e9bb24f

Please sign in to comment.