
rename domain_blacklist.txt => domain_blocklist.txt

snarfed committed Jan 8, 2020
1 parent 6844355 commit 19312bf8f81784835c6ea323ac04e99150a9c87e
@@ -1,5 +1,5 @@
 # Domains that don't support webmentions. Mainly just the silos.
-# Subdomains are automatically blacklisted too.
+# Subdomains are automatically blocklisted too.
 
 # TLDs
 #
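For reference, the renamed file is just one domain or TLD per line, with '#' comment lines; subdomains of each entry are blocked automatically. A couple of illustrative entries, inferred from the tests further down in this commit (not the file's actual contents):

    # TLDs
    onion

    # silos
    t.co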
@@ -552,7 +552,7 @@ def verify(self, force=False):
         verified.
     """
     author_urls = [u for u, d in zip(self.get_author_urls(), self.domains)
-                   if not util.in_webmention_blacklist(d)]
+                   if not util.in_webmention_blocklist(d)]
     if ((self.verified() and not force) or self.status == 'disabled' or
         not self.features or not author_urls):
       return
@@ -581,7 +581,7 @@ def verify(self, force=False):
     self.put()
 
   def _urls_and_domains(self, auth_entity, user_url):
-    """Returns this user's valid (not webmention-blacklisted) URLs and domains.
+    """Returns this user's valid (not webmention-blocklisted) URLs and domains.
 
     Converts the auth entity's user_json to an ActivityStreams actor and uses
     its 'urls' and 'url' fields. May be overridden by subclasses.
@@ -616,7 +616,7 @@ def _urls_and_domains(self, auth_entity, user_url):
     domains = []
     for url in util.dedupe_urls(urls):  # normalizes domains to lower case
       # skip links on this source's domain itself. only currently needed for
-      # Mastodon; the other silo domains are in the webmention blacklist.
+      # Mastodon; the other silo domains are in the webmention blocklist.
       domain = util.domain_from_link(url)
       if domain != self.gr_source.DOMAIN:
         final_urls.append(url)
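To make the filtering concrete, here is roughly what falls out of this method for the inputs used in test_create_new_domain below (a sketch of the expected behavior, not code from the diff):

    # hypothetical walkthrough, using the FakeSource test fixtures:
    #   'http://foo.com'   -> kept: normal site, becomes domain 'foo.com'
    #   'http://t.co/foo'  -> dropped: t.co is in the webmention blocklist
    #   'http://fa.ke/bar' -> dropped: fa.ke is this fake silo's own domain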
@@ -11,7 +11,7 @@
 This feature adds costs in terms of HTTP requests and database
 lookups in the following primary cases:
 
-- Author's domain is known to be invalid or blacklisted, there will
+- Author's domain is known to be invalid or blocklisted, there will
   be 0 requests and 0 DB lookups.
 
 - For a syndicated post has been seen previously (regardless of
@@ -83,7 +83,7 @@ def discover(source, activity, fetch_hfeed=True, include_redirect_sources=True,
                               headers=util.request_headers(source=source))
 
   # only include mentions of the author themselves.
-  # (mostly just for Mastodon; other silos' domains are all in the blacklist, so
+  # (mostly just for Mastodon; other silos' domains are all in the blocklist, so
   # their mention URLs get dropped later anyway.)
   # (these are originally added in Source._inject_user_urls() and in poll step 2.)
   obj = activity.get('object', {})
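A minimal sketch of the filtering this comment describes, since the hunk cuts off before the implementation (hypothetical variable names; the real logic is in the rest of discover()):

    # keep a mention URL only if it points at one of the author's own sites;
    # silo-hosted URLs are also dropped later by the blocklist check inside
    # util.get_webmention_target().
    author_domains = {util.domain_from_link(u) for u in source.domain_urls}
    mention_urls = [t.get('url') for t in obj.get('tags', [])
                    if t.get('objectType') == 'mention']
    mentions = [u for u in mention_urls
                if util.domain_from_link(u) in author_domains]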
@@ -228,10 +228,10 @@ def test_source_link_not_found(self):
     self.assertEqual('failed', bw.status)
     self.assertEqual(html, bw.html)
 
-  def test_target_path_blacklisted(self):
-    bad = 'http://foo.com/blacklisted/1'
+  def test_target_path_blocklisted(self):
+    bad = 'http://foo.com/blocklisted/1'
     self.assert_error(
-      'FakeSource webmentions are not supported for URL path: /blacklisted/1',
+      'FakeSource webmentions are not supported for URL path: /blocklisted/1',
       target=bad, status=202)
     self.assertEqual(0, BlogWebmention.query().count())

@@ -330,7 +330,7 @@ def test_create_new_domain(self):
 
     # bad URLs
     for user_json in (None, {}, {'url': 'not<a>url'},
-                      # t.co is in the webmention blacklist
+                      # t.co is in the webmention blocklist
                       {'url': 'http://t.co/foo'},
                       # fa.ke is the source's domain
                       {'url': 'http://fa.ke/bar'},
@@ -354,7 +354,7 @@ def test_create_new_domain(self):
     self.assertEqual([url.lower()], source.domain_urls)
     self.assertEqual(['foo.com'], source.domains)
 
-    # multiple good URLs and one that's in the webmention blacklist
+    # multiple good URLs and one that's in the webmention blocklist
     auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps({
       'url': 'http://foo.org',
       'urls': [{'value': u} for u in
@@ -545,7 +545,7 @@ def test_verify_without_webmention_endpoint(self):
     source.verify()
     self.assertIsNone(source.webmention_endpoint)
 
-  def test_verify_checks_blacklist(self):
+  def test_verify_checks_blocklist(self):
     self.expect_webmention_requests_get('http://good/', """
 <html><meta>
 <link rel="webmention" href="http://web.ment/ion">
@@ -418,7 +418,7 @@ def test_existing_syndicated_posts(self):
   def test_invalid_webmention_target(self):
     """Confirm that no additional requests are made if the author url is
     an invalid webmention target. Right now this pretty much just
-    means they're on the blacklist. Eventually we want to filter out
+    means they're on the blocklist. Eventually we want to filter out
     targets that don't have certain features, like a webmention
     endpoint or microformats.
     """
@@ -1072,8 +1072,8 @@ def test_expand_target_urls_no_microformats(self):
     self.mox.ReplayAll()
     self.assert_created('')
 
-  def test_expand_target_urls_blacklisted_target(self):
-    """RSVP to a domain in the webmention blacklist should not trigger a fetch.
+  def test_expand_target_urls_blocklisted_target(self):
+    """RSVP to a domain in the webmention blocklist should not trigger a fetch.
     """
     self.mox.StubOutWithMock(self.source.gr_source, 'create',
                              use_mock_anything=True)
@@ -309,8 +309,8 @@ def test_non_html_file_extension(self):
     self.post_task()
     self.assert_equals([], self.responses[0].key.get().unsent)
 
-  def test_invalid_and_blacklisted_urls(self):
-    """Target URLs with domains in the blacklist should be ignored.
+  def test_invalid_and_blocklisted_urls(self):
+    """Target URLs with domains in the blocklist should be ignored.
 
     Same with invalid URLs that can't be parsed by urlparse.
     """
@@ -1532,8 +1532,8 @@ def test_cached_webmention_discovery_shouldnt_refresh_cache(self):
     self.post_task()
     self.assert_response_is('complete', sent=['http://target1/post/url'])
 
-  def test_webmention_blacklist(self):
-    """Target URLs with domains in the blacklist should be ignored.
+  def test_webmention_blocklist(self):
+    """Target URLs with domains in the blocklist should be ignored.
 
     TODO: also invalid URLs that can't be parsed by urlparse?
     """
@@ -111,7 +111,7 @@ def test_search_for_links(self):
       'text': 'a http://bar/baz ok',
     },
   }, {
-    'id_str': '3',  # no, link domain is blacklisted
+    'id_str': '3',  # no, link domain is blocklisted
     'text': 'x https://t.co/xyz/abc z',
   }, {
     'id_str': '4',  # no link
@@ -136,7 +136,7 @@ def test_search_for_links(self):
                       [a['id'] for a in self.tw.search_for_links()])
 
   def test_search_for_links_no_urls(self):
-    # only a blacklisted domain
+    # only a blocklisted domain
     self.tw.domain_urls = ['https://t.co/xyz']
     self.tw.put()
     self.mox.ReplayAll()
@@ -168,7 +168,7 @@ def test_webmention_tools_relative_webmention_endpoint_in_header(self):
     mention._discoverEndpoint()
     self.assertEqual('http://target/endpoint', mention.receiver_endpoint)
 
-  def test_get_webmention_target_blacklisted_urls(self):
+  def test_get_webmention_target_blocklisted_urls(self):
     for resolve in True, False:
       self.assertTrue(util.get_webmention_target(
         'http://good.com/a', resolve=resolve)[2])
@@ -194,8 +194,8 @@ def test_get_webmention_second_redirect_not_text_html(self):
     self.assert_equals(('https://end', 'end', False),
                        util.get_webmention_target('http://orig', resolve=True))
 
-  def test_get_webmention_middle_redirect_blacklisted(self):
-    """We should allow blacklisted domains in the middle of a redirect chain.
+  def test_get_webmention_middle_redirect_blocklisted(self):
+    """We should allow blocklisted domains in the middle of a redirect chain.
 
     ...e.g. Google's redirector https://www.google.com/url?...
     """
@@ -391,10 +391,10 @@ def test_requests_get_content_length_not_int(self):
     self.assertEqual(200, resp.status_code)
     self.assertEqual('xyz', resp.text)
 
-  def test_requests_get_url_blacklist(self):
+  def test_requests_get_url_blocklist(self):
     resp = util.requests_get(next(iter(util.URL_BLACKLIST)))
     self.assertEqual(util.HTTP_REQUEST_REFUSED_STATUS_CODE, resp.status_code)
-    self.assertEqual('Sorry, Bridgy has blacklisted this URL.', resp.text)
+    self.assertEqual('Sorry, Bridgy has blocklisted this URL.', resp.text)
 
   def test_no_accept_header(self):
     self.assertEqual(util.REQUEST_HEADERS,
@@ -420,12 +420,12 @@ def test_rhiaro_accept_header(self):
     self.mox.ReplayAll()
     util.requests_get('http://rhiaro.co.uk/')
 
-  def test_in_webmention_blacklist(self):
+  def test_in_webmention_blocklist(self):
     for bad in 't.co', 'x.t.co', 'x.y.t.co', 'abc.onion':
-      self.assertTrue(util.in_webmention_blacklist(bad), bad)
+      self.assertTrue(util.in_webmention_blocklist(bad), bad)
 
     for good in 'snarfed.org', 'www.snarfed.org', 't.co.com':
-      self.assertFalse(util.in_webmention_blacklist(good), good)
+      self.assertFalse(util.in_webmention_blocklist(good), good)
 
   def test_webmention_endpoint_cache_key(self):
     for expected, url in (
@@ -210,7 +210,7 @@ class FakeSource(Source):
   URL_CANONICALIZER = util.UrlCanonicalizer(
     domain=GR_CLASS.DOMAIN,
     headers=util.REQUEST_HEADERS)
-  PATH_BLACKLIST = (re.compile('^/blacklisted/.*'),)
+  PATH_BLACKLIST = (re.compile('^/blocklisted/.*'),)
   HAS_BLOCKS = True
 
   string_id_counter = 1
@@ -81,7 +81,7 @@ def search_for_links(self):
     """
     urls = set(util.schemeless(util.fragmentless(url), slashes=False)
                for url in self.domain_urls
-               if not util.in_webmention_blacklist(util.domain_from_link(url)))
+               if not util.in_webmention_blocklist(util.domain_from_link(url)))
     if not urls:
       return []
 
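As a quick illustration of the normalization above (my own example value, not from the diff):

    # with self.domain_urls = ['https://foo.com/bar#section']:
    #   util.fragmentless(url)               -> 'https://foo.com/bar'
    #   util.schemeless(..., slashes=False)  -> 'foo.com/bar'
    # so the link search queries on bare 'foo.com/bar' style strings.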
util.py (12 changed lines)
@@ -50,12 +50,12 @@
 now_fn = datetime.datetime.now
 
 # Domains that don't support webmentions. Mainly just the silos.
-# Subdomains are automatically blacklisted too.
+# Subdomains are automatically blocklisted too.
 #
 # We also check this when a user sign up and we extract the web site links from
 # their profile. We automatically omit links to these domains.
 _dir = os.path.dirname(__file__)
-with open(os.path.join(_dir, 'domain_blacklist.txt'), 'rt', encoding='utf-8') as f:
+with open(os.path.join(_dir, 'domain_blocklist.txt'), 'rt', encoding='utf-8') as f:
   BLACKLIST = util.load_file_lines(f)
 
 # Individual URLs that we shouldn't fetch. Started because of
@@ -237,7 +237,7 @@ def requests_get(url, **kwargs):
   if url in URL_BLACKLIST:
     resp = requests.Response()
     resp.status_code = HTTP_REQUEST_REFUSED_STATUS_CODE
-    resp._text = 'Sorry, Bridgy has blacklisted this URL.'
+    resp._text = 'Sorry, Bridgy has blocklisted this URL.'
     resp._content = resp._text.encode()
     return resp
 
@@ -283,7 +283,7 @@ def get_webmention_target(url, resolve=True, replace_test_domains=True):
   Returns:
     (string url, string pretty domain, boolean) tuple. The boolean is
     True if we should send a webmention, False otherwise, e.g. if it's a bad
-    URL, not text/html, or in the blacklist.
+    URL, not text/html, or in the blocklist.
   """
   url = util.clean_url(url)
   try:
@@ -303,15 +303,15 @@ def get_webmention_target(url, resolve=True, replace_test_domains=True):
 
   scheme = urllib.parse.urlparse(url).scheme  # require http or https
   send = (send and domain and scheme in ('http', 'https') and
-          not in_webmention_blacklist(domain))
+          not in_webmention_blocklist(domain))
 
   if replace_test_domains:
     url = replace_test_domains_with_localhost(url)
 
   return url, domain, send
 
 
-def in_webmention_blacklist(domain):
+def in_webmention_blocklist(domain):
   """Returns True if the domain or its root domain is in BLACKLIST."""
   return util.domain_or_parent_in(domain.lower(), BLACKLIST)

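Putting the two helpers together, the expected behavior per the tests in this commit (a minimal sketch, assuming the module is imported as util):

    # subdomains of a blocklisted domain match; sibling domains don't
    util.in_webmention_blocklist('t.co')      # True
    util.in_webmention_blocklist('x.y.t.co')  # True
    util.in_webmention_blocklist('t.co.com')  # False

    # the boolean in the returned tuple is the send/don't-send decision
    url, domain, send = util.get_webmention_target('http://good.com/a')
    # send is True here; for 'http://t.co/foo' it would be False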