
Commit

rename domain_blacklist.txt => domain_blocklist.txt
snarfed committed Jan 9, 2020
1 parent 6844355 commit 19312bf
Showing 13 changed files with 37 additions and 37 deletions.
2 changes: 1 addition & 1 deletion  domain_blacklist.txt → domain_blocklist.txt

@@ -1,5 +1,5 @@
 # Domains that don't support webmentions. Mainly just the silos.
-# Subdomains are automatically blacklisted too.
+# Subdomains are automatically blocklisted too.

 # TLDs
 #
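
The renamed comment's claim that "subdomains are automatically blocklisted too" means suffix matching on dot boundaries, not substring matching: t.co blocks x.t.co and x.y.t.co but not t.co.com, and bare TLD entries like onion block everything under them (see the test_in_webmention_blocklist cases later in this commit). A minimal sketch of that rule, with a hypothetical helper name; Bridgy actually delegates to webutil's util.domain_or_parent_in():

    # Sketch only: subdomain-aware blocklist matching on dot boundaries.
    # BLOCKLIST entries are bare domains or TLDs, as in domain_blocklist.txt.
    BLOCKLIST = {'t.co', 'onion'}  # example entries, not the real file contents

    def in_blocklist(domain):
      """True if the domain or any parent domain is in BLOCKLIST."""
      parts = domain.lower().split('.')
      # For 'x.y.t.co', try 'x.y.t.co', 'y.t.co', 't.co', 'co' in turn.
      return any('.'.join(parts[i:]) in BLOCKLIST for i in range(len(parts)))

    assert in_blocklist('x.y.t.co')      # subdomain of a blocklisted domain
    assert in_blocklist('abc.onion')     # blocklisted TLD
    assert not in_blocklist('t.co.com')  # different registered domain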
6 changes: 3 additions & 3 deletions  models.py

@@ -552,7 +552,7 @@ def verify(self, force=False):
       verified.
     """
     author_urls = [u for u, d in zip(self.get_author_urls(), self.domains)
-                   if not util.in_webmention_blacklist(d)]
+                   if not util.in_webmention_blocklist(d)]
     if ((self.verified() and not force) or self.status == 'disabled' or
         not self.features or not author_urls):
       return
@@ -581,7 +581,7 @@ def verify(self, force=False):
     self.put()

   def _urls_and_domains(self, auth_entity, user_url):
-    """Returns this user's valid (not webmention-blacklisted) URLs and domains.
+    """Returns this user's valid (not webmention-blocklisted) URLs and domains.

     Converts the auth entity's user_json to an ActivityStreams actor and uses
     its 'urls' and 'url' fields. May be overridden by subclasses.
@@ -616,7 +616,7 @@ def _urls_and_domains(self, auth_entity, user_url):
     domains = []
     for url in util.dedupe_urls(urls):  # normalizes domains to lower case
       # skip links on this source's domain itself. only currently needed for
-      # Mastodon; the other silo domains are in the webmention blacklist.
+      # Mastodon; the other silo domains are in the webmention blocklist.
       domain = util.domain_from_link(url)
       if domain != self.gr_source.DOMAIN:
         final_urls.append(url)
4 changes: 2 additions & 2 deletions  original_post_discovery.py

@@ -11,7 +11,7 @@
 This feature adds costs in terms of HTTP requests and database
 lookups in the following primary cases:
-- Author's domain is known to be invalid or blacklisted, there will
+- Author's domain is known to be invalid or blocklisted, there will
   be 0 requests and 0 DB lookups.
 - For a syndicated post has been seen previously (regardless of
@@ -83,7 +83,7 @@ def discover(source, activity, fetch_hfeed=True, include_redirect_sources=True,
                                  headers=util.request_headers(source=source))

   # only include mentions of the author themselves.
-  # (mostly just for Mastodon; other silos' domains are all in the blacklist, so
+  # (mostly just for Mastodon; other silos' domains are all in the blocklist, so
   # their mention URLs get dropped later anyway.)
   # (these are originally added in Source._inject_user_urls() and in poll step 2.)
   obj = activity.get('object', {})
6 changes: 3 additions & 3 deletions  tests/test_blog_webmention.py

@@ -228,10 +228,10 @@ def test_source_link_not_found(self):
     self.assertEqual('failed', bw.status)
     self.assertEqual(html, bw.html)

-  def test_target_path_blacklisted(self):
-    bad = 'http://foo.com/blacklisted/1'
+  def test_target_path_blocklisted(self):
+    bad = 'http://foo.com/blocklisted/1'
     self.assert_error(
-      'FakeSource webmentions are not supported for URL path: /blacklisted/1',
+      'FakeSource webmentions are not supported for URL path: /blocklisted/1',
       target=bad, status=202)
     self.assertEqual(0, BlogWebmention.query().count())
6 changes: 3 additions & 3 deletions  tests/test_models.py

@@ -330,7 +330,7 @@ def test_create_new_domain(self):

     # bad URLs
     for user_json in (None, {}, {'url': 'not<a>url'},
-                      # t.co is in the webmention blacklist
+                      # t.co is in the webmention blocklist
                       {'url': 'http://t.co/foo'},
                       # fa.ke is the source's domain
                       {'url': 'http://fa.ke/bar'},
@@ -354,7 +354,7 @@ def test_create_new_domain(self):
       self.assertEqual([url.lower()], source.domain_urls)
       self.assertEqual(['foo.com'], source.domains)

-    # multiple good URLs and one that's in the webmention blacklist
+    # multiple good URLs and one that's in the webmention blocklist
     auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps({
       'url': 'http://foo.org',
       'urls': [{'value': u} for u in
@@ -545,7 +545,7 @@ def test_verify_without_webmention_endpoint(self):
     source.verify()
     self.assertIsNone(source.webmention_endpoint)

-  def test_verify_checks_blacklist(self):
+  def test_verify_checks_blocklist(self):
     self.expect_webmention_requests_get('http://good/', """
     <html><meta>
     <link rel="webmention" href="http://web.ment/ion">
2 changes: 1 addition & 1 deletion  tests/test_original_post_discovery.py

@@ -418,7 +418,7 @@ def test_existing_syndicated_posts(self):
   def test_invalid_webmention_target(self):
     """Confirm that no additional requests are made if the author url is
     an invalid webmention target. Right now this pretty much just
-    means they're on the blacklist. Eventually we want to filter out
+    means they're on the blocklist. Eventually we want to filter out
     targets that don't have certain features, like a webmention
     endpoint or microformats.
     """
4 changes: 2 additions & 2 deletions  tests/test_publish.py

@@ -1072,8 +1072,8 @@ def test_expand_target_urls_no_microformats(self):
     self.mox.ReplayAll()
     self.assert_created('')

-  def test_expand_target_urls_blacklisted_target(self):
-    """RSVP to a domain in the webmention blacklist should not trigger a fetch.
+  def test_expand_target_urls_blocklisted_target(self):
+    """RSVP to a domain in the webmention blocklist should not trigger a fetch.
     """
     self.mox.StubOutWithMock(self.source.gr_source, 'create',
                              use_mock_anything=True)
8 changes: 4 additions & 4 deletions  tests/test_tasks.py

@@ -309,8 +309,8 @@ def test_non_html_file_extension(self):
     self.post_task()
     self.assert_equals([], self.responses[0].key.get().unsent)

-  def test_invalid_and_blacklisted_urls(self):
-    """Target URLs with domains in the blacklist should be ignored.
+  def test_invalid_and_blocklisted_urls(self):
+    """Target URLs with domains in the blocklist should be ignored.

     Same with invalid URLs that can't be parsed by urlparse.
     """
@@ -1532,8 +1532,8 @@ def test_cached_webmention_discovery_shouldnt_refresh_cache(self):
     self.post_task()
     self.assert_response_is('complete', sent=['http://target1/post/url'])

-  def test_webmention_blacklist(self):
-    """Target URLs with domains in the blacklist should be ignored.
+  def test_webmention_blocklist(self):
+    """Target URLs with domains in the blocklist should be ignored.

     TODO: also invalid URLs that can't be parsed by urlparse?
     """
4 changes: 2 additions & 2 deletions  tests/test_twitter.py

@@ -111,7 +111,7 @@ def test_search_for_links(self):
       'text': 'a http://bar/baz ok',
       },
     }, {
-      'id_str': '3',  # no, link domain is blacklisted
+      'id_str': '3',  # no, link domain is blocklisted
       'text': 'x https://t.co/xyz/abc z',
     }, {
       'id_str': '4',  # no link
@@ -136,7 +136,7 @@ def test_search_for_links(self):
                       [a['id'] for a in self.tw.search_for_links()])

   def test_search_for_links_no_urls(self):
-    # only a blacklisted domain
+    # only a blocklisted domain
     self.tw.domain_urls = ['https://t.co/xyz']
     self.tw.put()
     self.mox.ReplayAll()
16 changes: 8 additions & 8 deletions  tests/test_util.py

@@ -168,7 +168,7 @@ def test_webmention_tools_relative_webmention_endpoint_in_header(self):
     mention._discoverEndpoint()
     self.assertEqual('http://target/endpoint', mention.receiver_endpoint)

-  def test_get_webmention_target_blacklisted_urls(self):
+  def test_get_webmention_target_blocklisted_urls(self):
     for resolve in True, False:
       self.assertTrue(util.get_webmention_target(
         'http://good.com/a', resolve=resolve)[2])
@@ -194,8 +194,8 @@ def test_get_webmention_second_redirect_not_text_html(self):
     self.assert_equals(('https://end', 'end', False),
                        util.get_webmention_target('http://orig', resolve=True))

-  def test_get_webmention_middle_redirect_blacklisted(self):
-    """We should allow blacklisted domains in the middle of a redirect chain.
+  def test_get_webmention_middle_redirect_blocklisted(self):
+    """We should allow blocklisted domains in the middle of a redirect chain.

     ...e.g. Google's redirector https://www.google.com/url?...
     """
@@ -391,10 +391,10 @@ def test_requests_get_content_length_not_int(self):
     self.assertEqual(200, resp.status_code)
     self.assertEqual('xyz', resp.text)

-  def test_requests_get_url_blacklist(self):
+  def test_requests_get_url_blocklist(self):
     resp = util.requests_get(next(iter(util.URL_BLACKLIST)))
     self.assertEqual(util.HTTP_REQUEST_REFUSED_STATUS_CODE, resp.status_code)
-    self.assertEqual('Sorry, Bridgy has blacklisted this URL.', resp.text)
+    self.assertEqual('Sorry, Bridgy has blocklisted this URL.', resp.text)

   def test_no_accept_header(self):
     self.assertEqual(util.REQUEST_HEADERS,
@@ -420,12 +420,12 @@ def test_rhiaro_accept_header(self):
     self.mox.ReplayAll()
     util.requests_get('http://rhiaro.co.uk/')

-  def test_in_webmention_blacklist(self):
+  def test_in_webmention_blocklist(self):
     for bad in 't.co', 'x.t.co', 'x.y.t.co', 'abc.onion':
-      self.assertTrue(util.in_webmention_blacklist(bad), bad)
+      self.assertTrue(util.in_webmention_blocklist(bad), bad)

     for good in 'snarfed.org', 'www.snarfed.org', 't.co.com':
-      self.assertFalse(util.in_webmention_blacklist(good), good)
+      self.assertFalse(util.in_webmention_blocklist(good), good)

   def test_webmention_endpoint_cache_key(self):
     for expected, url in (
2 changes: 1 addition & 1 deletion  tests/testutil.py

@@ -210,7 +210,7 @@ class FakeSource(Source):
   URL_CANONICALIZER = util.UrlCanonicalizer(
     domain=GR_CLASS.DOMAIN,
     headers=util.REQUEST_HEADERS)
-  PATH_BLACKLIST = (re.compile('^/blacklisted/.*'),)
+  PATH_BLACKLIST = (re.compile('^/blocklisted/.*'),)
   HAS_BLOCKS = True

   string_id_counter = 1
2 changes: 1 addition & 1 deletion  twitter.py

@@ -81,7 +81,7 @@ def search_for_links(self):
     """
     urls = set(util.schemeless(util.fragmentless(url), slashes=False)
               for url in self.domain_urls
-              if not util.in_webmention_blacklist(util.domain_from_link(url)))
+              if not util.in_webmention_blocklist(util.domain_from_link(url)))
    if not urls:
      return []
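
Here search_for_links() drops blocklisted profile links before normalizing the rest into schemeless, fragmentless Twitter search terms. A rough stand-alone equivalent using only the standard library; util.schemeless, util.fragmentless, and util.domain_from_link are webutil helpers, so this sketch approximates them and uses exact-match blocklisting for brevity:

    from urllib.parse import urlparse

    BLOCKLIST = {'t.co'}  # stand-in for the domains loaded from domain_blocklist.txt

    def domain_from_link(url):
      return urlparse(url).hostname or ''

    def schemeless_fragmentless(url):
      """Approximates util.schemeless(util.fragmentless(url), slashes=False)."""
      p = urlparse(url)
      return p.netloc + p.path + ('?' + p.query if p.query else '')

    domain_urls = ['https://snarfed.org/about#me', 'https://t.co/xyz']
    urls = set(schemeless_fragmentless(u) for u in domain_urls
               if domain_from_link(u) not in BLOCKLIST)
    print(urls)  # {'snarfed.org/about'}: the t.co URL never reaches Twitter search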
12 changes: 6 additions & 6 deletions  util.py

@@ -50,12 +50,12 @@
 now_fn = datetime.datetime.now

 # Domains that don't support webmentions. Mainly just the silos.
-# Subdomains are automatically blacklisted too.
+# Subdomains are automatically blocklisted too.
 #
 # We also check this when a user sign up and we extract the web site links from
 # their profile. We automatically omit links to these domains.
 _dir = os.path.dirname(__file__)
-with open(os.path.join(_dir, 'domain_blacklist.txt'), 'rt', encoding='utf-8') as f:
+with open(os.path.join(_dir, 'domain_blocklist.txt'), 'rt', encoding='utf-8') as f:
   BLACKLIST = util.load_file_lines(f)

 # Individual URLs that we shouldn't fetch. Started because of
@@ -237,7 +237,7 @@ def requests_get(url, **kwargs):
   if url in URL_BLACKLIST:
     resp = requests.Response()
     resp.status_code = HTTP_REQUEST_REFUSED_STATUS_CODE
-    resp._text = 'Sorry, Bridgy has blacklisted this URL.'
+    resp._text = 'Sorry, Bridgy has blocklisted this URL.'
     resp._content = resp._text.encode()
     return resp
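
The hunk above shows how Bridgy refuses blocklisted URLs without any network I/O: requests_get() fabricates a requests.Response instead of fetching. A self-contained sketch of the pattern; the status code value and blocklist contents here are assumptions, not Bridgy's real ones:

    import requests

    HTTP_REQUEST_REFUSED_STATUS_CODE = 599  # assumed value, for illustration only
    URL_BLOCKLIST = {'http://example.com/refuse'}  # hypothetical entries

    def requests_get(url, **kwargs):
      """Return a synthetic response for blocklisted URLs, else fetch normally."""
      if url in URL_BLOCKLIST:
        resp = requests.Response()
        resp.status_code = HTTP_REQUEST_REFUSED_STATUS_CODE
        resp._content = b'Sorry, Bridgy has blocklisted this URL.'  # backs resp.text
        resp.url = url
        return resp
      return requests.get(url, **kwargs)

    print(requests_get('http://example.com/refuse').text)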


@@ -283,7 +283,7 @@ def get_webmention_target(url, resolve=True, replace_test_domains=True):
   Returns:
     (string url, string pretty domain, boolean) tuple. The boolean is
     True if we should send a webmention, False otherwise, e.g. if it's a bad
-    URL, not text/html, or in the blacklist.
+    URL, not text/html, or in the blocklist.
   """
   url = util.clean_url(url)
   try:
@@ -303,15 +303,15 @@ def get_webmention_target(url, resolve=True, replace_test_domains=True):
   scheme = urllib.parse.urlparse(url).scheme  # require http or https
   send = (send and domain and scheme in ('http', 'https') and
-          not in_webmention_blacklist(domain))
+          not in_webmention_blocklist(domain))

   if replace_test_domains:
     url = replace_test_domains_with_localhost(url)

   return url, domain, send
-def in_webmention_blacklist(domain):
+def in_webmention_blocklist(domain):
   """Returns True if the domain or its root domain is in BLACKLIST."""
   return util.domain_or_parent_in(domain.lower(), BLACKLIST)
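
Pulled together, the post-rename send/no-send gate is: a target gets a webmention only if it has a domain, an http(s) scheme, and neither the domain nor any parent domain is blocklisted. A condensed sketch of that decision; the real get_webmention_target() also cleans the URL, resolves redirects, checks content type, and rewrites test domains:

    from urllib.parse import urlparse

    BLOCKLIST = {'t.co', 'facebook.com'}  # example entries from domain_blocklist.txt

    def in_webmention_blocklist(domain):
      """The same parent-domain rule sketched earlier, inlined here."""
      parts = domain.lower().split('.')
      return any('.'.join(parts[i:]) in BLOCKLIST for i in range(len(parts)))

    def should_send_webmention(url):
      parsed = urlparse(url)
      domain = parsed.hostname or ''
      return bool(domain and parsed.scheme in ('http', 'https')
                  and not in_webmention_blocklist(domain))

    assert should_send_webmention('http://snarfed.org/post')
    assert not should_send_webmention('https://x.t.co/abc')     # subdomain of t.co
    assert not should_send_webmention('mailto:me@snarfed.org')  # scheme check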
