Skip to content

Commit

Permalink
fully qualify relative Atom feed URLs before fetching
Browse files Browse the repository at this point in the history
fixes #58. thanks again for reporting @fluffy-critter!
  • Loading branch information
snarfed committed Sep 23, 2019
1 parent c623a29 commit 00d0cc5
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 3 deletions.
42 changes: 41 additions & 1 deletion tests/test_webmention.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
CONNEG_HEADERS_AS2,
CONNEG_HEADERS_AS2_HTML,
CONTENT_TYPE_AS2,
CONTENT_TYPE_ATOM,
CONTENT_TYPE_HTML,
CONTENT_TYPE_MAGIC_ENVELOPE,
HEADERS,
Expand Down Expand Up @@ -97,7 +98,7 @@ def setUp(self):
<link rel="salmon" href="http://orig/salmon"/>
<content type="html">baz ☕ baj</content>
</entry>
""")
""", content_type=CONTENT_TYPE_ATOM)
self.orig_as2_data = {
'@context': ['https://www.w3.org/ns/activitystreams'],
'type': 'Article',
Expand Down Expand Up @@ -817,3 +818,42 @@ def test_salmon_no_target_atom(self, mock_get, mock_post):
self.assertIn('Target post http://orig/url has no Atom link', got.body)

self.assertIsNone(Response.get_by_id('http://a/reply http://orig/post'))

def test_salmon_relative_atom_href(self, mock_get, mock_post):
  """A relative Atom link href is fully qualified before it is fetched.

  The mocked target page advertises its Atom feed as the relative URL
  ``atom/1`` and has no ``<base>`` element. The test posts a webmention
  and checks that the feed was requested as ``http://orig/atom/1``.

  Args:
    mock_get: mock for HTTP GETs; its ``side_effect`` is set here to
      return the reply, the target HTML and the Atom feed, in that order.
    mock_post: mock for HTTP POSTs, handed to ``verify_salmon``.
      (Both mocks are presumably injected by patch decorators that are
      not visible in this view.)
  """
  # Target page whose Atom <link> is relative to the page itself.
  orig_relative = requests_response("""\
<html>
<meta>
<link href='atom/1' rel='alternate' type='application/atom+xml'>
</meta>
</html>""", 'http://orig/url')
  mock_get.side_effect = [self.reply, orig_relative, self.orig_atom]

  got = app.get_response('/webmention', method='POST', body=urllib.urlencode({
    'source': 'http://a/reply',
    'target': 'http://orig/post',
  }))
  # assertEqual is the canonical name; assertEquals is a deprecated alias.
  self.assertEqual(200, got.status_int)

  # The relative href must have been made absolute before the fetch.
  mock_get.assert_any_call('http://orig/atom/1', headers=HEADERS,
                           timeout=util.HTTP_TIMEOUT)
  # The return value is not needed; the call is made for its checks.
  self.verify_salmon(mock_post)

def test_salmon_relative_atom_href_with_base(self, mock_get, mock_post):
  """A relative Atom link href honors the page's ``<base href>``.

  The mocked target page declares ``<base href='/base/'>`` and advertises
  its Atom feed as the relative URL ``atom/1``. The test posts a
  webmention and checks that the feed was requested as
  ``http://orig/base/atom/1``.

  Args:
    mock_get: mock for HTTP GETs; its ``side_effect`` is set here to
      return the reply, the target HTML and the Atom feed, in that order.
    mock_post: mock for HTTP POSTs, handed to ``verify_salmon``.
      (Both mocks are presumably injected by patch decorators that are
      not visible in this view.)
  """
  # Target page with a <base> element that the relative href is joined to.
  orig_base = requests_response("""\
<html>
<meta>
<base href='/base/'>
<link href='atom/1' rel='alternate' type='application/atom+xml'>
</meta>
</html>""", 'http://orig/url')
  mock_get.side_effect = [self.reply, orig_base, self.orig_atom]

  got = app.get_response('/webmention', method='POST', body=urllib.urlencode({
    'source': 'http://a/reply',
    'target': 'http://orig/post',
  }))
  # assertEqual is the canonical name; assertEquals is a deprecated alias.
  self.assertEqual(200, got.status_int)

  # The expected URL is the base path plus the relative href, on the
  # target's host.
  mock_get.assert_any_call('http://orig/base/atom/1', headers=HEADERS,
                           timeout=util.HTTP_TIMEOUT)
  # The return value is not needed; the call is made for its checks.
  self.verify_salmon(mock_post)
12 changes: 10 additions & 2 deletions webmention.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,15 @@ def _try_salmon(self, resp):
status=400)

# fetch Atom target post, extract and inject id into source object
feed = common.requests_get(atom_url['href']).text
base_url = ''
base = parsed.find('base')
if base and base.get('href'):
base_url = base['href']
atom_link = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
atom_url = urlparse.urljoin(
resp.target(), urlparse.urljoin(base_url, atom_link['href']))

feed = common.requests_get(atom_url).text
parsed = feedparser.parse(feed)
logging.info('Parsed: %s', json.dumps(parsed, indent=2,
default=lambda key: '-'))
Expand All @@ -271,7 +279,7 @@ def _try_salmon(self, resp):
self.source_obj.setdefault('tags', []).append({'url': url})

# extract and discover salmon endpoint
logging.info('Discovering Salmon endpoint in %s', atom_url['href'])
logging.info('Discovering Salmon endpoint in %s', atom_url)
endpoint = django_salmon.discover_salmon_endpoint(feed)

if not endpoint:
Expand Down

0 comments on commit 00d0cc5

Please sign in to comment.