Skip to content

Commit

Permalink
atom: bug fix for de-duping images in attachments
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed Sep 20, 2019
1 parent 685fb00 commit 172d84a
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ Changelog
### 2.2 - unreleased
* Facebook:
  * Add `get_activities(scrape=True)` for scraping HTML from [m.facebook.com](https://m.facebook.com/). Requires `c_user` and `xs` cookies from a logged-in session. ([snarfed/bridgy#886](https://github.com/snarfed/bridgy/issues/886))
* Atom:
  * Bug fix for de-duping images in attachments.

### 2.1 - 2019-09-04
* Convert AS2 `Mention` tags to AS1 `objectType` `mention` (non-standard) and vice versa ([snarfed/bridgy-fed#46](https://github.com/snarfed/bridgy-fed/issues/46)).
Expand Down
3 changes: 2 additions & 1 deletion granary/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,8 @@ def _prepare_activity(a, reader=True):
parsed = urllib.parse.urlparse(url)
rest = urllib.parse.urlunparse(('', '') + parsed[2:])
img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
(re.escape(parsed.netloc), re.escape(rest)))
(re.escape(parsed.netloc),
_encode_ampersands(re.escape(rest))))
if (url and url not in image_urls_seen and
not img_src_re.search(obj['rendered_content'])):
children.append(microformats2.img(url))
Expand Down
27 changes: 13 additions & 14 deletions granary/tests/test_atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,16 +750,15 @@ def test_image_duplicated_in_content(self):
https://github.com/snarfed/granary/issues/113
"""
for url in 'http://pics/1.jpg?foo', '/1.jpg?foo':
activity = {
'object': {
'content': 'foo <img src="%s"> bar' % url,
'image': [
{"url": "http://pics/1.jpg?foo"},
{"url": "http://pics/2.jpg"},
],
},
}
activity = {
'object': {
'content': 'foo <img src="/1.jpg?foo"> bar',
'image': [
{"url": "http://pics/1.jpg?foo"},
{"url": "http://pics/2.jpg"},
],
},
}

got = atom.activities_to_atom([activity], {})
self.assertNotIn('<img class="u-photo" src="http://pics/1.jpg?foo" alt="" />', got)
Expand All @@ -778,23 +777,23 @@ def test_image_duplicated_in_attachment(self):
'object': {
'content': 'foo bar',
'image': [
{'url': 'http://pics/1.jpg'},
{'url': 'http://pics/1.jpg?x&y'},
{'url': 'http://pics/2.jpg'},
],
'attachments': [{
'objectType': 'note',
'image': {'url': 'http://pics/2.jpg'},
}, {
'objectType': 'image',
'image': {'url': 'http://pics/1.jpg'},
'image': {'url': 'http://pics/1.jpg?x&y'},
}],
},
}

got = atom.activities_to_atom([activity], {})
self.assertEqual(1, got.count('<img class="u-photo" src="http://pics/1.jpg" alt="" />'), got)
self.assertEqual(1, got.count('<img class="u-photo" src="http://pics/1.jpg?x&amp;y" alt="" />'), got)
self.assert_multiline_in("""
<link rel="enclosure" href="http://pics/1.jpg" type="image/jpeg" />
<link rel="enclosure" href="http://pics/1.jpg?x&amp;y" type="" />
""", got)
self.assertNotIn('<img class="u-photo" src="http://pics/2.jpg" alt="" />', got, got)

Expand Down

0 comments on commit 172d84a

Please sign in to comment.