diff --git a/README.md b/README.md index 771ddbda..58de6dbb 100644 --- a/README.md +++ b/README.md @@ -310,6 +310,8 @@ Changelog * Update scraping to handle replies in new `edge_media_to_parent_comment` field ([#164](https://github.com/snarfed/granary/issues/164)). * microformats2: * Revise whitespace handling; use `white-space: pre` CSS in HTML output. +* Facebook: + * Bug fix: don't interpret `photo.php` as username in post URLs. ### 2.0 - 2019-03-01 diff --git a/granary/facebook.py b/granary/facebook.py index b2c95648..210a532a 100644 --- a/granary/facebook.py +++ b/granary/facebook.py @@ -979,7 +979,15 @@ def base_object(self, obj, verb=None, resolve_numeric_id=False): path = parsed.path.strip('/') path_parts = path.split('/') - if len(path_parts) == 1: + if path == 'photo.php': + # photo URLs look like: + # https://www.facebook.com/photo.php?fbid=123&set=a.4.5.6&type=1 + # https://www.facebook.com/user/photos/a.12.34.56/78/?type=1&offset=0 + fbids = params.get('fbid') + base_id = base_obj['id'] = fbids[0] if fbids else None + + elif len(path_parts) == 1: + # maybe a profile/page URL? if not base_obj.get('objectType'): base_obj['objectType'] = 'person' # or page if not base_id: @@ -999,13 +1007,6 @@ def base_object(self, obj, verb=None, resolve_numeric_id=False): if util.is_int(author_id) and not author.get('numeric_id'): author['numeric_id'] = author_id - # photo URLs look like: - # https://www.facebook.com/photo.php?fbid=123&set=a.4.5.6&type=1 - # https://www.facebook.com/user/photos/a.12.34.56/78/?type=1&offset=0 - if path == 'photo.php': - fbids = params.get('fbid') - if fbids: - base_obj['id'] = fbids[0] # photo album URLs look like this: # https://www.facebook.com/media/set/?set=a.12.34.56 diff --git a/granary/tests/test_facebook.py b/granary/tests/test_facebook.py index f8df8ea0..7a0c79ae 100644 --- a/granary/tests/test_facebook.py +++ b/granary/tests/test_facebook.py @@ -2986,6 +2986,23 @@ def test_base_object_recurring_event_instance(self): self.assert_equals('123', got['id']) self.assert_equals('https://www.facebook.com/123', got['url']) + def test_base_object_photo_php(self): + self.assert_equals({ + 'id': '123', + 'url': 'https://facebook.com/photo.php?fbid=123', + 'author': {}, + }, self.fb.base_object({ + 'object': {'url': 'https://facebook.com/photo.php?fbid=123'}, + })) + + def test_base_object_photo_php_no_fbid(self): + self.assert_equals({ + 'url': 'https://facebook.com/photo.php', + 'author': {}, + }, self.fb.base_object({ + 'object': {'url': 'https://facebook.com/photo.php'}, + })) + def test_base_id_recurring_event_instance(self): url = 'https://facebook.com/000?event_time_id=123' self.assert_equals('123', self.fb.base_id(url))