Skip to content

Commit

Permalink
instagram: support multi-photo/video posts
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed Feb 24, 2017
1 parent 404e2ae commit fa69873
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ Changelog
* When converting quote tweets to AS, strip quoted tweet URL from end of text.
* Instagram:
* Improve HTML scraping error handling.
* Support [multi-photo/video posts](https://www.instagram.com/p/BQ0mDB2gV_O/).
* Facebook:
* Disable creating "interested" RSVPs, since Facebook's API doesn't allow it.
* Atom:
Expand Down
15 changes: 14 additions & 1 deletion granary/instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
__author__ = ['Ryan Barrett <granary@ryanb.org>']

import datetime
import itertools
import json
import logging
import operator
Expand Down Expand Up @@ -99,6 +100,10 @@ def urlopen(self, url, **kwargs):
def user_url(cls, username):
return '%s%s/' % (cls.BASE_URL, username)

@classmethod
def media_url(cls, shortcode):
    """Returns the URL of the media page for the given shortcode.

    The result is the class's BASE_URL followed by 'p/<shortcode>/'.

    Args:
      shortcode: the media's shortcode, interpolated with %s
    """
    base = cls.BASE_URL
    return '%sp/%s/' % (base, shortcode)

def get_actor(self, user_id=None):
"""Returns a user as a JSON ActivityStreams actor dict.
Expand Down Expand Up @@ -512,6 +517,7 @@ def media_to_object(self, media):
}],
'attachments': [{
'objectType': 'video' if 'videos' in media else 'image',
'url': media.get('link'),
# ActivityStreams 2.0 allows image to be a JSON array.
# http://jasnell.github.io/w3c-socialwg-activitystreams/activitystreams2.html#link
'image': sorted(
Expand Down Expand Up @@ -794,7 +800,7 @@ def _json_media_node_to_activity(self, media):
owner = media.get('owner', {})
image_url = media.get('display_src') or media.get('display_url') or ''
media.update({
'link': 'https://www.instagram.com/p/%s/' % media.get('code'),
'link': self.media_url(media.get('code') or media.get('shortcode')),
'user': owner,
'created_time': media.get('date'),
'caption': {'text': media.get('caption')},
Expand Down Expand Up @@ -837,5 +843,12 @@ def _json_media_node_to_activity(self, media):
obj = activity['object']
obj['ig_like_count'] = media['likes'].get('count', 0)

# multi-photo
children = media.get('edge_sidecar_to_children', {}).get('edges', [])
if children:
obj['attachments'] = list(itertools.chain(*(
self._json_media_node_to_activity(child.get('node'))['object']['attachments']
for child in children)))

self.postprocess_object(obj)
return super(Instagram, self).postprocess_activity(activity)
73 changes: 73 additions & 0 deletions granary/test/test_instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def tag_uri(name):
},
'attachments': [{
'objectType': 'image',
'url': 'https://www.instagram.com/p/ABC123/',
'image': [{
'url': 'http://attach/image/big',
'width': 612,
Expand Down Expand Up @@ -301,6 +302,7 @@ def tag_uri(name):
ACTIVITY_WITH_LIKES['object'] = MEDIA_OBJ_WITH_LIKES
VIDEO_OBJ = {
'attachments': [{
'url': 'https://www.instagram.com/p/ABC123/',
'image': [{
'url': 'http://distilleryimage2.ak.instagram.com/11f75f1cd9cc11e2a0fd22000aa8039a_7.jpg',
'width': 612,
Expand Down Expand Up @@ -543,6 +545,36 @@ def tag_uri(name):
del HTML_PHOTO['likes']['nodes']
del HTML_PHOTO['comments']['nodes']

# based on https://www.instagram.com/p/BQ0mDB2gV_O/
# Multi-photo/video variant of HTML_PHOTO: a deep copy of that fixture with an
# added 'edge_sidecar_to_children' entry holding two child media nodes, a video
# followed by an image.
HTML_MULTI_PHOTO = copy.deepcopy(HTML_PHOTO)
HTML_MULTI_PHOTO['edge_sidecar_to_children'] = {
    'edges': [
        {
            'node': {
                '__typename': 'GraphVideo',
                'id': '1455954809369749561',
                'shortcode': 'BQ0ly9lgWg5',
                'dimensions': {'height': 640, 'width': 640},
                'display_url': 'https://instagram.fsnc1-2.fna.fbcdn.net/t51.2885-15/s640x640/e15/16789781_644256779091860_6907514546886279168_n.jpg',
                'video_url': 'https://instagram.fsnc1-2.fna.fbcdn.net/t50.2886-16/16914332_634350210109260_5674637823722913792_n.mp4',
                'video_view_count': 0,
                'is_video': True,
                'edge_media_to_tagged_user': {'edges': []},
            },
        },
        {
            'node': {
                '__typename': 'GraphImage',
                'id': '1455954810972087680',
                'shortcode': 'BQ0ly_FAyWA',
                'dimensions': {'height': 1080, 'width': 1080},
                'display_url': 'https://instagram.fsnc1-2.fna.fbcdn.net/t51.2885-15/s1080x1080/e35/16906679_776417269184045_871950675452362752_n.jpg',
                'is_video': False,
                'edge_media_to_tagged_user': {'edges': []},
            },
        },
    ],
}

HTML_FEED = { # eg https://www.instagram.com/ when you're logged in
'environment_switcher_visible_server_guess': True,
'config': {
Expand Down Expand Up @@ -645,6 +677,15 @@ def tag_uri(name):
'entry_data': {'PostPage': [{'media': HTML_VIDEO_FULL}]},
}

# Page-level JSON for a single multi-photo/video post, wrapping
# HTML_MULTI_PHOTO with no viewer.
# eg https://www.instagram.com/p/BQ0mDB2gV_O/
HTML_MULTI_PHOTO_PAGE = {
    'config': {'csrf_token': 'xyz', 'viewer': None},
    'entry_data': {
        'PostPage': [
            {'media': HTML_MULTI_PHOTO},
        ],
    },
}


HTML_HEADER = """
<!DOCTYPE html>
...
Expand Down Expand Up @@ -701,6 +742,7 @@ def tag_uri(name):
},
'attachments': [{
'objectType': 'image',
'url': 'https://www.instagram.com/p/ABC123/',
'image': [{
'url': 'https://scontent-sjc2-1.cdninstagram.com/hphotos-xfp1/t51.2885-15/e35/12545499_1662965520652470_1466520818_n.jpg',
'width': 1080,
Expand Down Expand Up @@ -737,6 +779,7 @@ def tag_uri(name):
},
'attachments': [{
'objectType': 'video',
'url': 'https://www.instagram.com/p/XYZ789/',
'stream': [{
'url': 'https://scontent-sjc2-1.cdninstagram.com/hphotos-xtp1/t50.2886-16/12604073_746855092124622_46574942_n.mp4',
'width': 640,
Expand Down Expand Up @@ -765,6 +808,30 @@ def tag_uri(name):
'inReplyTo': [{'id': tag_uri('789_456')}],
})

# Expected ActivityStreams activity for the multi-photo/video fixture: a deep
# copy of HTML_PHOTO_ACTIVITY whose object attachments are replaced with one
# attachment per child media node (video first, then image).
HTML_MULTI_PHOTO_ACTIVITY = copy.deepcopy(HTML_PHOTO_ACTIVITY)  # ActivityStreams
HTML_MULTI_PHOTO_ACTIVITY['object'].update({
    'attachments': [
        {
            'objectType': 'video',
            'url': 'https://www.instagram.com/p/BQ0ly9lgWg5/',
            'stream': [{
                'url': 'https://instagram.fsnc1-2.fna.fbcdn.net/t50.2886-16/16914332_634350210109260_5674637823722913792_n.mp4',
                'width': 640,
                'height': 640,
            }],
            'image': [{
                'url': 'https://instagram.fsnc1-2.fna.fbcdn.net/t51.2885-15/s640x640/e15/16789781_644256779091860_6907514546886279168_n.jpg',
                'width': 640,
                'height': 640,
            }],
        },
        {
            'objectType': 'image',
            'url': 'https://www.instagram.com/p/BQ0ly_FAyWA/',
            'image': [{
                'url': 'https://instagram.fsnc1-2.fna.fbcdn.net/t51.2885-15/s1080x1080/e35/16906679_776417269184045_871950675452362752_n.jpg',
                'width': 1080,
                'height': 1080,
            }],
        },
    ],
})

HTML_ACTIVITIES = [HTML_PHOTO_ACTIVITY, HTML_VIDEO_ACTIVITY]
HTML_ACTIVITIES_FULL = [HTML_PHOTO_ACTIVITY_FULL, HTML_VIDEO_ACTIVITY_FULL]

Expand All @@ -773,6 +840,7 @@ def tag_uri(name):
HTML_PROFILE_PRIVATE_COMPLETE = HTML_HEADER + json.dumps(HTML_PROFILE_PRIVATE) + HTML_FOOTER
HTML_PHOTO_COMPLETE = HTML_HEADER + json.dumps(HTML_PHOTO_PAGE) + HTML_FOOTER
HTML_VIDEO_COMPLETE = HTML_HEADER + json.dumps(HTML_VIDEO_PAGE) + HTML_FOOTER
HTML_MULTI_PHOTO_COMPLETE = HTML_HEADER + json.dumps(HTML_MULTI_PHOTO_PAGE) + HTML_FOOTER
HTML_PHOTO_MISSING_HEADER = json.dumps(HTML_PHOTO_PAGE) + HTML_FOOTER
HTML_PHOTO_MISSING_FOOTER = HTML_HEADER + json.dumps(HTML_PHOTO_PAGE)

Expand Down Expand Up @@ -1343,6 +1411,11 @@ def test_html_to_activities_video(self):
self.assert_equals([HTML_VIDEO_ACTIVITY_FULL], activities)
self.assertIsNone(viewer)

def test_html_to_activities_multi_photo(self):
    # A multi-photo/video post page should convert to a single activity whose
    # attachments match HTML_MULTI_PHOTO_ACTIVITY, with no viewer.
    parsed = self.instagram.html_to_activities(HTML_MULTI_PHOTO_COMPLETE)
    found_activities, found_viewer = parsed
    self.assert_equals([HTML_MULTI_PHOTO_ACTIVITY], found_activities)
    self.assertIsNone(found_viewer)

def test_html_to_activities_missing_profile_picture_external_url(self):
data = copy.deepcopy(HTML_FEED)
data['config']['viewer']['profile_pic_url'] = None
Expand Down

0 comments on commit fa69873

Please sign in to comment.