Skip to content

Commit

Permalink
Merge pull request #1702 from Exirel/instagram-catch-errors
Browse files Browse the repository at this point in the history
instagram: catch parse errors
  • Loading branch information
dgw committed Nov 11, 2019
2 parents 617083d + a1cb3d2 commit 33c153b
Showing 1 changed file with 89 additions and 35 deletions.
124 changes: 89 additions & 35 deletions sopel/modules/instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from __future__ import unicode_literals, absolute_import, print_function, division

from datetime import datetime
import logging

from requests import get

Expand All @@ -20,17 +21,29 @@
from json import loads


LOGGER = logging.getLogger(__name__)
INSTAGRAM_REGEX = r'(https?:\/\/(?:www\.){0,1}instagram\.com\/([a-zA-Z0-9_\.]{,30}\/)?p\/[a-zA-Z0-9_-]+)'


# TODO: Parse Instagram profile page


class ParseError(Exception):
pass


@module.url(INSTAGRAM_REGEX)
def instaparse(bot, trigger, match):
instagram_url = match.group(1)
# Get the embedded JSON
json = get_insta_json(match.group(1))
bot.say(parse_insta_json(json))
json = get_insta_json(instagram_url)
try:
bot.say(parse_insta_json(json))
except ParseError:
LOGGER.exception(
"Unable to find Instagram's payload for URL %s", instagram_url)
bot.say(
"Unable to parse this Instagram URL. "
"It's probably a temporary error; try again in a bit.")


def get_insta_json(url):
Expand All @@ -45,43 +58,84 @@ def get_insta_json(url):
return loads(json_astxt)


def _get_json_data(json):
post_pages = json.get('entry_data', {}).get('PostPage', [])
if not post_pages:
raise ParseError('No PostPage found in %r' % json)
post_page = post_pages[0]

media = post_page.get('graphql', {}).get('shortcode_media', {})

if not media:
raise ParseError('No graphql data in %r' % post_page)

return media


def parse_insta_json(json):
# Parse JSON content
needed = json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
iwidth = needed['dimensions']['width']
iheight = needed['dimensions']['height']
iuser = needed['owner']['username']
ifname = needed['owner']['full_name']
ilikes = needed['edge_media_preview_like']['count']
icomms = needed['edge_media_to_parent_comment']['count']
idate = needed['taken_at_timestamp']
pubdate = datetime.utcfromtimestamp(idate).strftime('%Y-%m-%d %H:%M:%S')
ivideo = needed['is_video']

# Does the post have a caption?
needed = _get_json_data(json)

dimensions = needed.get('dimensions', {})
owner = needed.get('owner', {})

# Build bot response
parts = []

# Title
if needed.get('is_video'):
title = "[insta] Video by "
else:
title = "[insta] Photo by "

# Author
iuser = owner.get('username')
ifname = owner.get('full_name')
if ifname and iuser:
parts.append('%s %s (@%s)' % (title, ifname, iuser))
elif iuser:
parts.append('%s @%s' % (title, iuser))
elif ifname:
parts.append('%s %s' % (title, ifname))
else:
parts.append('%s unknown user' % title)

# Media caption
try:
icap = needed['edge_media_to_caption']['edges'][0]['node']['text']
# Strip newlines
icap = icap.replace('\n', ' ')
# Truncate caption
icap = (icap[:256] + '…') if len(icap) > 256 else icap
except Exception: # TODO: be specific
icap = False

# Build bot response
if ivideo is True:
botmessage = "[insta] Video by "
else:
botmessage = "[insta] Photo by "
if ifname is None:
botmessage += "@%s" % iuser
else:
botmessage += "%s (@%s)" % (ifname, iuser)
if icap is not False:
botmessage += " | " + icap
botmessage += " | " + str(iwidth) + "x" + str(iheight)
botmessage += " | Likes: {:,} | Comments: {:,}".format(ilikes, icomms)
botmessage += " | Uploaded: " + pubdate

# Ta-da!
return botmessage
except (KeyError, IndexError):
icap = None

if icap:
parts.append(icap)

# Media width and height
iwidth = dimensions.get('width') or None
iheight = dimensions.get('height') or None

if iwidth and iheight:
parts.append('%sx%s' % (iwidth, iheight))

# Likes
ilikes = needed.get('edge_media_preview_like', {}).get('count')
if ilikes:
parts.append('Likes: {:,}'.format(ilikes))

# Comments
icomms = needed.get('edge_media_to_parent_comment', {}).get('count')
if icomms:
parts.append('Comments: {:,}'.format(icomms))

# Publishing date
idate = needed.get('taken_at_timestamp')
if idate:
dateformat = '%Y-%m-%d %H:%M:%S'
pubdate = datetime.utcfromtimestamp(idate).strftime(dateformat)
parts.append('Uploaded: %s' % pubdate)

# Build the message
return ' | '.join(parts)

0 comments on commit 33c153b

Please sign in to comment.