Skip to content

Commit

Permalink
disable instagram backfeed for now. browser extension underway, stay …
Browse files Browse the repository at this point in the history
…tuned.
  • Loading branch information
snarfed committed Dec 16, 2020
1 parent f8d0301 commit ab7d3f6
Show file tree
Hide file tree
Showing 13 changed files with 48 additions and 444 deletions.
3 changes: 3 additions & 0 deletions .gcloudignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ l3/
local/
local3/
local3.7/
node_modules
package-lock.json
pydoc/
pydocs/
python3/
Expand All @@ -41,3 +43,4 @@ TAGS
TAGS/
test/
tests/
yarn.lock
14 changes: 2 additions & 12 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,13 +242,6 @@ def template_vars(self):
if not self.source:
return vars

if self.source.key.kind() == 'Instagram':
auth = self.source.auth_entity
vars['indieauth_me'] = (
auth.id() if auth.kind() == 'IndieAuth'
else self.source.domain_urls[0] if self.source.domain_urls
else None)

# Blog webmention promos
if 'webmention' not in self.source.features:
if self.source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
Expand Down Expand Up @@ -461,17 +454,14 @@ def post(self):
if kind == 'Blogger':
return self.redirect('/blogger/delete/start?state=%s' % state)

path = ('/instagram/callback' if kind == 'Instagram'
else '/reddit/callback' if kind == 'Reddit'
path = ('/reddit/callback' if kind == 'Reddit'
else '/wordpress/add' if kind == 'WordPress'
else '/%s/delete/finish' % source.SHORT_NAME)
kwargs = {}
if kind == 'Twitter':
kwargs['access_type'] = 'read' if feature == 'listen' else 'write'

start_handler = (indieauth.StartHandler if kind == 'Instagram'
else source.OAUTH_START_HANDLER)
handler = start_handler.to(path, **kwargs)(self.request, self.response)
handler = source.OAUTH_START_HANDLER.to(path, **kwargs)(self.request, self.response)
try:
self.redirect(handler.redirect_url(state=state))
except Exception as e:
Expand Down
1 change: 0 additions & 1 deletion browser-extension/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"permissions": [
// https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/webrequest
"*://*.brid.gy/*",
"*://*.granary.io/*",
"*://*.instagram.com/*",
"cookies",
"storage",
Expand Down
28 changes: 0 additions & 28 deletions cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import models
from models import Source
from flickr import Flickr
from instagram import Instagram
from mastodon import Mastodon
from twitter import Twitter
import util
Expand Down Expand Up @@ -112,32 +111,6 @@ def get(self):
util.CachedPage.invalidate('/users')


class UpdateInstagramPictures(UpdatePictures):
"""Finds :class:`Instagram` sources with new profile pictures and updates them.
Splits the accounts up into batches to avoid hitting Instagram's rate limit.
Try to hit every account once a week.
Testing on 2017-07-05 hit the rate limit after ~170 profile page requests,
with ~270 total Instagram accounts on Bridgy.
"""
SOURCE_CLS = Instagram
FREQUENCY = datetime.timedelta(hours=1)
WEEK = datetime.timedelta(days=7)
BATCH = float(WEEK.total_seconds()) / FREQUENCY.total_seconds()
TRANSIENT_ERROR_HTTP_CODES = (Instagram.TRANSIENT_ERROR_HTTP_CODES +
Instagram.RATE_LIMIT_HTTP_CODES)

def source_query(self):
now = util.now_fn()
since_sun = (now.weekday() * datetime.timedelta(days=1) +
(now - now.replace(hour=0, minute=0, second=0)))
batch = float(Instagram.query().count()) / self.BATCH
offset = batch * float(since_sun.total_seconds()) / self.FREQUENCY.total_seconds()
return Instagram.query().fetch(offset=int(math.floor(offset)),
limit=int(math.ceil(batch)))


class UpdateFlickrPictures(UpdatePictures):
"""Finds :class:`Flickr` sources with new profile pictures and updates them.
"""
Expand Down Expand Up @@ -191,7 +164,6 @@ def get(self):
('/cron/build_circle', BuildCircle),
('/cron/replace_poll_tasks', ReplacePollTasks),
('/cron/update_flickr_pictures', UpdateFlickrPictures),
('/cron/update_instagram_pictures', UpdateInstagramPictures),
('/cron/update_mastodon_pictures', UpdateMastodonPictures),
('/cron/update_twitter_pictures', UpdateTwitterPictures),
]
6 changes: 0 additions & 6 deletions cron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,6 @@ cron:
schedule: every day 08:00 # 1am pst
target: background

# frequent, updates just a few every time, to throttle and avoid rate limiting
- description: update changed instagram profile pictures
url: /cron/update_instagram_pictures
schedule: every 1 hours
target: background

- description: update changed flickr profile pictures
url: /cron/update_flickr_pictures
schedule: every day 09:00 # 2am pst
Expand Down
2 changes: 1 addition & 1 deletion flickr.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def silo_url(self):
return self.url

def user_tag_id(self):
"""Returns the tag URI for this source, e.g. 'tag:instagram.com:123456'."""
"""Returns the tag URI for this source, e.g. 'tag:flickr.com:123456'."""
return self.gr_source.tag_uri(self.username)

def label_name(self):
Expand Down
138 changes: 24 additions & 114 deletions instagram.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,10 @@
"""Instagram API code and datastore model classes.
Example post ID and links:
* id: 595990791004231349 or 595990791004231349_247678460 (suffix is user id)
* Permalink: http://instagram.com/p/hFYnd7Nha1/
* API URL: https://api.instagram.com/v1/media/595990791004231349
* Local handler path: /post/instagram/212038/595990791004231349
Example comment ID and links:
* id: 595996024371549506
No direct API URL or permalink, as far as I can tell. :/
* API URL for all comments on that picture:
https://api.instagram.com/v1/media/595990791004231349_247678460/comments
* Local handler path:
/comment/instagram/212038/595990791004231349_247678460/595996024371549506
"""Instagram code and datastore model classes.
"""
import datetime
import logging
import urllib.parse

from granary import instagram as gr_instagram
from granary import microformats2
from granary import source as gr_source
from oauth_dropins import indieauth
# InstagramAuth entities are loaded here
from oauth_dropins import instagram as oauth_instagram
from oauth_dropins.webutil.handlers import TemplateHandler
from oauth_dropins.webutil.util import json_dumps, json_loads
import webapp2

Expand All @@ -42,12 +20,8 @@ class Instagram(Source):
https://stackoverflow.com/questions/15470180
"""
GR_CLASS = gr_instagram.Instagram
OAUTH_START_HANDLER = oauth_instagram.StartHandler
SHORT_NAME = 'instagram'
FAST_POLL = datetime.timedelta(days=7)
RATE_LIMITED_POLL = FAST_POLL
RATE_LIMIT_HTTP_CODES = ('401', '429', '503')
DISABLE_HTTP_CODES = ()
CAN_LISTEN = False
CAN_PUBLISH = False
URL_CANONICALIZER = util.UrlCanonicalizer(
domain=GR_CLASS.DOMAIN,
Expand All @@ -63,18 +37,13 @@ def new(handler, auth_entity=None, actor=None, **kwargs):
Args:
handler: the current :class:`webapp2.RequestHandler`
auth_entity: unused, must be None. Instagram sources are no longer created from an :class:`oauth_dropins.instagram.InstagramAuth` entity.
"""
user = json_loads(auth_entity.user_json)
user['actor'] = actor
auth_entity.user_json = json_dumps(user)
auth_entity.put()
assert auth_entity is None

username = actor['username']
if not kwargs.get('features'):
kwargs['features'] = ['listen']
return Instagram(id=username,
auth_entity=auth_entity.key,
name=actor.get('displayName'),
picture=actor.get('image', {}).get('url'),
url=gr_instagram.Instagram.user_url(username),
Expand Down Expand Up @@ -104,84 +73,25 @@ def get_activities_response(self, *args, **kwargs):
kwargs.setdefault('user_id', self.key_id())
return self.gr_source.get_activities_response(*args, **kwargs)

# # check that instagram profile links to web site
# actor = gr_instagram.Instagram(
# scrape=True, cookie=oauth_instagram.INSTAGRAM_SESSIONID_COOKIE
# ).get_actor(username, ignore_rate_limit=True)

# canonicalize = util.UrlCanonicalizer(redirects=False)
# website = canonicalize(auth_entity.key.id())
# urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
# logging.info('Looking for %s in %s', website, urls)
# if website not in urls:
# self.messages.add("Please add %s to your Instagram profile's website or bio field and try again." % website)
# return self.redirect('/')

# # check that the instagram account is public
# if not gr_source.Source.is_public(actor):
# self.messages.add('Your Instagram account is private. Bridgy only supports public accounts.')
# return self.redirect('/')

# self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)


class StartHandler(TemplateHandler, util.Handler):
"""Serves the "Enter your username" form page."""

def template_file(self):
return 'indieauth.html'

def post(self):
ia_start = util.oauth_starter(indieauth.StartHandler).to('/instagram/callback')(
self.request, self.response)

try:
self.redirect(ia_start.redirect_url(me=util.get_required_param(self, 'user_url')))
except Exception as e:
if util.is_connection_failure(e) or util.interpret_http_exception(e)[0]:
self.messages.add("Couldn't fetch your web site: %s" % e)
return self.redirect('/')
raise


class CallbackHandler(indieauth.CallbackHandler, util.Handler):
def finish(self, auth_entity, state=None):
if auth_entity:
user_json = json_loads(auth_entity.user_json)

# find instagram profile URL
urls = user_json.get('rel-me', [])
logging.info('rel-mes: %s', urls)
for url in util.trim_nulls(urls):
if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
username = urllib.parse.urlparse(url).path.strip('/')
break
else:
# TODO: detect when the actual error is that we couldn't fetch their
# home page. when that happens, IndieAuth currently logs a warning and
# stores user_json with just 'me', which we can't really distinguish
# from a successful fetch with no rel links or representative h-card.
# https://indiewebcamp.slack.com/archives/C1PA11USK/p1579284622006700
self.messages.add(
'No Instagram profile found. Please <a href="https://indieauth.com/setup">add an Instagram rel-me link</a>, then try again.')
return self.redirect('/')

# check that instagram profile links to web site
try:
actor = gr_instagram.Instagram(
scrape=True, cookie=oauth_instagram.INSTAGRAM_SESSIONID_COOKIE
).get_actor(username, ignore_rate_limit=True)
except Exception as e:
code, _ = util.interpret_http_exception(e)
if code in Instagram.RATE_LIMIT_HTTP_CODES:
self.messages.add(
'<a href="https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427">Apologies, Instagram is temporarily blocking us.</a> Please try again later!')
return self.redirect('/')
else:
raise

if not actor:
self.messages.add("Couldn't find Instagram user '%s'. Please check your site's rel-me link and your Instagram account." % username)
return self.redirect('/')

canonicalize = util.UrlCanonicalizer(redirects=False)
website = canonicalize(auth_entity.key.id())
urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
logging.info('Looking for %s in %s', website, urls)
if website not in urls:
self.messages.add("Please add %s to your Instagram profile's website or bio field and try again." % website)
return self.redirect('/')

# check that the instagram account is public
if not gr_source.Source.is_public(actor):
self.messages.add('Your Instagram account is private. Bridgy only supports public accounts.')
return self.redirect('/')

self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)


ROUTES = [
('/instagram/start', StartHandler),
('/instagram/indieauth', indieauth.StartHandler.to('/instagram/callback')),
('/instagram/callback', CallbackHandler),
]
ROUTES = []
19 changes: 10 additions & 9 deletions templates/about.html
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
<li><a href="#who">Who are you? Why did you make this?</a></li>
<li><a href="#history">How long has this been around?</a></li>
<li><a href="#stats">How big is this?</a></li>
<li><a href="#instagram-paused">What happened to Instagram?</a></li>
<li><a href="#rip-facebook">What happened to Facebook?</a></li>
<li><a href="#rip-google+">What happened to Google+?</a></li>
<li><a href="#profile-link">Why does my web site URL need to be in my social
Expand Down Expand Up @@ -225,6 +226,11 @@
<p>Check out the <a href="/#stats">stats on the front page</a>!</p>
</li>

<li id="instagram-paused" class="question">What happened to Instagram?</li>
<li class="answer">
<p>Instagram ramped up their scraping countermeasures around November 2020. We tried various approaches, but we were unable to find a reliable way around them. We're currently working on a browser extension to keep Instagram backfeed working. Stay tuned!</p>
</li>

<a id="facebook-publish"></a>
<li id="rip-facebook" class="question">What happened to Facebook?</li>
<li class="answer">
Expand Down Expand Up @@ -290,16 +296,12 @@
without leaving my site or application?</li>

<p>Not exactly, but you can get close. Direct your users to <code>POST</code> to a Bridgy registration URL of the form <code>https://brid.gy/SITE/start</code> (where SITE is
<code>twitter</code>, <code>instagram</code>, <code>flickr</code>, <code>github</code>, <code>mastodon</code>, or <code>reddit</code>) with the parameters:</p>
<code>twitter</code>, <code>flickr</code>, <code>github</code>, <code>mastodon</code>, or <code>reddit</code>) with the parameters:</p>

<ul>
<li><code>feature</code>, a comma-separated list of features,
usually <code>listen</code>, <code>publish</code>,
or <code>listen,publish</code></li>
<li><code>feature</code>, a comma-separated list of features, usually <code>listen</code>, <code>publish</code>, or <code>listen,publish</code></li>
<li><code>callback</code>, a URL pointing back to your site</li>
<li> <code>user_url</code> (required for Instagram, optional otherwise), the
user's URL. Allows callers to set this value without requiring it to be present
in the user's silo profile</li>
<li> <code>user_url</code> (optional), the user's URL. Allows callers to set this value without requiring it to be present in the user's silo profile</li>
</ul>
<p>They will be ushered through the registration process and sent back to
your site when it finishes.</p>
Expand All @@ -322,7 +324,6 @@
<li><b>Blogger</b> doesn't have permissions at all, <a href="https://developers.google.com/blogger/docs/2.0/developers_guide_protocol#OAuth2Authorizing">just full access or none.</a> <a href="/static/permissions_screenshots/blogger.png">Screenshot.</a></li>
<li><b>Flickr</b> has <a href="https://www.flickr.com/services/api/auth.oauth.html#authorization">read, write, and delete permissions</a>. We ask for read for listen, write for publish. Screenshots: <a href="/static/permissions_screenshots/flickr_read.png">listen</a>, <a href="/static/permissions_screenshots/flickr_write.png">publish</a>.</li>
<li><b>GitHub</b> has a few different permission models. We use an <a href="https://developer.github.com/apps/building-oauth-apps/scopes-for-oauth-apps/">OAuth app</a> to request read/write permission to public repository data, and notifications to see new comments and reactions. The alternative would have been a <a href="https://developer.github.com/apps/building-github-apps/">GitHub app,</a> which lets you grant read vs write permission to individual repos, but includes private repos, and doesn't include notifications. <a href="https://developer.github.com/apps/differences-between-apps/">More on the tradeoff here.</a> <a href="/static/permissions_screenshots/github.png">Screenshot.</a></li>
<li><b>Instagram</b> turned off their API for individual usage, so we just ask for your web site, via <a href="https://indieauth.com/">IndieAuth</a>, and your Instagram username. <a href="/static/permissions_screenshots/instagram_indieauth.png">Screenshot.</a></li>
<li><b>Mastodon</b> has <a href="https://docs.joinmastodon.org/api/permissions/">a number of fine grained permissions</a>, but listen needs most of the read categories, and publish needs most of the write ones, so Bridgy just asks for blanket read and write permission for each. Listen also uses your block list to suppress responses from accounts you've blocked, which <a href="https://docs.joinmastodon.org/api/rest/blocks/#get-api-v1-blocks">Mastodon unfortunately bundles with write access</a> and reports as <em>modify account relationships</em>. We don't actually modify any account relationships, promise!</li>
<li><b>Medium</b> has <a href="https://github.com/Medium/medium-api-docs/#user-content-21-browser-based-authentication">a few different permissions</a>. We ask for read access to your profile, public posts, and publications. <a href="/static/permissions_screenshots/medium.png">Screenshot.</a></li>
<li><b>Tumblr</b> doesn't have permissions at all, <a href="https://www.tumblr.com/docs/en/api/v2#oauth">just full access or nothing.</a> <a href="/static/permissions_screenshots/tumblr.png">Screenshot.</a></li>
Expand Down Expand Up @@ -451,7 +452,7 @@ <h3 id="listen">Pulling back responses</h3>
<li>Tweets from other people that
<a href="https://support.twitter.com/articles/14023-what-are-replies-and-mentions">@-mention</a>
you or contain links to your site.</li>
<li>Instagram comments and likes on your pictures.</li>
<!-- TODO (browser extension) <li>Instagram comments and likes on your pictures.</li> -->
<li>Flickr comments and favorites on your photos.</li>
<li>GitHub comments and emoji reactions on your issues and pull requests.</li>
<li>Mastodon replies, favorites, boosts, mentions, and links to your site.</li>
Expand Down
1 change: 0 additions & 1 deletion templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
<div id="listen-signups" class="row">
<p class="big">Connect your accounts:</p>
{{ sources['twitter'].button_html('listen')|safe }}
{{ sources['instagram'].button_html('listen')|safe }}
{{ sources['flickr'].button_html('listen')|safe }}
{{ sources['github'].button_html('listen')|safe }}
{{ sources['mastodon'].button_html('listen')|safe }}
Expand Down
25 changes: 0 additions & 25 deletions templates/indieauth.html

This file was deleted.

Loading

0 comments on commit ab7d3f6

Please sign in to comment.