diff --git a/admin.py b/admin.py
index e50b4bfd..2eae52e8 100644
--- a/admin.py
+++ b/admin.py
@@ -15,6 +15,7 @@
from flask_app import app
import models
import util
+
# Import source class files so their metaclasses are initialized.
from models import BlogPost, Response, Source
import blogger, flickr, github, instagram, mastodon, medium, tumblr, twitter, wordpress_rest
@@ -22,96 +23,118 @@
NUM_ENTITIES = 10
-@app.route('/admin/responses')
+@app.route("/admin/responses")
def responses():
- """Find the most recently attempted responses and blog posts with error URLs."""
- entities = []
-
- for cls in (Response,): # BlogPost
- for e in cls.query().order(-cls.updated):
- if (len(entities) >= NUM_ENTITIES or
- e.updated < datetime.datetime.now() - datetime.timedelta(hours=1)):
- break
- elif (not e.error and not e.unsent) or e.status == 'complete':
- continue
-
- e.links = [util.pretty_link(u, new_tab=True) for u in e.error + e.failed]
- if e.key.kind() == 'Response':
- e.response = json_loads(e.response_json)
- e.activities = [json_loads(a) for a in e.activities_json]
- else:
- e.response = {'content': '[BlogPost]'}
- e.activities = [{'url': e.key.id()}]
-
- entities.append(e)
-
- return render_template('admin_responses.html', responses=entities, logs=logs)
-
-
-@app.route('/admin/sources')
+ """Find the most recently attempted responses and blog posts with error URLs."""
+ entities = []
+
+ for cls in (Response,): # BlogPost
+ for e in cls.query().order(-cls.updated):
+ if len(
+ entities
+ ) >= NUM_ENTITIES or e.updated < datetime.datetime.now() - datetime.timedelta(
+ hours=1
+ ):
+ break
+ elif (not e.error and not e.unsent) or e.status == "complete":
+ continue
+
+ e.links = [util.pretty_link(u, new_tab=True) for u in e.error + e.failed]
+ if e.key.kind() == "Response":
+ e.response = json_loads(e.response_json)
+ e.activities = [json_loads(a) for a in e.activities_json]
+ else:
+ e.response = {"content": "[BlogPost]"}
+ e.activities = [{"url": e.key.id()}]
+
+ entities.append(e)
+
+ return render_template("admin_responses.html", responses=entities, logs=logs)
+
+
+@app.route("/admin/sources")
def sources():
- """Find sources whose last poll errored out."""
- CLASSES = (flickr.Flickr, github.GitHub, twitter.Twitter,
- instagram.Instagram, mastodon.Mastodon)
- queries = [cls.query(Source.status == 'enabled',
- Source.poll_status == 'error',
- Source.rate_limited.IN((False, None)),
- Source.features == 'listen',
- ).fetch_async(NUM_ENTITIES)
- for cls in CLASSES]
-
- return render_template(
- 'admin_sources.html',
- sources=itertools.chain(*[q.get_result() for q in queries]),
- logs=logs,
- )
-
-
-@app.route('/admin/mark_complete', methods=['POST'])
+ """Find sources whose last poll errored out."""
+ CLASSES = (
+ flickr.Flickr,
+ github.GitHub,
+ twitter.Twitter,
+ instagram.Instagram,
+ mastodon.Mastodon,
+ )
+ queries = [
+ cls.query(
+ Source.status == "enabled",
+ Source.poll_status == "error",
+ Source.rate_limited.IN((False, None)),
+ Source.features == "listen",
+ ).fetch_async(NUM_ENTITIES)
+ for cls in CLASSES
+ ]
+
+ return render_template(
+ "admin_sources.html",
+ sources=itertools.chain(*[q.get_result() for q in queries]),
+ logs=logs,
+ )
+
+
+@app.route("/admin/mark_complete", methods=["POST"])
def mark_complete():
- entities = ndb.get_multi(ndb.Key(urlsafe=u)
- for u in request.values.getlist('key'))
- for e in entities:
- e.status = 'complete'
- ndb.put_multi(entities)
- return util.redirect('/admin/responses')
+ entities = ndb.get_multi(ndb.Key(urlsafe=u) for u in request.values.getlist("key"))
+ for e in entities:
+ e.status = "complete"
+ ndb.put_multi(entities)
+ return util.redirect("/admin/responses")
-@app.route('/admin/stats')
+@app.route("/admin/stats")
def stats():
- """Collect and report misc lifetime stats.
-
- https://developers.google.com/appengine/docs/python/ndb/admin#Statistics_queries
-
- Used to be on the front page, dropped them during the Flask port in August 2021.
- """
- def count(query):
- stat = query.get() # no datastore stats in dev_appserver
- return stat.count if stat else 0
-
- def kind_count(kind):
- return count(KindStat.query(KindStat.kind_name == kind))
-
- num_users = sum(kind_count(cls.__name__) for cls in models.sources.values())
- link_counts = {
- property: sum(count(KindPropertyNamePropertyTypeStat.query(
- KindPropertyNamePropertyTypeStat.kind_name == kind,
- KindPropertyNamePropertyTypeStat.property_name == property,
- # specify string because there are also >2M Response entities with null
- # values for some of these properties, as opposed to missing altogether,
- # which we don't want to include.
- KindPropertyNamePropertyTypeStat.property_type == 'String'))
- for kind in ('BlogPost', 'Response'))
- for property in ('sent', 'unsent', 'error', 'failed', 'skipped')}
-
- return render_template('admin_stats.html', **{
- # add comma separator between thousands
- k: '{:,}'.format(v) for k, v in {
- 'users': num_users,
- 'responses': kind_count('Response'),
- 'links': sum(link_counts.values()),
- 'webmentions': link_counts['sent'] + kind_count('BlogPost'),
- 'publishes': kind_count('Publish'),
- 'blogposts': kind_count('BlogPost'),
- 'webmentions_received': kind_count('BlogWebmention'),
- }.items()})
+ """Collect and report misc lifetime stats.
+
+ https://developers.google.com/appengine/docs/python/ndb/admin#Statistics_queries
+
+ Used to be on the front page, dropped them during the Flask port in August 2021.
+ """
+
+ def count(query):
+ stat = query.get() # no datastore stats in dev_appserver
+ return stat.count if stat else 0
+
+ def kind_count(kind):
+ return count(KindStat.query(KindStat.kind_name == kind))
+
+ num_users = sum(kind_count(cls.__name__) for cls in models.sources.values())
+ link_counts = {
+ property: sum(
+ count(
+ KindPropertyNamePropertyTypeStat.query(
+ KindPropertyNamePropertyTypeStat.kind_name == kind,
+ KindPropertyNamePropertyTypeStat.property_name == property,
+ # specify string because there are also >2M Response entities with null
+ # values for some of these properties, as opposed to missing altogether,
+ # which we don't want to include.
+ KindPropertyNamePropertyTypeStat.property_type == "String",
+ )
+ )
+ for kind in ("BlogPost", "Response")
+ )
+ for property in ("sent", "unsent", "error", "failed", "skipped")
+ }
+
+ return render_template(
+ "admin_stats.html",
+ **{
+ # add comma separator between thousands
+ k: "{:,}".format(v)
+ for k, v in {
+ "users": num_users,
+ "responses": kind_count("Response"),
+ "links": sum(link_counts.values()),
+ "webmentions": link_counts["sent"] + kind_count("BlogPost"),
+ "publishes": kind_count("Publish"),
+ "blogposts": kind_count("BlogPost"),
+ "webmentions_received": kind_count("BlogWebmention"),
+ }.items()
+ }
+ )
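Note on the admin.py hunks above: they only swap quote style and re-indent; no logic changes. One way to sanity-check that a reformat-only change like this preserves behavior is to compare the parsed ASTs of the two versions, since quoting, indentation, and comments don't survive parsing. A minimal sketch, assuming the before and after sources are saved under the hypothetical paths below:

    import ast

    def fingerprint(path):
        # quote style, indentation, and comments all disappear at the AST
        # level, so equal dumps imply an equivalent parse tree
        with open(path) as f:
            return ast.dump(ast.parse(f.read()))

    assert fingerprint("admin_before.py") == fingerprint("admin_after.py")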
diff --git a/appengine_config.py b/appengine_config.py
index 993c0277..7674fbb3 100644
--- a/appengine_config.py
+++ b/appengine_config.py
@@ -3,6 +3,7 @@
# Needed because I originally generated tag URIs with the current year, which
# resulted in different URIs for the same objects when the year changed. :/
from oauth_dropins.webutil import util
-if not hasattr(util, '_orig_tag_uri'):
+
+if not hasattr(util, "_orig_tag_uri"):
util._orig_tag_uri = util.tag_uri
util.tag_uri = lambda domain, name: util._orig_tag_uri(domain, name, year=2013)
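The hasattr() guard above is what keeps this monkeypatch idempotent: the original tag_uri is stashed exactly once, so re-imports never wrap the wrapper and compound the year override. The same pattern in isolation, wrapping time.time purely for illustration:

    import time

    # stash the original only on the first run, so executing this module
    # again (e.g. on reload) delegates to the real function, not the wrapper
    if not hasattr(time, "_orig_time"):
        time._orig_time = time.time
        time.time = lambda: round(time._orig_time(), 3)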
diff --git a/blog_webmention.py b/blog_webmention.py
index 0f95bc60..14bcefc1 100644
--- a/blog_webmention.py
+++ b/blog_webmention.py
@@ -15,211 +15,233 @@
class BlogWebmentionView(webmention.Webmention):
- """View for incoming webmentions against blog providers."""
-
- def dispatch_request(self, site):
- logging.info('Params: %s', list(request.values.items()))
- # strip fragments from source and target url
- self.source_url = urllib.parse.urldefrag(request.form['source'])[0]
- self.target_url = urllib.parse.urldefrag(request.form['target'])[0]
-
- # follow target url through any redirects, strip utm_* query params
- resp = util.follow_redirects(self.target_url)
- redirected_target_urls = [r.url for r in resp.history]
- self.target_url = util.clean_url(resp.url)
-
- # parse and validate target URL
- domain = util.domain_from_link(self.target_url)
- if not domain:
- self.error('Could not parse target URL %s' % self.target_url)
-
- # look up source by domain
- source_cls = models.sources[site]
- domain = domain.lower()
- self.source = (source_cls.query()
- .filter(source_cls.domains == domain)
- .filter(source_cls.features == 'webmention')
- .filter(source_cls.status == 'enabled')
- .get())
- if not self.source:
- # check for a rel-canonical link. Blogger uses these when it serves a post
- # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs
- # epeus.blogspot.com.
- # https://github.com/snarfed/bridgy/issues/805
- mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
- if not mf2:
- # fetch_mf2() already wrote the error response
- return
- domains = util.dedupe_urls(
- util.domain_from_link(url)
- for url in mf2[1]['rels'].get('canonical', []))
- if domains:
- self.source = (source_cls.query()
- .filter(source_cls.domains.IN(domains))
- .filter(source_cls.features == 'webmention')
- .filter(source_cls.status == 'enabled')
- .get())
-
- if not self.source:
- self.error(
- 'Could not find %s account for %s. Is it registered with Bridgy?' %
- (source_cls.GR_CLASS.NAME, domain))
-
- # check that the target URL path is supported
- target_path = urllib.parse.urlparse(self.target_url).path
- if target_path in ('', '/'):
- msg = 'Home page webmentions are not currently supported.'
- logging.info(msg)
- return {'error': msg}, 202
- for pattern in self.source.PATH_BLOCKLIST:
- if pattern.match(target_path):
- msg = f'{self.source.GR_CLASS.NAME} webmentions are not supported for URL path: {target_path}'
- logging.info(msg)
- return {'error': msg}, 202
-
- # create BlogWebmention entity
- id = '%s %s' % (self.source_url, self.target_url)
- self.entity = BlogWebmention.get_or_insert(
- id, source=self.source.key, redirected_target_urls=redirected_target_urls)
- if self.entity.status == 'complete':
- # TODO: response message saying update isn't supported
- return self.entity.published
- logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe().decode())
-
- # fetch source page
- fetched = self.fetch_mf2(self.source_url)
- if not fetched:
- return
- resp, mf2 = fetched
-
- item = self.find_mention_item(mf2.get('items', []))
- if not item:
- self.error('Could not find target URL %s in source page %s' %
- (self.target_url, resp.url), data=mf2, log_exception=False)
-
- # default author to target domain
- author_name = domain
- author_url = 'http://%s/' % domain
-
- # extract author name and URL from h-card, if any
- props = item['properties']
- author = get_first(props, 'author')
- if author:
- if isinstance(author, str):
- author_name = author
- else:
- author_props = author.get('properties', {})
- author_name = get_first(author_props, 'name')
- author_url = get_first(author_props, 'url')
-
- # if present, u-url overrides source url
- u_url = get_first(props, 'url')
- if u_url:
- self.entity.u_url = u_url
-
- # generate content
- content = props['content'][0] # find_mention_item() guaranteed this is here
- text = (content.get('html') or content.get('value')).strip()
- source_url = self.entity.source_url()
-    text += ' via <a href="%s">%s</a>' % (
-      source_url, util.domain_from_link(source_url))
-
- # write comment
- try:
- self.entity.published = self.source.create_comment(
- self.target_url, author_name, author_url, text)
- except Exception as e:
- code, body = util.interpret_http_exception(e)
- msg = 'Error: %s: %s; %s' % (code, e, body)
- if code == '401':
- logging.warning(f'Disabling source due to: {e}', exc_info=True)
- self.source.status = 'disabled'
- self.source.put()
- self.error(msg, status=code, report=self.source.is_beta_user())
- elif code == '404':
- # post is gone
- self.error(msg, status=code, report=False)
- elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
- self.error(msg, status=502, report=False)
- elif code or body:
- self.error(msg, status=code, report=True)
- else:
- raise
-
- # write results to datastore
- self.entity.status = 'complete'
- self.entity.put()
-
- return self.entity.published
-
- def find_mention_item(self, items):
- """Returns the mf2 item that mentions (or replies to, likes, etc) the target.
-
- May modify the items arg, e.g. may set or replace content.html or
- content.value.
-
- Args:
- items: sequence of mf2 item dicts
-
- Returns:
- mf2 item dict or None
- """
- # find target URL in source
- for item in items:
- props = item.setdefault('properties', {})
-
- # find first non-empty content element
- content = props.setdefault('content', [{}])[0]
- text = content.get('html') or content.get('value')
-
- for type in 'in-reply-to', 'like', 'like-of', 'repost', 'repost-of':
- urls = [urllib.parse.urldefrag(u)[0] for u in
- microformats2.get_string_urls(props.get(type, []))]
- if self.any_target_in(urls):
- break
- else:
- if text and self.any_target_in(text):
- type = 'post'
- url = get_first(props, 'url') or self.source_url
- name = get_first(props, 'name') or get_first(props, 'summary')
- text = content['html'] = ('mentioned this in %s.' %
- util.pretty_link(url, text=name, max_length=280))
- else:
- type = None
-
- if type:
- # found the target!
- rsvp = get_first(props, 'rsvp')
- if rsvp:
- self.entity.type = 'rsvp'
- if not text:
- content['value'] = 'RSVPed %s.' % rsvp
- else:
- self.entity.type = {'in-reply-to': 'comment',
- 'like-of': 'like',
- 'repost-of': 'repost',
- }.get(type, type)
- if not text:
- content['value'] = {'comment': 'replied to this.',
- 'like': 'liked this.',
- 'repost': 'reposted this.',
- }[self.entity.type]
- return item
-
- # check children in case this is eg an h-feed
- found = self.find_mention_item(item.get('children', []))
- if found:
- return found
-
- return None
-
- def any_target_in(self, haystack):
- """Returns true if any target URL (including redirects) is in haystack."""
- for target in self.entity.redirected_target_urls + [self.target_url]:
- if target in haystack:
- return True
- return False
-
-
-app.add_url_rule('/webmention/',
- view_func=BlogWebmentionView.as_view('blog_wm'), methods=['POST'])
+ """View for incoming webmentions against blog providers."""
+
+ def dispatch_request(self, site):
+ logging.info("Params: %s", list(request.values.items()))
+ # strip fragments from source and target url
+ self.source_url = urllib.parse.urldefrag(request.form["source"])[0]
+ self.target_url = urllib.parse.urldefrag(request.form["target"])[0]
+
+ # follow target url through any redirects, strip utm_* query params
+ resp = util.follow_redirects(self.target_url)
+ redirected_target_urls = [r.url for r in resp.history]
+ self.target_url = util.clean_url(resp.url)
+
+ # parse and validate target URL
+ domain = util.domain_from_link(self.target_url)
+ if not domain:
+ self.error("Could not parse target URL %s" % self.target_url)
+
+ # look up source by domain
+ source_cls = models.sources[site]
+ domain = domain.lower()
+ self.source = (
+ source_cls.query()
+ .filter(source_cls.domains == domain)
+ .filter(source_cls.features == "webmention")
+ .filter(source_cls.status == "enabled")
+ .get()
+ )
+ if not self.source:
+ # check for a rel-canonical link. Blogger uses these when it serves a post
+            # from multiple domains, e.g. country TLDs like epeus.blogspot.co.uk vs
+ # epeus.blogspot.com.
+ # https://github.com/snarfed/bridgy/issues/805
+ mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
+ if not mf2:
+ # fetch_mf2() already wrote the error response
+ return
+ domains = util.dedupe_urls(
+ util.domain_from_link(url)
+ for url in mf2[1]["rels"].get("canonical", [])
+ )
+ if domains:
+ self.source = (
+ source_cls.query()
+ .filter(source_cls.domains.IN(domains))
+ .filter(source_cls.features == "webmention")
+ .filter(source_cls.status == "enabled")
+ .get()
+ )
+
+ if not self.source:
+ self.error(
+ "Could not find %s account for %s. Is it registered with Bridgy?"
+ % (source_cls.GR_CLASS.NAME, domain)
+ )
+
+ # check that the target URL path is supported
+ target_path = urllib.parse.urlparse(self.target_url).path
+ if target_path in ("", "/"):
+ msg = "Home page webmentions are not currently supported."
+ logging.info(msg)
+ return {"error": msg}, 202
+ for pattern in self.source.PATH_BLOCKLIST:
+ if pattern.match(target_path):
+ msg = f"{self.source.GR_CLASS.NAME} webmentions are not supported for URL path: {target_path}"
+ logging.info(msg)
+ return {"error": msg}, 202
+
+ # create BlogWebmention entity
+ id = "%s %s" % (self.source_url, self.target_url)
+ self.entity = BlogWebmention.get_or_insert(
+ id, source=self.source.key, redirected_target_urls=redirected_target_urls
+ )
+ if self.entity.status == "complete":
+ # TODO: response message saying update isn't supported
+ return self.entity.published
+ logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe().decode())
+
+ # fetch source page
+ fetched = self.fetch_mf2(self.source_url)
+ if not fetched:
+ return
+ resp, mf2 = fetched
+
+ item = self.find_mention_item(mf2.get("items", []))
+ if not item:
+ self.error(
+ "Could not find target URL %s in source page %s"
+ % (self.target_url, resp.url),
+ data=mf2,
+ log_exception=False,
+ )
+
+ # default author to target domain
+ author_name = domain
+ author_url = "http://%s/" % domain
+
+ # extract author name and URL from h-card, if any
+ props = item["properties"]
+ author = get_first(props, "author")
+ if author:
+ if isinstance(author, str):
+ author_name = author
+ else:
+ author_props = author.get("properties", {})
+ author_name = get_first(author_props, "name")
+ author_url = get_first(author_props, "url")
+
+ # if present, u-url overrides source url
+ u_url = get_first(props, "url")
+ if u_url:
+ self.entity.u_url = u_url
+
+ # generate content
+ content = props["content"][0] # find_mention_item() guaranteed this is here
+ text = (content.get("html") or content.get("value")).strip()
+ source_url = self.entity.source_url()
+        text += ' via <a href="%s">%s</a>' % (
+ source_url,
+ util.domain_from_link(source_url),
+ )
+
+ # write comment
+ try:
+ self.entity.published = self.source.create_comment(
+ self.target_url, author_name, author_url, text
+ )
+ except Exception as e:
+ code, body = util.interpret_http_exception(e)
+ msg = "Error: %s: %s; %s" % (code, e, body)
+ if code == "401":
+ logging.warning(f"Disabling source due to: {e}", exc_info=True)
+ self.source.status = "disabled"
+ self.source.put()
+ self.error(msg, status=code, report=self.source.is_beta_user())
+ elif code == "404":
+ # post is gone
+ self.error(msg, status=code, report=False)
+ elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
+ self.error(msg, status=502, report=False)
+ elif code or body:
+ self.error(msg, status=code, report=True)
+ else:
+ raise
+
+ # write results to datastore
+ self.entity.status = "complete"
+ self.entity.put()
+
+ return self.entity.published
+
+ def find_mention_item(self, items):
+ """Returns the mf2 item that mentions (or replies to, likes, etc) the target.
+
+ May modify the items arg, e.g. may set or replace content.html or
+ content.value.
+
+ Args:
+ items: sequence of mf2 item dicts
+
+ Returns:
+ mf2 item dict or None
+ """
+ # find target URL in source
+ for item in items:
+ props = item.setdefault("properties", {})
+
+ # find first non-empty content element
+ content = props.setdefault("content", [{}])[0]
+ text = content.get("html") or content.get("value")
+
+ for type in "in-reply-to", "like", "like-of", "repost", "repost-of":
+ urls = [
+ urllib.parse.urldefrag(u)[0]
+ for u in microformats2.get_string_urls(props.get(type, []))
+ ]
+ if self.any_target_in(urls):
+ break
+ else:
+ if text and self.any_target_in(text):
+ type = "post"
+ url = get_first(props, "url") or self.source_url
+ name = get_first(props, "name") or get_first(props, "summary")
+ text = content["html"] = "mentioned this in %s." % util.pretty_link(
+ url, text=name, max_length=280
+ )
+ else:
+ type = None
+
+ if type:
+ # found the target!
+ rsvp = get_first(props, "rsvp")
+ if rsvp:
+ self.entity.type = "rsvp"
+ if not text:
+ content["value"] = "RSVPed %s." % rsvp
+ else:
+ self.entity.type = {
+ "in-reply-to": "comment",
+ "like-of": "like",
+ "repost-of": "repost",
+ }.get(type, type)
+ if not text:
+ content["value"] = {
+ "comment": "replied to this.",
+ "like": "liked this.",
+ "repost": "reposted this.",
+ }[self.entity.type]
+ return item
+
+ # check children in case this is eg an h-feed
+ found = self.find_mention_item(item.get("children", []))
+ if found:
+ return found
+
+ return None
+
+ def any_target_in(self, haystack):
+ """Returns true if any target URL (including redirects) is in haystack."""
+ for target in self.entity.redirected_target_urls + [self.target_url]:
+ if target in haystack:
+ return True
+ return False
+
+
+app.add_url_rule(
+ "/webmention/",
+ view_func=BlogWebmentionView.as_view("blog_wm"),
+ methods=["POST"],
+)
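dispatch_request() above normalizes both webmention URLs before doing anything else: fragments are dropped with urldefrag(), the target is followed through redirects, and utm_* tracking params are stripped by util.clean_url(). A rough standalone sketch of that normalization, with requests standing in for Bridgy's util helpers (an assumption, not the actual implementation):

    import urllib.parse
    import requests

    def normalize(url):
        # drop the fragment, resolve redirects, then strip utm_* params
        url = urllib.parse.urldefrag(url)[0]
        final = urllib.parse.urlparse(
            requests.head(url, allow_redirects=True, timeout=10).url)
        query = [(k, v) for k, v in urllib.parse.parse_qsl(final.query)
                 if not k.startswith("utm_")]
        return urllib.parse.urlunparse(
            final._replace(query=urllib.parse.urlencode(query)))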
diff --git a/blogger.py b/blogger.py
index b62f4dfc..018a27bd 100644
--- a/blogger.py
+++ b/blogger.py
@@ -43,186 +43,213 @@
class Blogger(models.Source):
- """A Blogger blog.
+ """A Blogger blog.
- The key name is the blog id.
- """
- GR_CLASS = collections.namedtuple('FakeGrClass', ('NAME',))(NAME='Blogger')
- OAUTH_START = oauth_blogger.Start
- SHORT_NAME = 'blogger'
- PATH_BLOCKLIST = (re.compile('^/search/.*'),)
-
- def feed_url(self):
- # https://support.google.com/blogger/answer/97933?hl=en
- return urllib.parse.urljoin(self.url, '/feeds/posts/default') # Atom
-
- def silo_url(self):
- return self.url
-
- def edit_template_url(self):
- return 'https://www.blogger.com/blogger.g?blogID=%s#template' % self.key_id()
-
- @staticmethod
- def new(auth_entity=None, blog_id=None, **kwargs):
- """Creates and returns a Blogger for the logged in user.
-
- Args:
- auth_entity: :class:`oauth_dropins.blogger.BloggerV2Auth`
- blog_id: which blog. optional. if not provided, uses the first available.
- """
- urls, domains = Blogger._urls_and_domains(auth_entity, blog_id=blog_id)
- if not urls or not domains:
- flash('Blogger blog not found. Please create one first!')
- return None
-
- if blog_id is None:
- for blog_id, hostname in zip(auth_entity.blog_ids, auth_entity.blog_hostnames):
- if domains[0] == hostname:
- break
- else:
- assert False, "Internal error, shouldn't happen"
-
- return Blogger(id=blog_id,
- auth_entity=auth_entity.key,
- url=urls[0],
- name=auth_entity.user_display_name(),
- domains=domains,
- domain_urls=urls,
- picture=auth_entity.picture_url,
- superfeedr_secret=util.generate_secret(),
- **kwargs)
-
- @staticmethod
- def _urls_and_domains(auth_entity, blog_id=None):
- """Returns an auth entity's URL and domain.
-
- Args:
- auth_entity: oauth_dropins.blogger.BloggerV2Auth
- blog_id: which blog. optional. if not provided, uses the first available.
-
- Returns:
- ([string url], [string domain])
+ The key name is the blog id.
"""
- for id, host in zip(auth_entity.blog_ids, auth_entity.blog_hostnames):
- if blog_id == id or (not blog_id and host):
- return ['http://%s/' % host], [host]
- return [], []
-
- def create_comment(self, post_url, author_name, author_url, content, client=None):
- """Creates a new comment in the source silo.
-
- Must be implemented by subclasses.
-
- Args:
- post_url: string
- author_name: string
- author_url: string
- content: string
- client: :class:`gdata.blogger.client.BloggerClient`. If None, one will be
- created from auth_entity. Used for dependency injection in the unit
- test.
+ GR_CLASS = collections.namedtuple("FakeGrClass", ("NAME",))(NAME="Blogger")
+ OAUTH_START = oauth_blogger.Start
+ SHORT_NAME = "blogger"
+ PATH_BLOCKLIST = (re.compile("^/search/.*"),)
+
+ def feed_url(self):
+ # https://support.google.com/blogger/answer/97933?hl=en
+ return urllib.parse.urljoin(self.url, "/feeds/posts/default") # Atom
+
+ def silo_url(self):
+ return self.url
+
+ def edit_template_url(self):
+ return "https://www.blogger.com/blogger.g?blogID=%s#template" % self.key_id()
+
+ @staticmethod
+ def new(auth_entity=None, blog_id=None, **kwargs):
+ """Creates and returns a Blogger for the logged in user.
+
+ Args:
+ auth_entity: :class:`oauth_dropins.blogger.BloggerV2Auth`
+ blog_id: which blog. optional. if not provided, uses the first available.
+ """
+ urls, domains = Blogger._urls_and_domains(auth_entity, blog_id=blog_id)
+ if not urls or not domains:
+ flash("Blogger blog not found. Please create one first!")
+ return None
+
+ if blog_id is None:
+ for blog_id, hostname in zip(
+ auth_entity.blog_ids, auth_entity.blog_hostnames
+ ):
+ if domains[0] == hostname:
+ break
+ else:
+ assert False, "Internal error, shouldn't happen"
+
+ return Blogger(
+ id=blog_id,
+ auth_entity=auth_entity.key,
+ url=urls[0],
+ name=auth_entity.user_display_name(),
+ domains=domains,
+ domain_urls=urls,
+ picture=auth_entity.picture_url,
+ superfeedr_secret=util.generate_secret(),
+ **kwargs,
+ )
+
+ @staticmethod
+ def _urls_and_domains(auth_entity, blog_id=None):
+ """Returns an auth entity's URL and domain.
+
+ Args:
+ auth_entity: oauth_dropins.blogger.BloggerV2Auth
+ blog_id: which blog. optional. if not provided, uses the first available.
+
+ Returns:
+ ([string url], [string domain])
+ """
+ for id, host in zip(auth_entity.blog_ids, auth_entity.blog_hostnames):
+ if blog_id == id or (not blog_id and host):
+ return ["http://%s/" % host], [host]
+
+ return [], []
+
+ def create_comment(self, post_url, author_name, author_url, content, client=None):
+ """Creates a new comment in the source silo.
+
+ Must be implemented by subclasses.
+
+ Args:
+ post_url: string
+ author_name: string
+ author_url: string
+ content: string
+ client: :class:`gdata.blogger.client.BloggerClient`. If None, one will be
+ created from auth_entity. Used for dependency injection in the unit
+ test.
+
+ Returns:
+ JSON response dict with 'id' and other fields
+ """
+ if client is None:
+ client = self.auth_entity.get().api()
+
+ # extract the post's path and look up its post id
+ path = urllib.parse.urlparse(post_url).path
+ logging.info("Looking up post id for %s", path)
+ feed = client.get_posts(self.key_id(), query=Query(path=path))
+
+ if not feed.entry:
+ return self.error("Could not find Blogger post %s" % post_url)
+ elif len(feed.entry) > 1:
+ logging.warning(
+ "Found %d Blogger posts for path %s , expected 1", len(feed.entry), path
+ )
+ post_id = feed.entry[0].get_post_id()
+
+ # create the comment
+        content = '<a href="%s">%s</a>: %s' % (author_url, author_name, content)
+ if len(content) > MAX_COMMENT_LENGTH:
+ content = content[: MAX_COMMENT_LENGTH - 3] + "..."
+ logging.info(
+ "Creating comment on blog %s, post %s: %s",
+ self.key.id(),
+ post_id,
+ content.encode("utf-8"),
+ )
+ try:
+ comment = client.add_comment(self.key.id(), post_id, content)
+ except Error as e:
+ msg = str(e)
+ if "Internal error:" in msg:
+ # known errors. e.g. https://github.com/snarfed/bridgy/issues/175
+ # https://groups.google.com/d/topic/bloggerdev/szGkT5xA9CE/discussion
+ return {"error": msg}
+ else:
+ raise
+
+ resp = {"id": comment.get_comment_id(), "response": comment.to_string()}
+ logging.info(f"Response: {resp}")
+ return resp
+
+
+@app.route("/blogger/oauth_handler")
+def oauth_callback():
+ """OAuth callback handler.
- Returns:
- JSON response dict with 'id' and other fields
+ Both the add and delete flows have to share this because Blogger's
+ oauth-dropin doesn't yet allow multiple callback handlers. :/
"""
- if client is None:
- client = self.auth_entity.get().api()
-
- # extract the post's path and look up its post id
- path = urllib.parse.urlparse(post_url).path
- logging.info('Looking up post id for %s', path)
- feed = client.get_posts(self.key_id(), query=Query(path=path))
-
- if not feed.entry:
- return self.error('Could not find Blogger post %s' % post_url)
- elif len(feed.entry) > 1:
- logging.warning('Found %d Blogger posts for path %s , expected 1',
- len(feed.entry), path)
- post_id = feed.entry[0].get_post_id()
-
- # create the comment
-    content = '<a href="%s">%s</a>: %s' % (author_url, author_name, content)
- if len(content) > MAX_COMMENT_LENGTH:
- content = content[:MAX_COMMENT_LENGTH - 3] + '...'
- logging.info('Creating comment on blog %s, post %s: %s', self.key.id(),
- post_id, content.encode('utf-8'))
- try:
- comment = client.add_comment(self.key.id(), post_id, content)
- except Error as e:
- msg = str(e)
- if ('Internal error:' in msg):
- # known errors. e.g. https://github.com/snarfed/bridgy/issues/175
- # https://groups.google.com/d/topic/bloggerdev/szGkT5xA9CE/discussion
- return {'error': msg}
- else:
- raise
-
- resp = {'id': comment.get_comment_id(), 'response': comment.to_string()}
- logging.info(f'Response: {resp}')
- return resp
-
-
-@app.route('/blogger/oauth_handler')
-def oauth_callback():
- """OAuth callback handler.
-
- Both the add and delete flows have to share this because Blogger's
- oauth-dropin doesn't yet allow multiple callback handlers. :/
- """
- auth_entity = None
- auth_entity_str_key = request.values.get('auth_entity')
- if auth_entity_str_key:
- auth_entity = ndb.Key(urlsafe=auth_entity_str_key).get()
- if not auth_entity.blog_ids or not auth_entity.blog_hostnames:
- auth_entity = None
-
- if not auth_entity:
- flash("Couldn't fetch your blogs. Maybe you're not a Blogger user?")
-
- state = request.values.get('state')
- if not state:
- state = util.construct_state_param_for_add(feature='webmention')
-
- if not auth_entity:
- util.maybe_add_or_delete_source(Blogger, auth_entity, state)
- return
-
- vars = {
- 'action': '/blogger/add',
- 'state': state,
- 'operation': util.decode_oauth_state(state).get('operation'),
- 'auth_entity_key': auth_entity.key.urlsafe().decode(),
- 'blogs': [{'id': id, 'title': title, 'domain': host}
- for id, title, host in zip(auth_entity.blog_ids,
- auth_entity.blog_titles,
- auth_entity.blog_hostnames)],
+ auth_entity = None
+ auth_entity_str_key = request.values.get("auth_entity")
+ if auth_entity_str_key:
+ auth_entity = ndb.Key(urlsafe=auth_entity_str_key).get()
+ if not auth_entity.blog_ids or not auth_entity.blog_hostnames:
+ auth_entity = None
+
+ if not auth_entity:
+ flash("Couldn't fetch your blogs. Maybe you're not a Blogger user?")
+
+ state = request.values.get("state")
+ if not state:
+ state = util.construct_state_param_for_add(feature="webmention")
+
+ if not auth_entity:
+ util.maybe_add_or_delete_source(Blogger, auth_entity, state)
+ return
+
+ vars = {
+ "action": "/blogger/add",
+ "state": state,
+ "operation": util.decode_oauth_state(state).get("operation"),
+ "auth_entity_key": auth_entity.key.urlsafe().decode(),
+ "blogs": [
+ {"id": id, "title": title, "domain": host}
+ for id, title, host in zip(
+ auth_entity.blog_ids,
+ auth_entity.blog_titles,
+ auth_entity.blog_hostnames,
+ )
+ ],
}
- logging.info(f'Rendering choose_blog.html with {vars}')
- return render_template('choose_blog.html', **vars)
+ logging.info(f"Rendering choose_blog.html with {vars}")
+ return render_template("choose_blog.html", **vars)
-@app.route('/blogger/add', methods=['POST'])
+@app.route("/blogger/add", methods=["POST"])
def blogger_add():
- util.maybe_add_or_delete_source(
- Blogger,
- ndb.Key(urlsafe=request.form['auth_entity_key']).get(),
- request.form['state'],
- blog_id=request.form['blog'],
- )
+ util.maybe_add_or_delete_source(
+ Blogger,
+ ndb.Key(urlsafe=request.form["auth_entity_key"]).get(),
+ request.form["state"],
+ blog_id=request.form["blog"],
+ )
class SuperfeedrNotify(superfeedr.Notify):
- SOURCE_CLS = Blogger
+ SOURCE_CLS = Blogger
# Blogger only has one OAuth scope. oauth-dropins fills it in.
# https://developers.google.com/blogger/docs/2.0/developers_guide_protocol#OAuth2Authorizing
start = util.oauth_starter(oauth_blogger.Start).as_view(
- 'blogger_start', '/blogger/oauth2callback')
-app.add_url_rule('/blogger/start', view_func=start, methods=['POST'])
-app.add_url_rule('/blogger/oauth2callback', view_func=oauth_blogger.Callback.as_view(
- 'blogger_oauth2callback', '/blogger/oauth_handler'))
-app.add_url_rule('/blogger/delete/start', view_func=oauth_blogger.Start.as_view(
- 'blogger_delete_start', '/blogger/oauth2callback'))
-app.add_url_rule('/blogger/notify/', view_func=SuperfeedrNotify.as_view('blogger_notify'), methods=['POST'])
+ "blogger_start", "/blogger/oauth2callback"
+)
+app.add_url_rule("/blogger/start", view_func=start, methods=["POST"])
+app.add_url_rule(
+ "/blogger/oauth2callback",
+ view_func=oauth_blogger.Callback.as_view(
+ "blogger_oauth2callback", "/blogger/oauth_handler"
+ ),
+)
+app.add_url_rule(
+ "/blogger/delete/start",
+ view_func=oauth_blogger.Start.as_view(
+ "blogger_delete_start", "/blogger/oauth2callback"
+ ),
+)
+app.add_url_rule(
+ "/blogger/notify/",
+ view_func=SuperfeedrNotify.as_view("blogger_notify"),
+ methods=["POST"],
+)
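Blogger.create_comment() above links the comment back to its author, then clips it to Blogger's length cap, reserving room for an ellipsis so the result never exceeds the limit. The clipping rule in isolation; the real MAX_COMMENT_LENGTH is defined near the top of blogger.py and isn't shown in this diff, so the value here is assumed:

    MAX_COMMENT_LENGTH = 4096  # assumed value, for illustration only

    def clip(content):
        # truncate to the cap, spending the last three characters on "..."
        if len(content) > MAX_COMMENT_LENGTH:
            content = content[:MAX_COMMENT_LENGTH - 3] + "..."
        return content

    assert len(clip("x" * 10_000)) == MAX_COMMENT_LENGTH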
diff --git a/browser.py b/browser.py
index aeed99e0..3c3572ab 100644
--- a/browser.py
+++ b/browser.py
@@ -17,377 +17,413 @@
from models import Activity, Domain, Source
import util
-JSON_CONTENT_TYPE = 'application/json'
+JSON_CONTENT_TYPE = "application/json"
# See https://www.cloudimage.io/
-IMAGE_PROXY_URL_BASE = 'https://aujtzahimq.cloudimg.io/v7/'
+IMAGE_PROXY_URL_BASE = "https://aujtzahimq.cloudimg.io/v7/"
def merge_by_id(existing, updates):
- """Merges two lists of AS1 objects by id.
+ """Merges two lists of AS1 objects by id.
- Overwrites the objects in the existing list with objects in the updates list
- with the same id. Requires all objects to have ids.
+ Overwrites the objects in the existing list with objects in the updates list
+ with the same id. Requires all objects to have ids.
- Args:
- existing: sequence of AS1 dicts
- updates: sequence of AS1 dicts
+ Args:
+ existing: sequence of AS1 dicts
+ updates: sequence of AS1 dicts
- Returns: merged list of AS1 dicts
- """
- objs = {o['id']: o for o in existing}
- objs.update({o['id']: o for o in updates})
- return sorted(objs.values(), key=itemgetter('id'))
+ Returns: merged list of AS1 dicts
+ """
+ objs = {o["id"]: o for o in existing}
+ objs.update({o["id"]: o for o in updates})
+ return sorted(objs.values(), key=itemgetter("id"))
class BrowserSource(Source):
- """A source whose data is provided by the browser extension.
-
- Current subclasses are Instagram and Facebook.
- """
- CAN_LISTEN = True
- CAN_PUBLISH = False
- AUTO_POLL = False
-
- # set by subclasses
- GR_CLASS = None
- OAUTH_START = None
- gr_source = None
-
- @classmethod
- def key_id_from_actor(cls, actor):
- """Returns the key id for this entity from a given AS1 actor.
+ """A source whose data is provided by the browser extension.
- To be implemented by subclasses.
-
- Args:
- actor: dict AS1 actor
-
- Returns: str, key id to use for the corresponding datastore entity
+ Current subclasses are Instagram and Facebook.
"""
- raise NotImplementedError()
-
- @classmethod
- def new(cls, auth_entity=None, actor=None, **kwargs):
- """Creates and returns an entity based on an AS1 actor.
- Args:
- auth_entity: unused
- actor: dict AS1 actor
- """
- assert not auth_entity
- assert actor
-
- if not kwargs.get('features'):
- kwargs['features'] = ['listen']
-
- src = cls(id=cls.key_id_from_actor(actor),
- name=actor.get('displayName'),
- picture=actor.get('image', {}).get('url'),
- **kwargs)
- src.domain_urls, src.domains = src._urls_and_domains(None, None, actor=actor)
- return src
-
- @classmethod
- def button_html(cls, feature, **kwargs):
- return cls.OAUTH_START.button_html(
- '/about#browser-extension',
- form_method='get',
- image_prefix='/oauth_dropins_static/')
-
- def get_activities_response(self, *args, **kwargs):
- """Uses Activity entities stored in the datastore."""
- activities = []
-
- activity_id = kwargs.get('activity_id')
- if activity_id:
- activity = Activity.get_by_id(self.gr_source.tag_uri(activity_id))
- if activity:
- activities = [activity]
- else:
- activities = Activity.query(Activity.source == self.key)\
- .order(-Activity.updated).fetch(50)
-
- activities = [json_loads(a.activity_json) for a in activities]
- for a in activities:
- microformats2.prefix_image_urls(a, IMAGE_PROXY_URL_BASE)
-
- return self.gr_source.make_activities_base_response(activities)
-
- def get_comment(self, comment_id, activity=None, **kwargs):
- """Uses the activity passed in the activity kwarg."""
- if activity:
- for reply in activity.get('object', {}).get('replies', {}).get('items', []):
- parsed = util.parse_tag_uri(reply.get('id', ''))
- if parsed and parsed[1] == comment_id:
- return reply
-
- def get_like(self, activity_user_id, activity_id, like_user_id, activity=None,
- **kwargs):
- """Uses the activity passed in the activity kwarg."""
- if activity:
- for tag in activity.get('object', {}).get('tags', []):
- if tag.get('verb') == 'like':
- parsed = util.parse_tag_uri(tag.get('author', {}).get('id', ''))
- if parsed and parsed[1] == like_user_id:
- return tag
+ CAN_LISTEN = True
+ CAN_PUBLISH = False
+ AUTO_POLL = False
+
+ # set by subclasses
+ GR_CLASS = None
+ OAUTH_START = None
+ gr_source = None
+
+ @classmethod
+ def key_id_from_actor(cls, actor):
+ """Returns the key id for this entity from a given AS1 actor.
+
+ To be implemented by subclasses.
+
+ Args:
+ actor: dict AS1 actor
+
+ Returns: str, key id to use for the corresponding datastore entity
+ """
+ raise NotImplementedError()
+
+ @classmethod
+ def new(cls, auth_entity=None, actor=None, **kwargs):
+ """Creates and returns an entity based on an AS1 actor.
+
+ Args:
+ auth_entity: unused
+ actor: dict AS1 actor
+ """
+ assert not auth_entity
+ assert actor
+
+ if not kwargs.get("features"):
+ kwargs["features"] = ["listen"]
+
+ src = cls(
+ id=cls.key_id_from_actor(actor),
+ name=actor.get("displayName"),
+ picture=actor.get("image", {}).get("url"),
+ **kwargs,
+ )
+ src.domain_urls, src.domains = src._urls_and_domains(None, None, actor=actor)
+ return src
+
+ @classmethod
+ def button_html(cls, feature, **kwargs):
+ return cls.OAUTH_START.button_html(
+ "/about#browser-extension",
+ form_method="get",
+ image_prefix="/oauth_dropins_static/",
+ )
+
+ def get_activities_response(self, *args, **kwargs):
+ """Uses Activity entities stored in the datastore."""
+ activities = []
+
+ activity_id = kwargs.get("activity_id")
+ if activity_id:
+ activity = Activity.get_by_id(self.gr_source.tag_uri(activity_id))
+ if activity:
+ activities = [activity]
+ else:
+ activities = (
+ Activity.query(Activity.source == self.key)
+ .order(-Activity.updated)
+ .fetch(50)
+ )
+
+ activities = [json_loads(a.activity_json) for a in activities]
+ for a in activities:
+ microformats2.prefix_image_urls(a, IMAGE_PROXY_URL_BASE)
+
+ return self.gr_source.make_activities_base_response(activities)
+
+ def get_comment(self, comment_id, activity=None, **kwargs):
+ """Uses the activity passed in the activity kwarg."""
+ if activity:
+ for reply in activity.get("object", {}).get("replies", {}).get("items", []):
+ parsed = util.parse_tag_uri(reply.get("id", ""))
+ if parsed and parsed[1] == comment_id:
+ return reply
+
+ def get_like(
+ self, activity_user_id, activity_id, like_user_id, activity=None, **kwargs
+ ):
+ """Uses the activity passed in the activity kwarg."""
+ if activity:
+ for tag in activity.get("object", {}).get("tags", []):
+ if tag.get("verb") == "like":
+ parsed = util.parse_tag_uri(tag.get("author", {}).get("id", ""))
+ if parsed and parsed[1] == like_user_id:
+ return tag
class BrowserView(View):
- """Base class for requests from the browser extension."""
- def source_class(self):
- return models.sources.get(request.path.strip('/').split('/')[0])
+ """Base class for requests from the browser extension."""
- def gr_source(self):
- return self.source_class().gr_source
+ def source_class(self):
+ return models.sources.get(request.path.strip("/").split("/")[0])
- def check_token_for_actor(self, actor):
- """Checks that the given actor is public and matches the request's token.
+ def gr_source(self):
+ return self.source_class().gr_source
- Raises: :class:`HTTPException` with HTTP 403
- """
- if not actor:
- error('Missing actor!')
+ def check_token_for_actor(self, actor):
+ """Checks that the given actor is public and matches the request's token.
- if not gr_source.Source.is_public(actor):
- error(f'Your {self.gr_source().NAME} account is private. Bridgy only supports public accounts.')
+ Raises: :class:`HTTPException` with HTTP 403
+ """
+ if not actor:
+ error("Missing actor!")
- token = request.values['token']
- domains = set(util.domain_from_link(util.replace_test_domains_with_localhost(u))
- for u in microformats2.object_urls(actor))
- domains.discard(self.source_class().GR_CLASS.DOMAIN)
+ if not gr_source.Source.is_public(actor):
+ error(
+ f"Your {self.gr_source().NAME} account is private. Bridgy only supports public accounts."
+ )
- logging.info(f'Checking token against domains {domains}')
- for domain in ndb.get_multi(ndb.Key(Domain, d) for d in domains):
- if domain and token in domain.tokens:
- return
+ token = request.values["token"]
+ domains = set(
+ util.domain_from_link(util.replace_test_domains_with_localhost(u))
+ for u in microformats2.object_urls(actor)
+ )
+ domains.discard(self.source_class().GR_CLASS.DOMAIN)
- error(f'Token {token} is not authorized for any of: {domains}', 403)
+ logging.info(f"Checking token against domains {domains}")
+ for domain in ndb.get_multi(ndb.Key(Domain, d) for d in domains):
+ if domain and token in domain.tokens:
+ return
- def auth(self):
- """Loads the source and token and checks that they're valid.
+ error(f"Token {token} is not authorized for any of: {domains}", 403)
- Expects token in the `token` query param, source in `key` or `username`.
+ def auth(self):
+ """Loads the source and token and checks that they're valid.
- Raises: :class:`HTTPException` with HTTP 400 if the token or source are
- missing or invalid
+ Expects token in the `token` query param, source in `key` or `username`.
- Returns: BrowserSource or None
- """
- # Load source
- source = util.load_source()
+ Raises: :class:`HTTPException` with HTTP 400 if the token or source are
+ missing or invalid
+
+ Returns: BrowserSource or None
+ """
+ # Load source
+ source = util.load_source()
- # Load and check token
- token = request.values['token']
- for domain in Domain.query(Domain.tokens == token):
- if domain.key.id() in source.domains:
- return source
+ # Load and check token
+ token = request.values["token"]
+ for domain in Domain.query(Domain.tokens == token):
+ if domain.key.id() in source.domains:
+ return source
- error(f'Token {token} is not authorized for any of: {source.domains}', 403)
+ error(f"Token {token} is not authorized for any of: {source.domains}", 403)
class Status(BrowserView):
- """Runs preflight checks for a source and returns status and config info.
+ """Runs preflight checks for a source and returns status and config info.
+
+ Response body is a JSON map with these fields:
+ status: string, 'enabled' or 'disabled'
+ poll-seconds: integer, current poll frequency for this source in seconds
+ """
- Response body is a JSON map with these fields:
- status: string, 'enabled' or 'disabled'
- poll-seconds: integer, current poll frequency for this source in seconds
- """
- def dispatch_request(self):
- source = self.auth()
- logging.info(f'Got source: {source}')
+ def dispatch_request(self):
+ source = self.auth()
+ logging.info(f"Got source: {source}")
- out = {
- 'status': source.status,
- 'poll-seconds': source.poll_period().total_seconds(),
- }
- logging.info(f'Returning {out}')
- return out
+ out = {
+ "status": source.status,
+ "poll-seconds": source.poll_period().total_seconds(),
+ }
+ logging.info(f"Returning {out}")
+ return out
class Homepage(BrowserView):
- """Parses a silo home page and returns the logged in user's username.
+ """Parses a silo home page and returns the logged in user's username.
- Request body is https://www.instagram.com/ HTML for a logged in user.
- """
- def dispatch_request(self):
- gr_src = self.gr_source()
- _, actor = gr_src.scraped_to_activities(request.get_data(as_text=True))
- logging.info(f'Got actor: {actor}')
+ Request body is https://www.instagram.com/ HTML for a logged in user.
+ """
+
+ def dispatch_request(self):
+ gr_src = self.gr_source()
+ _, actor = gr_src.scraped_to_activities(request.get_data(as_text=True))
+ logging.info(f"Got actor: {actor}")
- if actor:
- username = actor.get('username')
- if username:
- logging.info(f'Returning {username}')
- return jsonify(username)
+ if actor:
+ username = actor.get("username")
+ if username:
+ logging.info(f"Returning {username}")
+ return jsonify(username)
- error(f"Couldn't determine logged in {gr_src.NAME} user or username")
+ error(f"Couldn't determine logged in {gr_src.NAME} user or username")
class Feed(BrowserView):
- """Parses a silo feed page and returns the posts.
+ """Parses a silo feed page and returns the posts.
- Request body is HTML from a silo profile with posts, eg
- https://www.instagram.com/name/ , for a logged in user.
+ Request body is HTML from a silo profile with posts, eg
+ https://www.instagram.com/name/ , for a logged in user.
+
+ Response body is the JSON list of translated ActivityStreams activities.
+ """
- Response body is the JSON list of translated ActivityStreams activities.
- """
- def dispatch_request(self):
- self.auth()
- activities, _ = self.scrape()
- return jsonify(activities)
+ def dispatch_request(self):
+ self.auth()
+ activities, _ = self.scrape()
+ return jsonify(activities)
- def scrape(self):
- activities, actor = self.gr_source().scraped_to_activities(
- request.get_data(as_text=True))
- ids = ' '.join(a['id'] for a in activities)
- logging.info(f"Returning activities: {ids}")
- return activities, actor
+ def scrape(self):
+ activities, actor = self.gr_source().scraped_to_activities(
+ request.get_data(as_text=True)
+ )
+ ids = " ".join(a["id"] for a in activities)
+ logging.info(f"Returning activities: {ids}")
+ return activities, actor
class Profile(Feed):
- """Parses a silo profile page and creates or updates its Bridgy user.
+ """Parses a silo profile page and creates or updates its Bridgy user.
- Request body is HTML from an IG profile, eg https://www.instagram.com/name/ ,
- for a logged in user.
+ Request body is HTML from an IG profile, eg https://www.instagram.com/name/ ,
+ for a logged in user.
- Response body is the JSON string URL-safe key of the Bridgy source entity.
- """
- def dispatch_request(self):
- _, actor = self.scrape()
- if not actor:
- actor = self.gr_source().scraped_to_actor(request.get_data(as_text=True))
- self.check_token_for_actor(actor)
+ Response body is the JSON string URL-safe key of the Bridgy source entity.
+ """
+
+ def dispatch_request(self):
+ _, actor = self.scrape()
+ if not actor:
+ actor = self.gr_source().scraped_to_actor(request.get_data(as_text=True))
+ self.check_token_for_actor(actor)
- # create/update the Bridgy account
- source = self.source_class().create_new(self, actor=actor)
- return jsonify(source.key.urlsafe().decode())
+ # create/update the Bridgy account
+ source = self.source_class().create_new(self, actor=actor)
+ return jsonify(source.key.urlsafe().decode())
class Post(BrowserView):
- """Parses a silo post's HTML and creates or updates an Activity.
-
- Request body is HTML from a silo post, eg https://www.instagram.com/p/ABC123/
-
- Response body is the translated ActivityStreams activity JSON.
- """
- def dispatch_request(self):
- source = self.auth()
-
- gr_src = self.gr_source()
- new_activity, actor = gr_src.scraped_to_activity(request.get_data(as_text=True))
- if not new_activity:
- error(f'No {gr_src.NAME} post found in HTML')
-
- @ndb.transactional()
- def update_activity():
- id = new_activity.get('id')
- if not id:
- error('Scraped post missing id')
- activity = Activity.get_by_id(id)
-
- if activity:
- # we already have this activity! merge in any new comments.
- merged_activity = copy.deepcopy(new_activity)
- existing_activity = json_loads(activity.activity_json)
- # TODO: extract out merging replies
- replies = merged_activity.setdefault('object', {}).setdefault('replies', {})
- gr_source.merge_by_id(replies, 'items',
- existing_activity.get('object', {}).get('replies', {}).get('items', []))
- replies['totalItems'] = len(replies.get('items', []))
- # TODO: merge tags too
- activity.activity_json = json_dumps(merged_activity)
- else:
- activity = Activity(id=id, source=source.key,
- html=request.get_data(as_text=True),
- activity_json=json_dumps(new_activity))
-
- # store and return the activity
- activity.put()
- logging.info(f"Stored activity {id}")
-
- update_activity()
- return new_activity
+ """Parses a silo post's HTML and creates or updates an Activity.
+
+ Request body is HTML from a silo post, eg https://www.instagram.com/p/ABC123/
+
+ Response body is the translated ActivityStreams activity JSON.
+ """
+
+ def dispatch_request(self):
+ source = self.auth()
+
+ gr_src = self.gr_source()
+ new_activity, actor = gr_src.scraped_to_activity(request.get_data(as_text=True))
+ if not new_activity:
+ error(f"No {gr_src.NAME} post found in HTML")
+
+ @ndb.transactional()
+ def update_activity():
+ id = new_activity.get("id")
+ if not id:
+ error("Scraped post missing id")
+ activity = Activity.get_by_id(id)
+
+ if activity:
+ # we already have this activity! merge in any new comments.
+ merged_activity = copy.deepcopy(new_activity)
+ existing_activity = json_loads(activity.activity_json)
+ # TODO: extract out merging replies
+ replies = merged_activity.setdefault("object", {}).setdefault(
+ "replies", {}
+ )
+ gr_source.merge_by_id(
+ replies,
+ "items",
+ existing_activity.get("object", {})
+ .get("replies", {})
+ .get("items", []),
+ )
+ replies["totalItems"] = len(replies.get("items", []))
+ # TODO: merge tags too
+ activity.activity_json = json_dumps(merged_activity)
+ else:
+ activity = Activity(
+ id=id,
+ source=source.key,
+ html=request.get_data(as_text=True),
+ activity_json=json_dumps(new_activity),
+ )
+
+ # store and return the activity
+ activity.put()
+ logging.info(f"Stored activity {id}")
+
+ update_activity()
+ return new_activity
class Reactions(BrowserView):
- """Parses reactions/likes from silo HTML and adds them to an existing Activity.
+ """Parses reactions/likes from silo HTML and adds them to an existing Activity.
- Requires the request parameter `id` with the silo post's id (not shortcode!).
+ Requires the request parameter `id` with the silo post's id (not shortcode!).
+
+ Response body is the translated ActivityStreams JSON for the reactions.
+ """
- Response body is the translated ActivityStreams JSON for the reactions.
- """
- def dispatch_request(self, *args):
- source = self.auth()
+ def dispatch_request(self, *args):
+ source = self.auth()
- gr_src = self.gr_source()
- id = request.values['id']
+ gr_src = self.gr_source()
+ id = request.values["id"]
- # validate request
- parsed_id = util.parse_tag_uri(id)
- if not parsed_id:
- error(f'Expected id to be tag URI; got {id}')
+ # validate request
+ parsed_id = util.parse_tag_uri(id)
+ if not parsed_id:
+ error(f"Expected id to be tag URI; got {id}")
- activity = Activity.get_by_id(id)
- if not activity:
- error(f'No {gr_src.NAME} post found for id {id}', 404)
- elif activity.source != source.key:
- error(f'Activity {id} is owned by {activity.source}, not {source.key}', 403)
+ activity = Activity.get_by_id(id)
+ if not activity:
+ error(f"No {gr_src.NAME} post found for id {id}", 404)
+ elif activity.source != source.key:
+ error(f"Activity {id} is owned by {activity.source}, not {source.key}", 403)
- activity_data = json_loads(activity.activity_json)
+ activity_data = json_loads(activity.activity_json)
- # convert new reactions to AS, merge into existing activity
- try:
- new_reactions = gr_src.merge_scraped_reactions(
- request.get_data(as_text=True), activity_data)
- except ValueError as e:
- msg = "Couldn't parse scraped reactions: %s" % e
- logging.error(msg, exc_info=True)
- error(msg)
+ # convert new reactions to AS, merge into existing activity
+ try:
+ new_reactions = gr_src.merge_scraped_reactions(
+ request.get_data(as_text=True), activity_data
+ )
+ except ValueError as e:
+ msg = "Couldn't parse scraped reactions: %s" % e
+ logging.error(msg, exc_info=True)
+ error(msg)
- activity.activity_json = json_dumps(activity_data)
- activity.put()
+ activity.activity_json = json_dumps(activity_data)
+ activity.put()
- reaction_ids = ' '.join(r['id'] for r in new_reactions)
- logging.info(f"Stored reactions for activity {id}: {reaction_ids}")
- return jsonify(new_reactions)
+ reaction_ids = " ".join(r["id"] for r in new_reactions)
+ logging.info(f"Stored reactions for activity {id}: {reaction_ids}")
+ return jsonify(new_reactions)
class Poll(BrowserView):
- """Triggers a poll for a browser-based account."""
- def dispatch_request(self):
- source = self.auth()
- util.add_poll_task(source)
- return jsonify('OK')
+ """Triggers a poll for a browser-based account."""
+
+ def dispatch_request(self):
+ source = self.auth()
+ util.add_poll_task(source)
+ return jsonify("OK")
class TokenDomains(BrowserView):
- """Returns the domains that a token is registered for."""
- def dispatch_request(self):
- token = request.values['token']
+ """Returns the domains that a token is registered for."""
+
+ def dispatch_request(self):
+ token = request.values["token"]
- domains = [d.key.id() for d in Domain.query(Domain.tokens == token)]
- if not domains:
- error(f'No registered domains for token {token}', 404)
+ domains = [d.key.id() for d in Domain.query(Domain.tokens == token)]
+ if not domains:
+ error(f"No registered domains for token {token}", 404)
- return jsonify(domains)
+ return jsonify(domains)
def route(source_cls):
- """Registers browser extension URL routes for a given source class.
-
- ...specifically, with the source's short name as the routes' URL prefix.
- """
- for route, cls in (
- (f'/{source_cls.SHORT_NAME}/browser/status', Status),
- (f'/{source_cls.SHORT_NAME}/browser/homepage', Homepage),
- (f'/{source_cls.SHORT_NAME}/browser/profile', Profile),
- (f'/{source_cls.SHORT_NAME}/browser/feed', Feed),
- (f'/{source_cls.SHORT_NAME}/browser/post', Post),
- (f'/{source_cls.SHORT_NAME}/browser/likes', Reactions),
- (f'/{source_cls.SHORT_NAME}/browser/reactions', Reactions),
- (f'/{source_cls.SHORT_NAME}/browser/poll', Poll),
- (f'/{source_cls.SHORT_NAME}/browser/token-domains', TokenDomains),
+ """Registers browser extension URL routes for a given source class.
+
+ ...specifically, with the source's short name as the routes' URL prefix.
+ """
+ for route, cls in (
+ (f"/{source_cls.SHORT_NAME}/browser/status", Status),
+ (f"/{source_cls.SHORT_NAME}/browser/homepage", Homepage),
+ (f"/{source_cls.SHORT_NAME}/browser/profile", Profile),
+ (f"/{source_cls.SHORT_NAME}/browser/feed", Feed),
+ (f"/{source_cls.SHORT_NAME}/browser/post", Post),
+ (f"/{source_cls.SHORT_NAME}/browser/likes", Reactions),
+ (f"/{source_cls.SHORT_NAME}/browser/reactions", Reactions),
+ (f"/{source_cls.SHORT_NAME}/browser/poll", Poll),
+ (f"/{source_cls.SHORT_NAME}/browser/token-domains", TokenDomains),
):
- app.add_url_rule(route, view_func=cls.as_view(route),
- methods=['GET', 'POST'] if cls == Status else ['POST'])
+ app.add_url_rule(
+ route,
+ view_func=cls.as_view(route),
+ methods=["GET", "POST"] if cls == Status else ["POST"],
+ )
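merge_by_id() at the top of browser.py keys both lists by id, lets updates overwrite collisions, and returns the union sorted by id. A self-contained usage sketch (the function body is copied from the hunk above):

    from operator import itemgetter

    def merge_by_id(existing, updates):
        objs = {o["id"]: o for o in existing}
        objs.update({o["id"]: o for o in updates})
        return sorted(objs.values(), key=itemgetter("id"))

    existing = [{"id": "tag:ig,2013:1", "content": "old"},
                {"id": "tag:ig,2013:2", "content": "kept"}]
    updates = [{"id": "tag:ig,2013:1", "content": "new"}]

    merge_by_id(existing, updates)
    # [{'id': 'tag:ig,2013:1', 'content': 'new'},
    #  {'id': 'tag:ig,2013:2', 'content': 'kept'}]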
diff --git a/config.py b/config.py
index fd344555..13815dd7 100644
--- a/config.py
+++ b/config.py
@@ -7,10 +7,10 @@
JSONIFY_PRETTYPRINT_REGULAR = True
if appengine_info.DEBUG:
- ENV = 'development'
- CACHE_TYPE = 'NullCache'
- SECRET_KEY = 'sooper seekret'
+ ENV = "development"
+ CACHE_TYPE = "NullCache"
+ SECRET_KEY = "sooper seekret"
else:
- ENV = 'production'
- CACHE_TYPE = 'SimpleCache'
- SECRET_KEY = util.read('flask_secret_key')
+ ENV = "production"
+ CACHE_TYPE = "SimpleCache"
+ SECRET_KEY = util.read("flask_secret_key")
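config.py stays a plain module of UPPERCASE settings that switches on appengine_info.DEBUG. Flask picks such modules up wholesale; a sketch of the usual hookup (the actual wiring lives in flask_app.py, which this diff doesn't touch, so this shows the general pattern rather than quoting that file):

    from flask import Flask

    app = Flask(__name__)
    # from_object() copies every UPPERCASE attribute of the named module
    # into app.config, which is how a settings module like this is applied
    app.config.from_object("config")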
diff --git a/cron.py b/cron.py
index d90601e8..36d43ec0 100644
--- a/cron.py
+++ b/cron.py
@@ -20,112 +20,127 @@
from twitter import Twitter
import util
-CIRCLECI_TOKEN = util.read('circleci_token')
+CIRCLECI_TOKEN = util.read("circleci_token")
PAGE_SIZE = 20
class LastUpdatedPicture(StringIdModel):
- """Stores the last user in a given silo that we updated profile picture for.
+ """Stores the last user in a given silo that we updated profile picture for.
- Key id is the silo's SHORT_NAME.
- """
- last = ndb.KeyProperty()
- created = ndb.DateTimeProperty(auto_now_add=True, required=True)
- updated = ndb.DateTimeProperty(auto_now=True)
+ Key id is the silo's SHORT_NAME.
+ """
+ last = ndb.KeyProperty()
+ created = ndb.DateTimeProperty(auto_now_add=True, required=True)
+ updated = ndb.DateTimeProperty(auto_now=True)
-@app.route('/cron/replace_poll_tasks')
-def replace_poll_tasks():
- """Finds sources missing their poll tasks and adds new ones."""
- now = datetime.datetime.now()
- queries = [cls.query(Source.features == 'listen', Source.status == 'enabled')
- for cls in models.sources.values() if cls.AUTO_POLL]
- for source in itertools.chain(*queries):
- age = now - source.last_poll_attempt
- if age > max(source.poll_period() * 2, datetime.timedelta(hours=2)):
- logging.info('%s last polled %s ago. Adding new poll task.',
- source.bridgy_url(), age)
- util.add_poll_task(source)
- return ''
+@app.route("/cron/replace_poll_tasks")
+def replace_poll_tasks():
+ """Finds sources missing their poll tasks and adds new ones."""
+ now = datetime.datetime.now()
+ queries = [
+ cls.query(Source.features == "listen", Source.status == "enabled")
+ for cls in models.sources.values()
+ if cls.AUTO_POLL
+ ]
+ for source in itertools.chain(*queries):
+ age = now - source.last_poll_attempt
+ if age > max(source.poll_period() * 2, datetime.timedelta(hours=2)):
+ logging.info(
+ "%s last polled %s ago. Adding new poll task.", source.bridgy_url(), age
+ )
+ util.add_poll_task(source)
+
+ return ""
class UpdatePictures(View):
- """Finds sources with new profile pictures and updates them."""
- SOURCE_CLS = None
-
- @classmethod
- def user_id(cls, source):
- return source.key_id()
-
- def dispatch_request(self):
- g.TRANSIENT_ERROR_HTTP_CODES = (self.SOURCE_CLS.TRANSIENT_ERROR_HTTP_CODES +
- self.SOURCE_CLS.RATE_LIMIT_HTTP_CODES)
-
- query = self.SOURCE_CLS.query().order(self.SOURCE_CLS.key)
- last = LastUpdatedPicture.get_by_id(self.SOURCE_CLS.SHORT_NAME)
- if last and last.last:
- query = query.filter(self.SOURCE_CLS.key > last.last)
-
- results, _, more = query.fetch_page(PAGE_SIZE)
- for source in results:
- if source.features and source.status != 'disabled':
- logging.debug('checking for updated profile pictures for: %s',
- source.bridgy_url())
- try:
- actor = source.gr_source.get_actor(self.user_id(source))
- except BaseException as e:
- # Mastodon API returns HTTP 404 for deleted (etc) users, and
- # often one or more users' Mastodon instances are down.
- code, _ = util.interpret_http_exception(e)
- if code:
- continue
- raise
-
- if not actor:
- logging.info(f"Couldn't fetch {source.bridgy_url()} 's user")
- continue
-
- new_pic = actor.get('image', {}).get('url')
- if not new_pic or source.picture == new_pic:
- logging.info(f'No new picture found for {source.bridgy_url()}')
- continue
-
- @ndb.transactional()
- def update():
- src = source.key.get()
- src.picture = new_pic
- src.put()
-
- logging.info(f'Updating profile picture for {source.bridgy_url()} from {source.picture} to {new_pic}')
- update()
-
- LastUpdatedPicture(id=self.SOURCE_CLS.SHORT_NAME,
- last=source.key if more else None).put()
- return 'OK'
+ """Finds sources with new profile pictures and updates them."""
+
+ SOURCE_CLS = None
+
+ @classmethod
+ def user_id(cls, source):
+ return source.key_id()
+
+ def dispatch_request(self):
+ g.TRANSIENT_ERROR_HTTP_CODES = (
+ self.SOURCE_CLS.TRANSIENT_ERROR_HTTP_CODES
+ + self.SOURCE_CLS.RATE_LIMIT_HTTP_CODES
+ )
+
+ query = self.SOURCE_CLS.query().order(self.SOURCE_CLS.key)
+ last = LastUpdatedPicture.get_by_id(self.SOURCE_CLS.SHORT_NAME)
+ if last and last.last:
+ query = query.filter(self.SOURCE_CLS.key > last.last)
+
+ results, _, more = query.fetch_page(PAGE_SIZE)
+ for source in results:
+ if source.features and source.status != "disabled":
+ logging.debug(
+ "checking for updated profile pictures for: %s", source.bridgy_url()
+ )
+ try:
+ actor = source.gr_source.get_actor(self.user_id(source))
+ except BaseException as e:
+ # Mastodon API returns HTTP 404 for deleted (etc) users, and
+ # often one or more users' Mastodon instances are down.
+ code, _ = util.interpret_http_exception(e)
+ if code:
+ continue
+ raise
+
+ if not actor:
+ logging.info(f"Couldn't fetch {source.bridgy_url()} 's user")
+ continue
+
+ new_pic = actor.get("image", {}).get("url")
+ if not new_pic or source.picture == new_pic:
+ logging.info(f"No new picture found for {source.bridgy_url()}")
+ continue
+
+ @ndb.transactional()
+ def update():
+ src = source.key.get()
+ src.picture = new_pic
+ src.put()
+
+ logging.info(
+ f"Updating profile picture for {source.bridgy_url()} from {source.picture} to {new_pic}"
+ )
+ update()
+
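+ # Remember where this run stopped so the next cron invocation resumes
+ # with the following page; storing last=None after the final page
+ # restarts the scan from the beginning.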
+ LastUpdatedPicture(
+ id=self.SOURCE_CLS.SHORT_NAME, last=source.key if more else None
+ ).put()
+ return "OK"
class UpdateFlickrPictures(UpdatePictures):
- """Finds :class:`Flickr` sources with new profile pictures and updates them."""
- SOURCE_CLS = Flickr
+ """Finds :class:`Flickr` sources with new profile pictures and updates them."""
+
+ SOURCE_CLS = Flickr
class UpdateMastodonPictures(UpdatePictures):
- """Finds :class:`Mastodon` sources with new profile pictures and updates them."""
- SOURCE_CLS = Mastodon
+ """Finds :class:`Mastodon` sources with new profile pictures and updates them."""
+
+ SOURCE_CLS = Mastodon
- @classmethod
- def user_id(cls, source):
- return source.auth_entity.get().user_id()
+ @classmethod
+ def user_id(cls, source):
+ return source.auth_entity.get().user_id()
class UpdateTwitterPictures(UpdatePictures):
- """Finds :class:`Twitter` sources with new profile pictures and updates them.
+ """Finds :class:`Twitter` sources with new profile pictures and updates them.
- https://github.com/snarfed/granary/commit/dfc3d406a20965a5ed14c9705e3d3c2223c8c3ff
- http://indiewebcamp.com/Twitter#Profile_Image_URLs
- """
- SOURCE_CLS = Twitter
+ https://github.com/snarfed/granary/commit/dfc3d406a20965a5ed14c9705e3d3c2223c8c3ff
+ http://indiewebcamp.com/Twitter#Profile_Image_URLs
+ """
+
+ SOURCE_CLS = Twitter
# class UpdateBloggerPictures(UpdatePictures):
@@ -135,20 +150,29 @@ class UpdateTwitterPictures(UpdatePictures):
# # TODO: no granary.Blogger!
-@app.route('/cron/build_circle')
+@app.route("/cron/build_circle")
def build_circle():
- """Trigger CircleCI to build and test the main branch.
-
- ...to run twitter_live_test.py, to check that scraping likes is still working.
- """
- resp = requests.post('https://circleci.com/api/v1.1/project/github/snarfed/bridgy/tree/main?circle-token=%s' % CIRCLECI_TOKEN)
- resp.raise_for_status()
- return 'OK'
-
-
-app.add_url_rule('/cron/update_flickr_pictures',
- view_func=UpdateFlickrPictures.as_view('update_flickr_pictures'))
-app.add_url_rule('/cron/update_mastodon_pictures',
- view_func=UpdateMastodonPictures.as_view('update_mastodon_pictures'))
-app.add_url_rule('/cron/update_twitter_pictures',
- view_func=UpdateTwitterPictures.as_view('update_twitter_pictures'))
+ """Trigger CircleCI to build and test the main branch.
+
+ ...to run twitter_live_test.py, to check that scraping likes is still working.
+ """
+ resp = requests.post(
+ "https://circleci.com/api/v1.1/project/github/snarfed/bridgy/tree/main?circle-token=%s"
+ % CIRCLECI_TOKEN
+ )
+ resp.raise_for_status()
+ return "OK"
+
+
+app.add_url_rule(
+ "/cron/update_flickr_pictures",
+ view_func=UpdateFlickrPictures.as_view("update_flickr_pictures"),
+)
+app.add_url_rule(
+ "/cron/update_mastodon_pictures",
+ view_func=UpdateMastodonPictures.as_view("update_mastodon_pictures"),
+)
+app.add_url_rule(
+ "/cron/update_twitter_pictures",
+ view_func=UpdateTwitterPictures.as_view("update_twitter_pictures"),
+)
diff --git a/facebook.py b/facebook.py
index 03db4823..00307c9e 100644
--- a/facebook.py
+++ b/facebook.py
@@ -11,101 +11,105 @@
class Facebook(browser.BrowserSource):
- """A Facebook account.
-
- The key name is the Facebook global user id.
- """
- GR_CLASS = gr_facebook.Facebook
- SHORT_NAME = 'facebook'
- OAUTH_START = oauth_facebook.Start
- URL_CANONICALIZER = util.UrlCanonicalizer(
- # no reject regexp; non-private FB post URLs just 404
- domain=GR_CLASS.DOMAIN,
- subdomain='www',
- query=True,
- approve=r'https://www\.facebook\.com/[^/?]+/posts/[^/?]+$',
- headers=util.REQUEST_HEADERS)
-
- # blank granary Facebook object, shared across all instances
- gr_source = gr_facebook.Facebook()
-
- # unique name used in FB URLs, e.g. facebook.com/[username]
- username = ndb.StringProperty()
-
- @classmethod
- def new(cls, auth_entity=None, actor=None, **kwargs):
- """Creates and returns an entity based on an AS1 actor."""
- src = super().new(auth_entity=None, actor=actor, **kwargs)
- src.username = actor.get('username')
- return src
-
- @classmethod
- def key_id_from_actor(cls, actor):
- """Returns the actor's numeric_id field to use as this entity's key id.
-
- numeric_id is the Facebook global user id.
- """
- return actor['numeric_id']
-
- @classmethod
- def lookup(cls, id):
- """Returns the entity with the given id or username."""
- return ndb.Key(cls, id).get() or cls.query(cls.username == id).get()
-
- def silo_url(self):
- """Returns the Facebook profile URL, e.g. https://facebook.com/foo.
-
- Facebook profile URLS with app-scoped user ids (eg www.facebook.com/ID) no
- longer work as of April 2018, so if that's all we have, return None instead.
- https://developers.facebook.com/blog/post/2018/04/19/facebook-login-changes-address-abuse/
- """
- if self.username:
- return self.gr_source.user_url(self.username)
-
- user_id = self.key.id()
- # STATE: define this, where is it? not here or granary or o-d
- if util.is_int(id) and int(id) < MIN_APP_SCOPED_ID:
- return self.gr_source.user_url(user_id)
-
- @classmethod
- def button_html(cls, feature, **kwargs):
- return super(cls, cls).button_html(feature, form_method='get', **kwargs)
- return oauth_facebook.Start.button_html(
- '/about#browser-extension',
- form_method='get',
- image_prefix='/oauth_dropins_static/')
+ """A Facebook account.
- def canonicalize_url(self, url, **kwargs):
- """Facebook-specific standardization of syndicated urls.
-
- Canonical form is https://www.facebook.com/USERID/posts/POSTID
-
- Args:
- url: a string, the url of the syndicated content
- kwargs: unused
-
- Return:
- a string, the canonical form of the syndication url
+ The key name is the Facebook global user id.
"""
- if util.domain_from_link(url) != self.gr_source.DOMAIN:
- return None
-
- def post_url(id):
- return 'https://www.facebook.com/%s/posts/%s' % (self.key.id(), id)
-
- parsed = urllib.parse.urlparse(url)
- params = urllib.parse.parse_qs(parsed.query)
- url_id = self.gr_source.post_id(url)
- ids = params.get('story_fbid') or params.get('fbid')
-
- post_id = ids[0] if ids else url_id
- if post_id:
- url = post_url(post_id)
-
- url = url.replace('facebook.com/%s/' % self.username,
- 'facebook.com/%s/' % self.key.id())
- return super().canonicalize_url(url)
+ GR_CLASS = gr_facebook.Facebook
+ SHORT_NAME = "facebook"
+ OAUTH_START = oauth_facebook.Start
+ URL_CANONICALIZER = util.UrlCanonicalizer(
+ # no reject regexp; non-private FB post URLs just 404
+ domain=GR_CLASS.DOMAIN,
+ subdomain="www",
+ query=True,
+ approve=r"https://www\.facebook\.com/[^/?]+/posts/[^/?]+$",
+ headers=util.REQUEST_HEADERS,
+ )
+
+ # blank granary Facebook object, shared across all instances
+ gr_source = gr_facebook.Facebook()
+
+ # unique name used in FB URLs, e.g. facebook.com/[username]
+ username = ndb.StringProperty()
+
+ @classmethod
+ def new(cls, auth_entity=None, actor=None, **kwargs):
+ """Creates and returns an entity based on an AS1 actor."""
+ src = super().new(auth_entity=None, actor=actor, **kwargs)
+ src.username = actor.get("username")
+ return src
+
+ @classmethod
+ def key_id_from_actor(cls, actor):
+ """Returns the actor's numeric_id field to use as this entity's key id.
+
+ numeric_id is the Facebook global user id.
+ """
+ return actor["numeric_id"]
+
+ @classmethod
+ def lookup(cls, id):
+ """Returns the entity with the given id or username."""
+ return ndb.Key(cls, id).get() or cls.query(cls.username == id).get()
+
+ def silo_url(self):
+ """Returns the Facebook profile URL, e.g. https://facebook.com/foo.
+
+ Facebook profile URLS with app-scoped user ids (eg www.facebook.com/ID) no
+ longer work as of April 2018, so if that's all we have, return None instead.
+ https://developers.facebook.com/blog/post/2018/04/19/facebook-login-changes-address-abuse/
+ """
+ if self.username:
+ return self.gr_source.user_url(self.username)
+
+ user_id = self.key.id()
+ # STATE: define this, where is it? not here or granary or o-d
+ if util.is_int(user_id) and int(user_id) < MIN_APP_SCOPED_ID:
+ return self.gr_source.user_url(user_id)
+
+ @classmethod
+ def button_html(cls, feature, **kwargs):
+ return super(cls, cls).button_html(feature, form_method="get", **kwargs)
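+ # NOTE: everything below is unreachable dead code; the early return
+ # above leaves this OAuth-based button disabled.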
+ return oauth_facebook.Start.button_html(
+ "/about#browser-extension",
+ form_method="get",
+ image_prefix="/oauth_dropins_static/",
+ )
+
+ def canonicalize_url(self, url, **kwargs):
+ """Facebook-specific standardization of syndicated urls.
+
+ Canonical form is https://www.facebook.com/USERID/posts/POSTID
+
+ Args:
+ url: a string, the url of the syndicated content
+ kwargs: unused
+
+ Returns:
+ a string, the canonical form of the syndication url
+ """
+ if util.domain_from_link(url) != self.gr_source.DOMAIN:
+ return None
+
+ def post_url(id):
+ return "https://www.facebook.com/%s/posts/%s" % (self.key.id(), id)
+
+ parsed = urllib.parse.urlparse(url)
+ params = urllib.parse.parse_qs(parsed.query)
+ url_id = self.gr_source.post_id(url)
+ ids = params.get("story_fbid") or params.get("fbid")
+
+ post_id = ids[0] if ids else url_id
+ if post_id:
+ url = post_url(post_id)
+
+ url = url.replace(
+ "facebook.com/%s/" % self.username, "facebook.com/%s/" % self.key.id()
+ )
+
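+ # e.g. (hypothetical ids) for a source whose key id is 123, a link like
+ # https://www.facebook.com/photo.php?fbid=456 canonicalizes to
+ # https://www.facebook.com/123/posts/456.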
+ return super().canonicalize_url(url)
browser.route(Facebook)
diff --git a/flask_app.py b/flask_app.py
index 45888468..3934f69c 100644
--- a/flask_app.py
+++ b/flask_app.py
@@ -14,29 +14,32 @@
# Flask app
-app = Flask('default')
-app.template_folder = './templates'
-app.config.from_pyfile('config.py')
-app.url_map.converters['regex'] = flask_util.RegexConverter
+app = Flask("default")
+app.template_folder = "./templates"
+app.config.from_pyfile("config.py")
+app.url_map.converters["regex"] = flask_util.RegexConverter
app.after_request(flask_util.default_modern_headers)
app.register_error_handler(Exception, flask_util.handle_exception)
-app.before_request(flask_util.canonicalize_domain(
- util.OTHER_DOMAINS, util.PRIMARY_DOMAIN))
+app.before_request(
+ flask_util.canonicalize_domain(util.OTHER_DOMAINS, util.PRIMARY_DOMAIN)
+)
app.wsgi_app = flask_util.ndb_context_middleware(app.wsgi_app, client=ndb_client)
-app.jinja_env.globals.update({
- 'naturaltime': humanize.naturaltime,
- 'get_logins': util.get_logins,
- 'sources': models.sources,
- 'string': string,
- 'util': util,
- 'EPOCH': util.EPOCH,
-})
+app.jinja_env.globals.update(
+ {
+ "naturaltime": humanize.naturaltime,
+ "get_logins": util.get_logins,
+ "sources": models.sources,
+ "string": string,
+ "util": util,
+ "EPOCH": util.EPOCH,
+ }
+)
cache = Cache(app)
-@app.route('/_ah/<any(start,stop,warmup):_>')
+@app.route("/_ah/<any(start,stop,warmup):_>")
def noop(_):
- return 'OK'
+ return "OK"
diff --git a/flask_background.py b/flask_background.py
index a2e7b35c..19f0163d 100644
--- a/flask_background.py
+++ b/flask_background.py
@@ -10,35 +10,38 @@
# Flask app
-app = Flask('background')
-app.config.from_pyfile('config.py')
+app = Flask("background")
+app.config.from_pyfile("config.py")
app.wsgi_app = flask_util.ndb_context_middleware(app.wsgi_app, client=ndb_client)
@app.errorhandler(Exception)
def background_handle_exception(e):
- """Common exception handler for background tasks.
+ """Common exception handler for background tasks.
- Catches failed outbound HTTP requests and returns HTTP 304.
- """
- if isinstance(e, HTTPException):
- # raised by this app itself, pass it through
- return str(e), e.code
+ Catches failed outbound HTTP requests and returns HTTP 304.
+ """
+ if isinstance(e, HTTPException):
+ # raised by this app itself, pass it through
+ return str(e), e.code
- transients = getattr(g, 'TRANSIENT_ERROR_HTTP_CODES', ())
- source = getattr(g, 'source', None)
- if source:
- transients += source.RATE_LIMIT_HTTP_CODES + source.TRANSIENT_ERROR_HTTP_CODES
+ transients = getattr(g, "TRANSIENT_ERROR_HTTP_CODES", ())
+ source = getattr(g, "source", None)
+ if source:
+ transients += source.RATE_LIMIT_HTTP_CODES + source.TRANSIENT_ERROR_HTTP_CODES
- code, body = util.interpret_http_exception(e)
- if ((code and int(code) // 100 == 5) or code in transients or
- util.is_connection_failure(e)):
- logging.error(f'Marking as error and finishing. {code}: {body}\n{e}')
- return '', util.ERROR_HTTP_RETURN_CODE
+ code, body = util.interpret_http_exception(e)
+ if (
+ (code and int(code) // 100 == 5)
+ or code in transients
+ or util.is_connection_failure(e)
+ ):
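+ # Per the docstring above, util.ERROR_HTTP_RETURN_CODE is HTTP 304: a
+ # non-5xx failure status that presumably lets the task queue retry the
+ # task without logging a server error.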
+ logging.error(f"Marking as error and finishing. {code}: {body}\n{e}")
+ return "", util.ERROR_HTTP_RETURN_CODE
- raise e
+ raise e
-@app.route('/_ah/<any(start,stop,warmup):_>')
+@app.route("/_ah/<any(start,stop,warmup):_>")
def noop(_):
- return 'OK'
+ return "OK"
diff --git a/flickr.py b/flickr.py
index 8c333aa6..6d2b7db1 100644
--- a/flickr.py
+++ b/flickr.py
@@ -15,122 +15,148 @@
class Flickr(models.Source):
- """A Flickr account.
-
- The key name is the nsid.
- """
- # Fetching comments and likes is extremely request-intensive, so let's dial
- # back the frequency for now.
- FAST_POLL = datetime.timedelta(minutes=60)
- GR_CLASS = gr_flickr.Flickr
- OAUTH_START = oauth_flickr.Start
- SHORT_NAME = 'flickr'
- TRANSIENT_ERROR_HTTP_CODES = ('400',)
- CAN_PUBLISH = True
- URL_CANONICALIZER = util.UrlCanonicalizer(
- domain=GR_CLASS.DOMAIN,
- approve=r'https://www\.flickr\.com/(photos|people)/[^/?]+/([^/?]+/)?$',
- reject=r'https://login\.yahoo\.com/.*',
- subdomain='www',
- trailing_slash=True,
- headers=util.REQUEST_HEADERS)
-
- # unique name optionally used in URLs instead of nsid (e.g.,
- # flickr.com/photos/username)
- username = ndb.StringProperty()
-
- @staticmethod
- def new(auth_entity=None, **kwargs):
- """Creates and returns a :class:`Flickr` for the logged in user.
-
- Args:
- auth_entity: :class:`oauth_dropins.flickr.FlickrAuth`
+ """A Flickr account.
+
+ The key name is the nsid.
"""
- person = json_loads(auth_entity.user_json).get('person', {})
- return Flickr(
- id=person.get('nsid'),
- auth_entity=auth_entity.key,
- name=person.get('realname', {}).get('_content'),
- # path_alias, if it exists, is the actual thing that shows up in the url.
- # I think this is an artifact of the conversion to Yahoo.
- username=(person.get('path_alias')
- or person.get('username', {}).get('_content')),
- picture='https://farm{}.staticflickr.com/{}/buddyicons/{}.jpg' .format(
- person.get('iconfarm'), person.get('iconserver'),
- person.get('nsid')),
- url=person.get('profileurl', {}).get('_content'),
- **kwargs)
-
- def silo_url(self):
- """Returns the Flickr account URL, e.g. https://www.flickr.com/people/foo/."""
- return self.url
-
- def user_tag_id(self):
- """Returns the tag URI for this source, e.g. 'tag:flickr.com:123456'."""
- return self.gr_source.tag_uri(self.username)
-
- def label_name(self):
- """Human-readable name, username, or id for this source."""
- return self.name or self.username or self.key_id()
-
- def get_activities_response(self, *args, **kwargs):
- """Discard min_id because we still want new comments/likes on old photos."""
- kwargs.setdefault('group_id', SELF)
- if 'min_id' in kwargs:
- del kwargs['min_id']
- return self.gr_source.get_activities_response(*args, **kwargs)
-
- def canonicalize_url(self, url, activity=None, **kwargs):
- if not url.endswith('/'):
- url = url + '/'
- if self.username:
- url = url.replace('flickr.com/photos/%s/' % self.username,
- 'flickr.com/photos/%s/' % self.key_id())
- url = url.replace('flickr.com/people/%s/' % self.username,
- 'flickr.com/people/%s/' % self.key_id())
- return super().canonicalize_url(url, **kwargs)
-
-
-class AuthHandler():
- """Base OAuth handler for Flickr."""
- def start_oauth_flow(self, feature):
- starter = util.oauth_starter(oauth_flickr.Start, feature=feature)(
- # TODO: delete instead of write. if we do that below, it works, and we get
- # granted delete permissions. however, if we then attempt to actually
- # delete something, it fails with code 99 "Insufficient permissions.
- # Method requires delete privileges; write granted." and
- # https://www.flickr.com/services/auth/list.gne shows that my user's
- # permissions for the Bridgy app are back to write, not delete. wtf?!
- '/flickr/add', scopes='write' if feature == 'publish' else 'read')
- return starter.dispatch_request()
+
+ # Fetching comments and likes is extremely request-intensive, so let's dial
+ # back the frequency for now.
+ FAST_POLL = datetime.timedelta(minutes=60)
+ GR_CLASS = gr_flickr.Flickr
+ OAUTH_START = oauth_flickr.Start
+ SHORT_NAME = "flickr"
+ TRANSIENT_ERROR_HTTP_CODES = ("400",)
+ CAN_PUBLISH = True
+ URL_CANONICALIZER = util.UrlCanonicalizer(
+ domain=GR_CLASS.DOMAIN,
+ approve=r"https://www\.flickr\.com/(photos|people)/[^/?]+/([^/?]+/)?$",
+ reject=r"https://login\.yahoo\.com/.*",
+ subdomain="www",
+ trailing_slash=True,
+ headers=util.REQUEST_HEADERS,
+ )
+
+ # unique name optionally used in URLs instead of nsid (e.g.,
+ # flickr.com/photos/username)
+ username = ndb.StringProperty()
+
+ @staticmethod
+ def new(auth_entity=None, **kwargs):
+ """Creates and returns a :class:`Flickr` for the logged in user.
+
+ Args:
+ auth_entity: :class:`oauth_dropins.flickr.FlickrAuth`
+ """
+ person = json_loads(auth_entity.user_json).get("person", {})
+ return Flickr(
+ id=person.get("nsid"),
+ auth_entity=auth_entity.key,
+ name=person.get("realname", {}).get("_content"),
+ # path_alias, if it exists, is the actual thing that shows up in the url.
+ # I think this is an artifact of the conversion to Yahoo.
+ username=(
+ person.get("path_alias") or person.get("username", {}).get("_content")
+ ),
+ picture="https://farm{}.staticflickr.com/{}/buddyicons/{}.jpg".format(
+ person.get("iconfarm"), person.get("iconserver"), person.get("nsid")
+ ),
+ url=person.get("profileurl", {}).get("_content"),
+ **kwargs
+ )
+
+ def silo_url(self):
+ """Returns the Flickr account URL, e.g. https://www.flickr.com/people/foo/."""
+ return self.url
+
+ def user_tag_id(self):
+ """Returns the tag URI for this source, e.g. 'tag:flickr.com:123456'."""
+ return self.gr_source.tag_uri(self.username)
+
+ def label_name(self):
+ """Human-readable name, username, or id for this source."""
+ return self.name or self.username or self.key_id()
+
+ def get_activities_response(self, *args, **kwargs):
+ """Discard min_id because we still want new comments/likes on old photos."""
+ kwargs.setdefault("group_id", SELF)
+ if "min_id" in kwargs:
+ del kwargs["min_id"]
+ return self.gr_source.get_activities_response(*args, **kwargs)
+
+ def canonicalize_url(self, url, activity=None, **kwargs):
+ if not url.endswith("/"):
+ url = url + "/"
+ if self.username:
+ url = url.replace(
+ "flickr.com/photos/%s/" % self.username,
+ "flickr.com/photos/%s/" % self.key_id(),
+ )
+ url = url.replace(
+ "flickr.com/people/%s/" % self.username,
+ "flickr.com/people/%s/" % self.key_id(),
+ )
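+ # e.g. (hypothetical ids) with username "alice" and nsid key id "123@N02",
+ # flickr.com/photos/alice/456 becomes flickr.com/photos/123@N02/456/
+ # before the shared canonicalization below.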
+ return super().canonicalize_url(url, **kwargs)
+
+
+class AuthHandler:
+ """Base OAuth handler for Flickr."""
+
+ def start_oauth_flow(self, feature):
+ starter = util.oauth_starter(oauth_flickr.Start, feature=feature)(
+ # TODO: delete instead of write. if we do that below, it works, and we get
+ # granted delete permissions. however, if we then attempt to actually
+ # delete something, it fails with code 99 "Insufficient permissions.
+ # Method requires delete privileges; write granted." and
+ # https://www.flickr.com/services/auth/list.gne shows that my user's
+ # permissions for the Bridgy app are back to write, not delete. wtf?!
+ "/flickr/add",
+ scopes="write" if feature == "publish" else "read",
+ )
+ return starter.dispatch_request()
class Start(oauth_flickr.Start, AuthHandler):
- """Custom handler to start Flickr auth process."""
- def dispatch_request(self):
- return self.start_oauth_flow(request.form.get('feature'))
+ """Custom handler to start Flickr auth process."""
+
+ def dispatch_request(self):
+ return self.start_oauth_flow(request.form.get("feature"))
class AddFlickr(oauth_flickr.Callback, AuthHandler):
- """Custom handler to add Flickr source when auth completes.
-
- If this account was previously authorized with greater permissions, this will
- trigger another round of auth with elevated permissions.
- """
- def finish(self, auth_entity, state=None):
- logging.debug('finish with %s, %s', auth_entity, state)
- source = util.maybe_add_or_delete_source(Flickr, auth_entity, state)
- feature = util.decode_oauth_state(state).get('feature')
- if source and feature == 'listen' and 'publish' in source.features:
- # we had signed up previously with publish, so we'll reauth to
- # avoid losing that permission
- logging.info('Restarting OAuth flow to get publish permissions.')
- source.features.remove('publish')
- source.put()
- return self.start_oauth_flow('publish')
-
-
-app.add_url_rule('/flickr/start', view_func=Start.as_view('flickr_start', '/flickr/add'), methods=['POST'])
-app.add_url_rule('/flickr/add', view_func=AddFlickr.as_view('flickr_add', 'unused'))
-app.add_url_rule('/flickr/delete/finish', view_func=oauth_flickr.Callback.as_view('flickr_delete_finish', '/delete/finish'))
-app.add_url_rule('/flickr/publish/start', view_func=oauth_flickr.Start.as_view('flickr_publish_start', '/publish/flickr/finish'), methods=['POST'])
+ """Custom handler to add Flickr source when auth completes.
+
+ If this account was previously authorized with greater permissions, this will
+ trigger another round of auth with elevated permissions.
+ """
+
+ def finish(self, auth_entity, state=None):
+ logging.debug("finish with %s, %s", auth_entity, state)
+ source = util.maybe_add_or_delete_source(Flickr, auth_entity, state)
+ feature = util.decode_oauth_state(state).get("feature")
+ if source and feature == "listen" and "publish" in source.features:
+ # we had signed up previously with publish, so we'll reauth to
+ # avoid losing that permission
+ logging.info("Restarting OAuth flow to get publish permissions.")
+ source.features.remove("publish")
+ source.put()
+ return self.start_oauth_flow("publish")
+
+
+app.add_url_rule(
+ "/flickr/start",
+ view_func=Start.as_view("flickr_start", "/flickr/add"),
+ methods=["POST"],
+)
+app.add_url_rule("/flickr/add", view_func=AddFlickr.as_view("flickr_add", "unused"))
+app.add_url_rule(
+ "/flickr/delete/finish",
+ view_func=oauth_flickr.Callback.as_view("flickr_delete_finish", "/delete/finish"),
+)
+app.add_url_rule(
+ "/flickr/publish/start",
+ view_func=oauth_flickr.Start.as_view(
+ "flickr_publish_start", "/publish/flickr/finish"
+ ),
+ methods=["POST"],
+)
diff --git a/github.py b/github.py
index 015c8fad..c6d62e24 100644
--- a/github.py
+++ b/github.py
@@ -15,92 +15,109 @@
# https://developer.github.com/apps/building-oauth-apps/scopes-for-oauth-apps/
# https://github.com/dear-github/dear-github/issues/113#issuecomment-365121631
LISTEN_SCOPES = [
- 'notifications',
- 'public_repo',
+ "notifications",
+ "public_repo",
]
PUBLISH_SCOPES = [
- 'public_repo',
+ "public_repo",
]
class GitHub(Source):
- """A GitHub user.
-
- The key name is the GitHub username.
- """
- GR_CLASS = gr_github.GitHub
- OAUTH_START = oauth_github.Start
- SHORT_NAME = 'github'
- TYPE_LABELS = {
- 'post': 'issue',
- 'like': 'star',
- }
- BACKFEED_REQUIRES_SYNDICATION_LINK = True
- DISABLE_HTTP_CODES = Source.DISABLE_HTTP_CODES + ('403',)
- CAN_PUBLISH = True
- URL_CANONICALIZER = util.UrlCanonicalizer(domain=GR_CLASS.DOMAIN,
- headers=util.REQUEST_HEADERS,
- fragment=True)
- # This makes us backfeed issue/PR comments to previous comments on the same
- # issue/PR.
- IGNORE_SYNDICATION_LINK_FRAGMENTS = True
-
- @staticmethod
- def new(auth_entity=None, **kwargs):
- """Creates and returns a :class:`GitHub` for the logged in user.
-
- Args:
- auth_entity: :class:`oauth_dropins.github.GitHubAuth`
- kwargs: property values
+ """A GitHub user.
+
+ The key name is the GitHub username.
"""
- user = json_loads(auth_entity.user_json)
- gr_source = gr_github.GitHub(access_token=auth_entity.access_token())
- actor = gr_source.user_to_actor(user)
- return GitHub(id=auth_entity.key_id(),
- auth_entity=auth_entity.key,
- name=actor.get('displayName'),
- picture=actor.get('image', {}).get('url'),
- url=actor.get('url'),
- **kwargs)
-
- def silo_url(self):
- """Returns the GitHub account URL, e.g. https://github.com/foo."""
- return self.gr_source.user_url(self.key_id())
-
- def label_name(self):
- """Returns the username."""
- return self.key_id()
-
- def user_tag_id(self):
- """Returns this user's tag URI, eg 'tag:github.com:2013,MDQ6VXNlcjc3OD='."""
- id = json_loads(self.auth_entity.get().user_json)['id']
- return self.gr_source.tag_uri(id)
-
- def get_activities_response(self, *args, **kwargs):
- """Drop kwargs that granary doesn't currently support for github."""
- kwargs.update({
- 'fetch_shares': None,
- 'fetch_mentions': None,
- })
- return self.gr_source.get_activities_response(*args, **kwargs)
+
+ GR_CLASS = gr_github.GitHub
+ OAUTH_START = oauth_github.Start
+ SHORT_NAME = "github"
+ TYPE_LABELS = {
+ "post": "issue",
+ "like": "star",
+ }
+ BACKFEED_REQUIRES_SYNDICATION_LINK = True
+ DISABLE_HTTP_CODES = Source.DISABLE_HTTP_CODES + ("403",)
+ CAN_PUBLISH = True
+ URL_CANONICALIZER = util.UrlCanonicalizer(
+ domain=GR_CLASS.DOMAIN, headers=util.REQUEST_HEADERS, fragment=True
+ )
+ # This makes us backfeed issue/PR comments to previous comments on the same
+ # issue/PR.
+ IGNORE_SYNDICATION_LINK_FRAGMENTS = True
+
+ @staticmethod
+ def new(auth_entity=None, **kwargs):
+ """Creates and returns a :class:`GitHub` for the logged in user.
+
+ Args:
+ auth_entity: :class:`oauth_dropins.github.GitHubAuth`
+ kwargs: property values
+ """
+ user = json_loads(auth_entity.user_json)
+ gr_source = gr_github.GitHub(access_token=auth_entity.access_token())
+ actor = gr_source.user_to_actor(user)
+ return GitHub(
+ id=auth_entity.key_id(),
+ auth_entity=auth_entity.key,
+ name=actor.get("displayName"),
+ picture=actor.get("image", {}).get("url"),
+ url=actor.get("url"),
+ **kwargs
+ )
+
+ def silo_url(self):
+ """Returns the GitHub account URL, e.g. https://github.com/foo."""
+ return self.gr_source.user_url(self.key_id())
+
+ def label_name(self):
+ """Returns the username."""
+ return self.key_id()
+
+ def user_tag_id(self):
+ """Returns this user's tag URI, eg 'tag:github.com:2013,MDQ6VXNlcjc3OD='."""
+ id = json_loads(self.auth_entity.get().user_json)["id"]
+ return self.gr_source.tag_uri(id)
+
+ def get_activities_response(self, *args, **kwargs):
+ """Drop kwargs that granary doesn't currently support for github."""
+ kwargs.update(
+ {
+ "fetch_shares": None,
+ "fetch_mentions": None,
+ }
+ )
+ return self.gr_source.get_activities_response(*args, **kwargs)
class Start(View):
- def dispatch_request(self):
- features = request.form['feature']
- scopes = PUBLISH_SCOPES if 'publish' in features else LISTEN_SCOPES
- starter = util.oauth_starter(oauth_github.Start, feature=features
- )('/github/add', scopes=scopes)
- return starter.dispatch_request()
+ def dispatch_request(self):
+ features = request.form["feature"]
+ scopes = PUBLISH_SCOPES if "publish" in features else LISTEN_SCOPES
+ starter = util.oauth_starter(oauth_github.Start, feature=features)(
+ "/github/add", scopes=scopes
+ )
+ return starter.dispatch_request()
class AddGitHub(oauth_github.Callback):
- def finish(self, auth_entity, state=None):
- logging.debug('finish with %s, %s', auth_entity, state)
- util.maybe_add_or_delete_source(GitHub, auth_entity, state)
-
-
-app.add_url_rule('/github/start', view_func=Start.as_view('github_start'), methods=['POST'])
-app.add_url_rule('/github/add', view_func=AddGitHub.as_view('github_add', 'unused'))
-app.add_url_rule('/github/delete/finish', view_func=oauth_github.Callback.as_view('github_delete_finish', '/delete/finish'))
-app.add_url_rule('/github/publish/start', view_func=oauth_github.Start.as_view('github_publish_start', '/publish/github/finish', scopes=PUBLISH_SCOPES), methods=['POST'])
+ def finish(self, auth_entity, state=None):
+ logging.debug("finish with %s, %s", auth_entity, state)
+ util.maybe_add_or_delete_source(GitHub, auth_entity, state)
+
+
+app.add_url_rule(
+ "/github/start", view_func=Start.as_view("github_start"), methods=["POST"]
+)
+app.add_url_rule("/github/add", view_func=AddGitHub.as_view("github_add", "unused"))
+app.add_url_rule(
+ "/github/delete/finish",
+ view_func=oauth_github.Callback.as_view("github_delete_finish", "/delete/finish"),
+)
+app.add_url_rule(
+ "/github/publish/start",
+ view_func=oauth_github.Start.as_view(
+ "github_publish_start", "/publish/github/finish", scopes=PUBLISH_SCOPES
+ ),
+ methods=["POST"],
+)
diff --git a/handlers.py b/handlers.py
index 3326d246..44382505 100644
--- a/handlers.py
+++ b/handlers.py
@@ -38,7 +38,8 @@
CACHE_TIME = datetime.timedelta(minutes=15)
-TEMPLATE = string.Template("""\
+TEMPLATE = string.Template(
+ """\
@@ -64,249 +65,286 @@
$body
-""")
+"""
+)
-@app.route('/<any(post,comment,like,react,repost,rsvp):_>/<path:__>',
- methods=['HEAD'])
+@app.route("/<any(post,comment,like,react,repost,rsvp):_>/<path:__>", methods=["HEAD"])
def mf2_handler_head(_, __):
- return ''
+ return ""
class Item(View):
- """Fetches a post, repost, like, or comment and serves it as mf2 HTML or JSON.
- """
- source = None
-
- VALID_ID = re.compile(r'^[\w.+:@=<>-]+$')
-
- def get_item(self, **kwargs):
- """Fetches and returns an object from the given source.
-
- To be implemented by subclasses.
-
- Args:
- source: :class:`models.Source` subclass
- id: string
-
- Returns:
- ActivityStreams object dict
- """
- raise NotImplementedError()
-
- def get_post(self, id, **kwargs):
- """Fetch a post.
-
- Args:
- id: string, site-specific post id
- is_event: bool
- kwargs: passed through to :meth:`get_activities`
-
- Returns:
- ActivityStreams object dict
- """
- try:
- posts = self.source.get_activities(
- activity_id=id, user_id=self.source.key_id(), **kwargs)
- if posts:
- return posts[0]
- logging.warning('Source post %s not found', id)
- except AssertionError:
- raise
- except Exception as e:
- util.interpret_http_exception(e)
-
- @flask_util.cached(cache, CACHE_TIME)
- def dispatch_request(self, site, key_id, **kwargs):
- """Handle HTTP request."""
- source_cls = models.sources.get(site)
- if not source_cls:
- error("Source type '%s' not found. Known sources: %s" %
- (site, filter(None, models.sources.keys())))
-
- self.source = source_cls.get_by_id(key_id)
- if not self.source:
- error(f'Source {site} {key_id} not found')
- elif (self.source.status == 'disabled' or
- 'listen' not in self.source.features):
- error(f'Source {self.source.bridgy_path()} is disabled for backfeed')
-
- format = request.values.get('format', 'html')
- if format not in ('html', 'json'):
- error(f'Invalid format {format}, expected html or json')
-
- for id in kwargs.values():
- if not self.VALID_ID.match(id):
- error(f'Invalid id {id}', 404)
-
- try:
- obj = self.get_item(**kwargs)
- except models.DisableSource:
- error("Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!", 401)
- except ValueError as e:
- error(f'{self.source.GR_CLASS.NAME} error: {e}')
-
- if not obj:
- error(f'Not found: {site}:{key_id} {kwargs}', 404)
-
- if self.source.is_blocked(obj):
- error('That user is currently blocked', 410)
-
- # use https for profile pictures so we don't cause SSL mixed mode errors
- # when serving over https.
- author = obj.get('author', {})
- image = author.get('image', {})
- url = image.get('url')
- if url:
- image['url'] = util.update_scheme(url, request)
-
- mf2_json = microformats2.object_to_json(obj, synthesize_content=False)
-
- # try to include the author's silo profile url
- author = first_props(mf2_json.get('properties', {})).get('author', {})
- author_uid = first_props(author.get('properties', {})).get('uid', '')
- if author_uid:
- parsed = util.parse_tag_uri(author_uid)
- if parsed:
- urls = author.get('properties', {}).setdefault('url', [])
+ """Fetches a post, repost, like, or comment and serves it as mf2 HTML or JSON."""
+
+ source = None
+
+ VALID_ID = re.compile(r"^[\w.+:@=<>-]+$")
+
+ def get_item(self, **kwargs):
+ """Fetches and returns an object from the given source.
+
+ To be implemented by subclasses.
+
+ Args:
+ source: :class:`models.Source` subclass
+ id: string
+
+ Returns:
+ ActivityStreams object dict
+ """
+ raise NotImplementedError()
+
+ def get_post(self, id, **kwargs):
+ """Fetch a post.
+
+ Args:
+ id: string, site-specific post id
+ is_event: bool
+ kwargs: passed through to :meth:`get_activities`
+
+ Returns:
+ ActivityStreams object dict
+ """
+ try:
+ posts = self.source.get_activities(
+ activity_id=id, user_id=self.source.key_id(), **kwargs
+ )
+ if posts:
+ return posts[0]
+ logging.warning("Source post %s not found", id)
+ except AssertionError:
+ raise
+ except Exception as e:
+ util.interpret_http_exception(e)
+
+ @flask_util.cached(cache, CACHE_TIME)
+ def dispatch_request(self, site, key_id, **kwargs):
+ """Handle HTTP request."""
+ source_cls = models.sources.get(site)
+ if not source_cls:
+ error(
+ "Source type '%s' not found. Known sources: %s"
+ % (site, list(filter(None, models.sources.keys())))
+ )
+
+ self.source = source_cls.get_by_id(key_id)
+ if not self.source:
+ error(f"Source {site} {key_id} not found")
+ elif self.source.status == "disabled" or "listen" not in self.source.features:
+ error(f"Source {self.source.bridgy_path()} is disabled for backfeed")
+
+ format = request.values.get("format", "html")
+ if format not in ("html", "json"):
+ error(f"Invalid format {format}, expected html or json")
+
+ for id in kwargs.values():
+ if not self.VALID_ID.match(id):
+ error(f"Invalid id {id}", 404)
+
try:
- silo_url = self.source.gr_source.user_url(parsed[1])
- if silo_url not in microformats2.get_string_urls(urls):
- urls.append(silo_url)
- except NotImplementedError: # from gr_source.user_url()
- pass
-
- # write the response!
- if format == 'html':
- url = obj.get('url', '')
- return TEMPLATE.substitute({
- 'refresh': (f'<meta http-equiv="refresh" content="0;url={url}">'
- if url else ''),
- 'url': url,
- 'body': microformats2.json_to_html(mf2_json),
- 'title': obj.get('title') or obj.get('content') or 'Bridgy Response',
- })
- elif format == 'json':
- return mf2_json
-
- def merge_urls(self, obj, property, urls, object_type='article'):
- """Updates an object's ActivityStreams URL objects in place.
-
- Adds all URLs in urls that don't already exist in obj[property].
-
- ActivityStreams schema details:
- http://activitystrea.ms/specs/json/1.0/#id-comparison
-
- Args:
- obj: ActivityStreams object to merge URLs into
- property: string property to merge URLs into
- urls: sequence of string URLs to add
- object_type: stored as the objectType alongside each URL
- """
- if obj:
- obj[property] = util.get_list(obj, property)
- existing = set(filter(None, (u.get('url') for u in obj[property])))
- obj[property] += [{'url': url, 'objectType': object_type} for url in urls
- if url not in existing]
+ obj = self.get_item(**kwargs)
+ except models.DisableSource:
+ error(
+ "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!",
+ 401,
+ )
+ except ValueError as e:
+ error(f"{self.source.GR_CLASS.NAME} error: {e}")
+
+ if not obj:
+ error(f"Not found: {site}:{key_id} {kwargs}", 404)
+
+ if self.source.is_blocked(obj):
+ error("That user is currently blocked", 410)
+
+ # use https for profile pictures so we don't cause SSL mixed mode errors
+ # when serving over https.
+ author = obj.get("author", {})
+ image = author.get("image", {})
+ url = image.get("url")
+ if url:
+ image["url"] = util.update_scheme(url, request)
+
+ mf2_json = microformats2.object_to_json(obj, synthesize_content=False)
+
+ # try to include the author's silo profile url
+ author = first_props(mf2_json.get("properties", {})).get("author", {})
+ author_uid = first_props(author.get("properties", {})).get("uid", "")
+ if author_uid:
+ parsed = util.parse_tag_uri(author_uid)
+ if parsed:
+ urls = author.get("properties", {}).setdefault("url", [])
+ try:
+ silo_url = self.source.gr_source.user_url(parsed[1])
+ if silo_url not in microformats2.get_string_urls(urls):
+ urls.append(silo_url)
+ except NotImplementedError: # from gr_source.user_url()
+ pass
+
+ # write the response!
+ if format == "html":
+ url = obj.get("url", "")
+ return TEMPLATE.substitute(
+ {
+ "refresh": (
+ f'<meta http-equiv="refresh" content="0;url={url}">'
+ if url
+ else ""
+ ),
+ "url": url,
+ "body": microformats2.json_to_html(mf2_json),
+ "title": obj.get("title")
+ or obj.get("content")
+ or "Bridgy Response",
+ }
+ )
+ elif format == "json":
+ return mf2_json
+
+ def merge_urls(self, obj, property, urls, object_type="article"):
+ """Updates an object's ActivityStreams URL objects in place.
+
+ Adds all URLs in urls that don't already exist in obj[property].
+
+ ActivityStreams schema details:
+ http://activitystrea.ms/specs/json/1.0/#id-comparison
+
+ Args:
+ obj: ActivityStreams object to merge URLs into
+ property: string property to merge URLs into
+ urls: sequence of string URLs to add
+ object_type: stored as the objectType alongside each URL
+ """
+ if obj:
+ obj[property] = util.get_list(obj, property)
+ existing = set(filter(None, (u.get("url") for u in obj[property])))
+ obj[property] += [
+ {"url": url, "objectType": object_type}
+ for url in urls
+ if url not in existing
+ ]
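+ # e.g. (hypothetical URLs) merging ["http://a/1"] into obj["tags"] with
+ # object_type="mention" appends {"url": "http://a/1", "objectType":
+ # "mention"} unless an existing tag already has that url.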
# Note that mention links are included in posts and comments, but not
# likes, reposts, or rsvps. Matches logic in poll() (step 4) in tasks.py!
class Post(Item):
- def get_item(self, post_id):
- posts = self.source.get_activities(activity_id=post_id,
- user_id=self.source.key_id())
- if not posts:
- return None
-
- post = posts[0]
- originals, mentions = original_post_discovery.discover(
- self.source, post, fetch_hfeed=False)
- obj = post['object']
- obj['upstreamDuplicates'] = list(
- set(util.get_list(obj, 'upstreamDuplicates')) | originals)
- self.merge_urls(obj, 'tags', mentions, object_type='mention')
- return obj
+ def get_item(self, post_id):
+ posts = self.source.get_activities(
+ activity_id=post_id, user_id=self.source.key_id()
+ )
+ if not posts:
+ return None
+
+ post = posts[0]
+ originals, mentions = original_post_discovery.discover(
+ self.source, post, fetch_hfeed=False
+ )
+ obj = post["object"]
+ obj["upstreamDuplicates"] = list(
+ set(util.get_list(obj, "upstreamDuplicates")) | originals
+ )
+ self.merge_urls(obj, "tags", mentions, object_type="mention")
+ return obj
class Comment(Item):
- def get_item(self, post_id, comment_id):
- fetch_replies = not self.source.gr_source.OPTIMIZED_COMMENTS
- post = self.get_post(post_id, fetch_replies=fetch_replies)
- has_replies = (post.get('object', {}).get('replies', {}).get('items')
- if post else False)
- cmt = self.source.get_comment(
- comment_id, activity_id=post_id, activity_author_id=self.source.key_id(),
- activity=post if fetch_replies or has_replies else None)
- if post:
- originals, mentions = original_post_discovery.discover(
- self.source, post, fetch_hfeed=False)
- self.merge_urls(cmt, 'inReplyTo', originals)
- self.merge_urls(cmt, 'tags', mentions, object_type='mention')
- return cmt
+ def get_item(self, post_id, comment_id):
+ fetch_replies = not self.source.gr_source.OPTIMIZED_COMMENTS
+ post = self.get_post(post_id, fetch_replies=fetch_replies)
+ has_replies = (
+ post.get("object", {}).get("replies", {}).get("items") if post else False
+ )
+ cmt = self.source.get_comment(
+ comment_id,
+ activity_id=post_id,
+ activity_author_id=self.source.key_id(),
+ activity=post if fetch_replies or has_replies else None,
+ )
+ if post:
+ originals, mentions = original_post_discovery.discover(
+ self.source, post, fetch_hfeed=False
+ )
+ self.merge_urls(cmt, "inReplyTo", originals)
+ self.merge_urls(cmt, "tags", mentions, object_type="mention")
+ return cmt
class Like(Item):
- def get_item(self, post_id, user_id):
- post = self.get_post(post_id, fetch_likes=True)
- like = self.source.get_like(self.source.key_id(), post_id, user_id,
- activity=post)
- if post:
- originals, mentions = original_post_discovery.discover(
- self.source, post, fetch_hfeed=False)
- self.merge_urls(like, 'object', originals)
- return like
+ def get_item(self, post_id, user_id):
+ post = self.get_post(post_id, fetch_likes=True)
+ like = self.source.get_like(
+ self.source.key_id(), post_id, user_id, activity=post
+ )
+ if post:
+ originals, mentions = original_post_discovery.discover(
+ self.source, post, fetch_hfeed=False
+ )
+ self.merge_urls(like, "object", originals)
+ return like
class Reaction(Item):
- def get_item(self, post_id, user_id, reaction_id):
- post = self.get_post(post_id)
- reaction = self.source.gr_source.get_reaction(
- self.source.key_id(), post_id, user_id, reaction_id, activity=post)
- if post:
- originals, mentions = original_post_discovery.discover(
- self.source, post, fetch_hfeed=False)
- self.merge_urls(reaction, 'object', originals)
- return reaction
+ def get_item(self, post_id, user_id, reaction_id):
+ post = self.get_post(post_id)
+ reaction = self.source.gr_source.get_reaction(
+ self.source.key_id(), post_id, user_id, reaction_id, activity=post
+ )
+ if post:
+ originals, mentions = original_post_discovery.discover(
+ self.source, post, fetch_hfeed=False
+ )
+ self.merge_urls(reaction, "object", originals)
+ return reaction
class Repost(Item):
- def get_item(self, post_id, share_id):
- post = self.get_post(post_id, fetch_shares=True)
- repost = self.source.gr_source.get_share(
- self.source.key_id(), post_id, share_id, activity=post)
- # webmention receivers don't want to see their own post in their
- # comments, so remove attachments before rendering.
- if repost and 'attachments' in repost:
- del repost['attachments']
- if post:
- originals, mentions = original_post_discovery.discover(
- self.source, post, fetch_hfeed=False)
- self.merge_urls(repost, 'object', originals)
- return repost
+ def get_item(self, post_id, share_id):
+ post = self.get_post(post_id, fetch_shares=True)
+ repost = self.source.gr_source.get_share(
+ self.source.key_id(), post_id, share_id, activity=post
+ )
+ # webmention receivers don't want to see their own post in their
+ # comments, so remove attachments before rendering.
+ if repost and "attachments" in repost:
+ del repost["attachments"]
+ if post:
+ originals, mentions = original_post_discovery.discover(
+ self.source, post, fetch_hfeed=False
+ )
+ self.merge_urls(repost, "object", originals)
+ return repost
class Rsvp(Item):
- def get_item(self, event_id, user_id):
- event = self.source.gr_source.get_event(event_id)
- rsvp = self.source.gr_source.get_rsvp(
- self.source.key_id(), event_id, user_id, event=event)
- if event:
- originals, mentions = original_post_discovery.discover(
- self.source, event, fetch_hfeed=False)
- self.merge_urls(rsvp, 'inReplyTo', originals)
- return rsvp
-
-
-app.add_url_rule('/post/<site>/<key_id>/<post_id>',
- view_func=Post.as_view('post'))
-app.add_url_rule('/comment/<site>/<key_id>/<post_id>/<comment_id>',
- view_func=Comment.as_view('comment'))
-app.add_url_rule('/like/<site>/<key_id>/<post_id>/<user_id>',
- view_func=Like.as_view('like'))
-app.add_url_rule('/react/<site>/<key_id>/<post_id>/<user_id>/<reaction_id>',
- view_func=Reaction.as_view('react'))
-app.add_url_rule('/repost/<site>/<key_id>/<post_id>/<share_id>',
- view_func=Repost.as_view('repost'))
-app.add_url_rule('/rsvp/<site>/<key_id>/<event_id>/<user_id>',
- view_func=Rsvp.as_view('rsvp'))
+ def get_item(self, event_id, user_id):
+ event = self.source.gr_source.get_event(event_id)
+ rsvp = self.source.gr_source.get_rsvp(
+ self.source.key_id(), event_id, user_id, event=event
+ )
+ if event:
+ originals, mentions = original_post_discovery.discover(
+ self.source, event, fetch_hfeed=False
+ )
+ self.merge_urls(rsvp, "inReplyTo", originals)
+ return rsvp
+
+
+app.add_url_rule("/post///", view_func=Post.as_view("post"))
+app.add_url_rule(
+ "/comment////",
+ view_func=Comment.as_view("comment"),
+)
+app.add_url_rule(
+ "/like////", view_func=Like.as_view("like")
+)
+app.add_url_rule(
+ "/react/////",
+ view_func=Reaction.as_view("react"),
+)
+app.add_url_rule(
+ "/repost////", view_func=Repost.as_view("repost")
+)
+app.add_url_rule(
+ "/rsvp////", view_func=Rsvp.as_view("rsvp")
+)
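+
+
+# For example, GET /post/twitter/alice/123?format=json (hypothetical source
+# user "alice" and post id 123) fetches that post from the stored Twitter
+# source and serves it as mf2 JSON; format defaults to html.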
diff --git a/indieauth.py b/indieauth.py
index 058c1231..1902abf0 100644
--- a/indieauth.py
+++ b/indieauth.py
@@ -9,52 +9,60 @@
from util import redirect
-@app.route('/indieauth/start', methods=['GET'])
+@app.route("/indieauth/start", methods=["GET"])
def indieauth_enter_web_site():
- """Serves the "Enter your web site" form page."""
- return render_template('indieauth.html', token=request.args['token'])
+ """Serves the "Enter your web site" form page."""
+ return render_template("indieauth.html", token=request.args["token"])
class Start(indieauth.Start):
- """Starts the IndieAuth flow."""
- def dispatch_request(self):
- token = request.form['token']
+ """Starts the IndieAuth flow."""
- try:
- to_url = self.redirect_url(state=token)
- except Exception as e:
- if util.is_connection_failure(e) or util.interpret_http_exception(e)[0]:
- flash("Couldn't fetch your web site: %s" % e)
- return redirect('/')
- raise
+ def dispatch_request(self):
+ token = request.form["token"]
- return redirect(to_url)
+ try:
+ to_url = self.redirect_url(state=token)
+ except Exception as e:
+ if util.is_connection_failure(e) or util.interpret_http_exception(e)[0]:
+ flash("Couldn't fetch your web site: %s" % e)
+ return redirect("/")
+ raise
+
+ return redirect(to_url)
class Callback(indieauth.Callback):
- """IndieAuth callback handler."""
- def finish(self, auth_entity, state=None):
- if not auth_entity:
- return
-
- assert state
-
- @ndb.transactional()
- def add_or_update_domain():
- domain = Domain.get_or_insert(util.domain_from_link(
- util.replace_test_domains_with_localhost(auth_entity.key.id())))
- domain.auth = auth_entity.key
- if state not in domain.tokens:
- domain.tokens.append(state)
- domain.put()
- flash(f'Authorized you for {domain.key.id()}.')
-
- add_or_update_domain()
- return redirect('/')
-
-
-app.add_url_rule('/indieauth/start',
- view_func=Start.as_view('indieauth_start', '/indieauth/callback'),
- methods=['POST'])
-app.add_url_rule('/indieauth/callback',
- view_func=Callback.as_view('indieauth_callback', 'unused'))
+ """IndieAuth callback handler."""
+
+ def finish(self, auth_entity, state=None):
+ if not auth_entity:
+ return
+
+ assert state
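+ # state is the browser extension token from Start above; adding it to
+ # the Domain entity is what lets browser.py's token-domains endpoint
+ # map the token back to this domain later.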
+
+ @ndb.transactional()
+ def add_or_update_domain():
+ domain = Domain.get_or_insert(
+ util.domain_from_link(
+ util.replace_test_domains_with_localhost(auth_entity.key.id())
+ )
+ )
+ domain.auth = auth_entity.key
+ if state not in domain.tokens:
+ domain.tokens.append(state)
+ domain.put()
+ flash(f"Authorized you for {domain.key.id()}.")
+
+ add_or_update_domain()
+ return redirect("/")
+
+
+app.add_url_rule(
+ "/indieauth/start",
+ view_func=Start.as_view("indieauth_start", "/indieauth/callback"),
+ methods=["POST"],
+)
+app.add_url_rule(
+ "/indieauth/callback", view_func=Callback.as_view("indieauth_callback", "unused")
+)
diff --git a/instagram.py b/instagram.py
index bd2bd05b..359fc949 100644
--- a/instagram.py
+++ b/instagram.py
@@ -8,38 +8,40 @@
class Instagram(browser.BrowserSource):
- """An Instagram account.
-
- The key name is the username. Instagram usernames may have ASCII letters (case
- insensitive), numbers, periods, and underscores:
- https://stackoverflow.com/questions/15470180
- """
- GR_CLASS = gr_instagram.Instagram
- SHORT_NAME = 'instagram'
- OAUTH_START = oauth_instagram.Start
- URL_CANONICALIZER = util.UrlCanonicalizer(
- # no reject regexp; non-private Instagram post URLs just 404
- domain=GR_CLASS.DOMAIN,
- subdomain='www',
- approve=r'https://www.instagram.com/p/[^/?]+/$',
- trailing_slash=True,
- headers=util.REQUEST_HEADERS)
-
- # blank granary Instagram object, shared across all instances
- gr_source = gr_instagram.Instagram()
-
- @classmethod
- def key_id_from_actor(cls, actor):
- """Returns the actor's username field to be used as this entity's key id."""
- return actor['username']
-
- def silo_url(self):
- """Returns the Instagram account URL, e.g. https://instagram.com/foo."""
- return self.gr_source.user_url(self.key.id())
-
- def label_name(self):
- """Returns the username."""
- return self.key_id()
+ """An Instagram account.
+
+ The key name is the username. Instagram usernames may have ASCII letters (case
+ insensitive), numbers, periods, and underscores:
+ https://stackoverflow.com/questions/15470180
+ """
+
+ GR_CLASS = gr_instagram.Instagram
+ SHORT_NAME = "instagram"
+ OAUTH_START = oauth_instagram.Start
+ URL_CANONICALIZER = util.UrlCanonicalizer(
+ # no reject regexp; non-private Instagram post URLs just 404
+ domain=GR_CLASS.DOMAIN,
+ subdomain="www",
+ approve=r"https://www.instagram.com/p/[^/?]+/$",
+ trailing_slash=True,
+ headers=util.REQUEST_HEADERS,
+ )
+
+ # blank granary Instagram object, shared across all instances
+ gr_source = gr_instagram.Instagram()
+
+ @classmethod
+ def key_id_from_actor(cls, actor):
+ """Returns the actor's username field to be used as this entity's key id."""
+ return actor["username"]
+
+ def silo_url(self):
+ """Returns the Instagram account URL, e.g. https://instagram.com/foo."""
+ return self.gr_source.user_url(self.key.id())
+
+ def label_name(self):
+ """Returns the username."""
+ return self.key_id()
browser.route(Instagram)
diff --git a/mastodon.py b/mastodon.py
index 1dd5357d..8ed6b8be 100644
--- a/mastodon.py
+++ b/mastodon.py
@@ -15,111 +15,116 @@
# https://docs.joinmastodon.org/api/oauth-scopes/
LISTEN_SCOPES = (
- 'read:accounts',
- 'read:blocks',
- 'read:notifications',
- 'read:search',
- 'read:statuses',
+ "read:accounts",
+ "read:blocks",
+ "read:notifications",
+ "read:search",
+ "read:statuses",
)
PUBLISH_SCOPES = LISTEN_SCOPES + (
- 'write:statuses',
- 'write:favourites',
- 'write:media',
+ "write:statuses",
+ "write:favourites",
+ "write:media",
)
SCOPE_SEPARATOR = oauth_dropins.mastodon.Start.SCOPE_SEPARATOR
class StartBase(oauth_dropins.mastodon.Start):
- """Abstract base OAuth starter class with our redirect URLs."""
- DEFAULT_SCOPE = ''
- REDIRECT_PATHS = (
- '/mastodon/callback',
- '/publish/mastodon/finish',
- '/mastodon/delete/finish',
- '/delete/finish',
- )
+ """Abstract base OAuth starter class with our redirect URLs."""
- def app_name(self):
- return 'Bridgy'
+ DEFAULT_SCOPE = ""
+ REDIRECT_PATHS = (
+ "/mastodon/callback",
+ "/publish/mastodon/finish",
+ "/mastodon/delete/finish",
+ "/delete/finish",
+ )
- def app_url(self):
- return util.host_url()
+ def app_name(self):
+ return "Bridgy"
+
+ def app_url(self):
+ return util.host_url()
class Mastodon(models.Source):
- """A Mastodon account.
-
- The key name is the fully qualified address, eg '@snarfed@mastodon.technology'.
- """
- GR_CLASS = gr_mastodon.Mastodon
- OAUTH_START = StartBase
- SHORT_NAME = 'mastodon'
- CAN_PUBLISH = True
- HAS_BLOCKS = True
- TYPE_LABELS = {
- 'post': 'toot',
- 'comment': 'reply',
- 'repost': 'boost',
- 'like': 'favorite',
- }
- DISABLE_HTTP_CODES = ('401', '403', '404')
-
- @property
- def URL_CANONICALIZER(self):
- """Generate URL_CANONICALIZER dynamically to use the instance's domain."""
- return util.UrlCanonicalizer(
- domain=self.gr_source.DOMAIN,
- headers=util.REQUEST_HEADERS)
-
- @staticmethod
- def new(auth_entity=None, **kwargs):
- """Creates and returns a :class:`Mastodon` entity.
-
- Args:
- auth_entity: :class:`oauth_dropins.mastodon.MastodonAuth`
- kwargs: property values
+ """A Mastodon account.
+
+ The key name is the fully qualified address, eg '@snarfed@mastodon.technology'.
"""
- user = json_loads(auth_entity.user_json)
- return Mastodon(id=auth_entity.key_id(),
- auth_entity=auth_entity.key,
- url=user.get('url'),
- name=user.get('display_name') or user.get('username'),
- picture=user.get('avatar'),
- **kwargs)
-
- def username(self):
- """Returns the Mastodon username, e.g. alice."""
- return self._split_address()[0]
-
- def instance(self):
- """Returns the Mastodon instance URL, e.g. https://foo.com/."""
- return self._split_address()[1]
-
- def _split_address(self):
- split = self.key_id().split('@')
- assert len(split) == 3 and split[0] == '', self.key_id()
- return split[1], split[2]
-
- def user_tag_id(self):
- """Returns the tag URI for this source, e.g. 'tag:foo.com:alice'."""
- return self.gr_source.tag_uri(self.username())
-
- def silo_url(self):
- """Returns the Mastodon profile URL, e.g. https://foo.com/@bar."""
- return json_loads(self.auth_entity.get().user_json).get('url')
-
- def label_name(self):
- """Returns the username."""
- return self.key_id()
-
- @classmethod
- def button_html(cls, feature, **kwargs):
- """Override oauth-dropins's button_html() to not show the instance text box."""
- source = kwargs.get('source')
- instance = source.instance() if source else ''
- scopes = SCOPE_SEPARATOR.join(
- PUBLISH_SCOPES if 'publish' in feature else LISTEN_SCOPES)
- return """\
+
+ GR_CLASS = gr_mastodon.Mastodon
+ OAUTH_START = StartBase
+ SHORT_NAME = "mastodon"
+ CAN_PUBLISH = True
+ HAS_BLOCKS = True
+ TYPE_LABELS = {
+ "post": "toot",
+ "comment": "reply",
+ "repost": "boost",
+ "like": "favorite",
+ }
+ DISABLE_HTTP_CODES = ("401", "403", "404")
+
+ @property
+ def URL_CANONICALIZER(self):
+ """Generate URL_CANONICALIZER dynamically to use the instance's domain."""
+ return util.UrlCanonicalizer(
+ domain=self.gr_source.DOMAIN, headers=util.REQUEST_HEADERS
+ )
+
+ @staticmethod
+ def new(auth_entity=None, **kwargs):
+ """Creates and returns a :class:`Mastodon` entity.
+
+ Args:
+ auth_entity: :class:`oauth_dropins.mastodon.MastodonAuth`
+ kwargs: property values
+ """
+ user = json_loads(auth_entity.user_json)
+ return Mastodon(
+ id=auth_entity.key_id(),
+ auth_entity=auth_entity.key,
+ url=user.get("url"),
+ name=user.get("display_name") or user.get("username"),
+ picture=user.get("avatar"),
+ **kwargs
+ )
+
+ def username(self):
+ """Returns the Mastodon username, e.g. alice."""
+ return self._split_address()[0]
+
+ def instance(self):
+ """Returns the Mastodon instance URL, e.g. https://foo.com/."""
+ return self._split_address()[1]
+
+ def _split_address(self):
+ split = self.key_id().split("@")
+ assert len(split) == 3 and split[0] == "", self.key_id()
+ return split[1], split[2]
+
+ def user_tag_id(self):
+ """Returns the tag URI for this source, e.g. 'tag:foo.com:alice'."""
+ return self.gr_source.tag_uri(self.username())
+
+ def silo_url(self):
+ """Returns the Mastodon profile URL, e.g. https://foo.com/@bar."""
+ return json_loads(self.auth_entity.get().user_json).get("url")
+
+ def label_name(self):
+ """Returns the username."""
+ return self.key_id()
+
+ @classmethod
+ def button_html(cls, feature, **kwargs):
+ """Override oauth-dropins's button_html() to not show the instance text box."""
+ source = kwargs.get("source")
+ instance = source.instance() if source else ""
+ scopes = SCOPE_SEPARATOR.join(
+ PUBLISH_SCOPES if "publish" in feature else LISTEN_SCOPES
+ )
+ return """\
-""" % ('post' if instance else 'get', feature, instance, scopes)
-
- def is_private(self):
- """Returns True if this Mastodon account is protected.
-
- https://docs.joinmastodon.org/user/preferences/#misc
- https://docs.joinmastodon.org/entities/account/
- """
- return json_loads(self.auth_entity.get().user_json).get('locked')
-
- def search_for_links(self):
- """Searches for activities with links to any of this source's web sites.
-
- Returns:
- sequence of ActivityStreams activity dicts
- """
- if not self.domains:
- return []
-
- query = ' OR '.join(self.domains)
- return self.get_activities(
- search_query=query, group_id=gr_source.SEARCH, fetch_replies=False,
- fetch_likes=False, fetch_shares=False)
-
- def load_blocklist(self):
- try:
- return super().load_blocklist()
- except requests.HTTPError as e:
- if e.response.status_code == 403:
- # this user signed up before we started asking for the 'follow' OAuth
- # scope, which the block list API endpoint requires. just skip them.
- # https://console.cloud.google.com/errors/CMfA_KfIld6Q2AE
- logging.info("Couldn't fetch block list due to missing OAuth scope")
- self.blocked_ids = []
- self.put()
- else:
- raise
-
-
-@app.route('/mastodon/start', methods=['GET'])
+""" % (
+ "post" if instance else "get",
+ feature,
+ instance,
+ scopes,
+ )
+
+ def is_private(self):
+ """Returns True if this Mastodon account is protected.
+
+ https://docs.joinmastodon.org/user/preferences/#misc
+ https://docs.joinmastodon.org/entities/account/
+ """
+ return json_loads(self.auth_entity.get().user_json).get("locked")
+
+ def search_for_links(self):
+ """Searches for activities with links to any of this source's web sites.
+
+ Returns:
+ sequence of ActivityStreams activity dicts
+ """
+ if not self.domains:
+ return []
+
+ query = " OR ".join(self.domains)
+ return self.get_activities(
+ search_query=query,
+ group_id=gr_source.SEARCH,
+ fetch_replies=False,
+ fetch_likes=False,
+ fetch_shares=False,
+ )
+
+ def load_blocklist(self):
+ try:
+ return super().load_blocklist()
+ except requests.HTTPError as e:
+ if e.response.status_code == 403:
+ # this user signed up before we started asking for the 'follow' OAuth
+ # scope, which the block list API endpoint requires. just skip them.
+ # https://console.cloud.google.com/errors/CMfA_KfIld6Q2AE
+ logging.info("Couldn't fetch block list due to missing OAuth scope")
+ self.blocked_ids = []
+ self.put()
+ else:
+ raise
+
+
+@app.route("/mastodon/start", methods=["GET"])
def enter_your_instance():
- """Serves the "Enter your instance" form page."""
- return render_template('mastodon_instance.html')
+ """Serves the "Enter your instance" form page."""
+ return render_template("mastodon_instance.html")
class Start(StartBase):
- def redirect_url(self, *args, **kwargs):
- features = (request.form.get('feature') or '').split(',')
- starter = util.oauth_starter(StartBase)('/mastodon/callback',
- scopes=PUBLISH_SCOPES if 'publish' in features else LISTEN_SCOPES)
-
- try:
- return starter.redirect_url(*args, instance=request.form['instance'], **kwargs)
- except ValueError as e:
- logging.warning('Bad Mastodon instance', exc_info=True)
- flash(util.linkify(str(e), pretty=True))
- redirect(request.path)
+ def redirect_url(self, *args, **kwargs):
+ features = (request.form.get("feature") or "").split(",")
+ starter = util.oauth_starter(StartBase)(
+ "/mastodon/callback",
+ scopes=PUBLISH_SCOPES if "publish" in features else LISTEN_SCOPES,
+ )
+
+ try:
+ return starter.redirect_url(
+ *args, instance=request.form["instance"], **kwargs
+ )
+ except ValueError as e:
+ logging.warning("Bad Mastodon instance", exc_info=True)
+ flash(util.linkify(str(e), pretty=True))
+ redirect(request.path)
class Callback(oauth_dropins.mastodon.Callback):
- def finish(self, auth_entity, state=None):
- source = util.maybe_add_or_delete_source(Mastodon, auth_entity, state)
-
- features = util.decode_oauth_state(state).get('feature', '').split(',')
- if set(features) != set(source.features):
- # override features with whatever we requested scopes for just now, since
- # scopes are per access token. background:
- # https://github.com/snarfed/bridgy/issues/1015
- source.features = features
- source.put()
-
-
-app.add_url_rule('/mastodon/start',
- view_func=Start.as_view('mastodon_start', '/mastodon/callback'), methods=['POST'])
-app.add_url_rule('/mastodon/callback', view_func=Callback.as_view('mastodon_callback', 'unused'))
-app.add_url_rule('/mastodon/delete/finish',
- view_func=oauth_dropins.mastodon.Callback.as_view('mastodon_delete_finish', '/delete/finish'))
-app.add_url_rule('/mastodon/publish/start',
- view_func=StartBase.as_view('mastodon_publish_finish', '/publish/mastodon/finish', scopes=PUBLISH_SCOPES), methods=['POST'])
+ def finish(self, auth_entity, state=None):
+ source = util.maybe_add_or_delete_source(Mastodon, auth_entity, state)
+
+ features = util.decode_oauth_state(state).get("feature", "").split(",")
+ if set(features) != set(source.features):
+ # override features with whatever we requested scopes for just now, since
+ # scopes are per access token. background:
+ # https://github.com/snarfed/bridgy/issues/1015
+ source.features = features
+ source.put()
+
+
+app.add_url_rule(
+ "/mastodon/start",
+ view_func=Start.as_view("mastodon_start", "/mastodon/callback"),
+ methods=["POST"],
+)
+app.add_url_rule(
+ "/mastodon/callback", view_func=Callback.as_view("mastodon_callback", "unused")
+)
+app.add_url_rule(
+ "/mastodon/delete/finish",
+ view_func=oauth_dropins.mastodon.Callback.as_view(
+ "mastodon_delete_finish", "/delete/finish"
+ ),
+)
+app.add_url_rule(
+ "/mastodon/publish/start",
+ view_func=StartBase.as_view(
+ "mastodon_publish_finish", "/publish/mastodon/finish", scopes=PUBLISH_SCOPES
+ ),
+ methods=["POST"],
+)
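For context, the `_split_address()` assertion above depends on the key name format called out in the `Mastodon` docstring ('@user@instance'). A standalone sketch of that parsing, using the docstring's example address:

```python
# Sketch of Mastodon._split_address() on the docstring's example key id.
key_id = "@snarfed@mastodon.technology"

split = key_id.split("@")  # ['', 'snarfed', 'mastodon.technology']
assert len(split) == 3 and split[0] == "", key_id

username, instance = split[1], split[2]  # 'snarfed', 'mastodon.technology'
```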
diff --git a/medium.py b/medium.py
index fc79c51c..ece77b0a 100644
--- a/medium.py
+++ b/medium.py
@@ -24,155 +24,173 @@
class Medium(models.Source):
- """A Medium publication or user blog.
+ """A Medium publication or user blog.
- The key name is the username (with @ prefix) or publication name.
- """
- GR_CLASS = collections.namedtuple('FakeGrClass', ('NAME',))(NAME='Medium')
- OAUTH_START = oauth_medium.Start
- SHORT_NAME = 'medium'
-
- def is_publication(self):
- return not self.key_id().startswith('@')
-
- def feed_url(self):
- # https://help.medium.com/hc/en-us/articles/214874118-RSS-Feeds-of-publications-and-profiles
- return self.url.replace('medium.com/', 'medium.com/feed/')
-
- def silo_url(self):
- return self.url
-
- @staticmethod
- def new(auth_entity=None, id=None, **kwargs):
- """Creates and returns a Medium for the logged in user.
-
- Args:
- auth_entity: :class:`oauth_dropins.medium.MediumAuth`
- id: string, either username (starting with @) or publication id
+ The key name is the username (with @ prefix) or publication name.
"""
- assert id
- medium = Medium(id=id,
- auth_entity=auth_entity.key,
- superfeedr_secret=util.generate_secret(),
- **kwargs)
-
- data = medium._data(auth_entity)
- medium.name = data.get('name') or data.get('username')
- medium.picture = data.get('imageUrl')
- medium.url = data.get('url')
- return medium
-
- def verified(self):
- return False
-
- def verify(self, force=False):
- """No incoming webmention support yet."""
- pass
-
- def has_bridgy_webmention_endpoint(self):
- return True
-
- def _data(self, auth_entity):
- """Returns the Medium API object for this user or publication.
-
- https://github.com/Medium/medium-api-docs/#user-content-getting-the-authenticated-users-details
-
- Example user::
- {
- 'imageUrl': 'https://cdn-images-1.medium.com/fit/c/200/200/0*4dsrv3pwIJfFraSz.jpeg',
- 'url': 'https://medium.com/@snarfed',
- 'name': 'Ryan Barrett',
- 'username': 'snarfed',
- 'id': '113863a5ca2ab60671e8c9fe089e59c07acbf8137c51523605dc55528516c0d7e'
- }
- Example publication::
- {
- 'id': 'b45573563f5a',
- 'name': 'Developers',
- 'description': "Medium's Developer resources",
- 'url': 'https://medium.com/developers',
- 'imageUrl': 'https://cdn-images-1.medium.com/fit/c/200/200/1*ccokMT4VXmDDO1EoQQHkzg@2x.png'
- }
- """
- id = self.key_id().lstrip('@')
-
- user = json_loads(auth_entity.user_json).get('data')
- if user.get('username').lstrip('@') == id:
- return user
-
- for pub in json_loads(auth_entity.publications_json).get('data', []):
- if pub.get('id') == id:
- return pub
-
- def _urls_and_domains(self, auth_entity, user_url):
- if self.url:
- return [self.url], [util.domain_from_link(self.url)]
-
- return [], []
-
-
-@app.route('/medium/add', methods=['POST'])
+ GR_CLASS = collections.namedtuple("FakeGrClass", ("NAME",))(NAME="Medium")
+ OAUTH_START = oauth_medium.Start
+ SHORT_NAME = "medium"
+
+ def is_publication(self):
+ return not self.key_id().startswith("@")
+
+ def feed_url(self):
+ # https://help.medium.com/hc/en-us/articles/214874118-RSS-Feeds-of-publications-and-profiles
+ return self.url.replace("medium.com/", "medium.com/feed/")
+
+ def silo_url(self):
+ return self.url
+
+ @staticmethod
+ def new(auth_entity=None, id=None, **kwargs):
+ """Creates and returns a Medium for the logged in user.
+
+ Args:
+ auth_entity: :class:`oauth_dropins.medium.MediumAuth`
+ id: string, either username (starting with @) or publication id
+ """
+ assert id
+ medium = Medium(
+ id=id,
+ auth_entity=auth_entity.key,
+ superfeedr_secret=util.generate_secret(),
+ **kwargs,
+ )
+
+ data = medium._data(auth_entity)
+ medium.name = data.get("name") or data.get("username")
+ medium.picture = data.get("imageUrl")
+ medium.url = data.get("url")
+ return medium
+
+ def verified(self):
+ return False
+
+ def verify(self, force=False):
+ """No incoming webmention support yet."""
+ pass
+
+ def has_bridgy_webmention_endpoint(self):
+ return True
+
+ def _data(self, auth_entity):
+ """Returns the Medium API object for this user or publication.
+
+ https://github.com/Medium/medium-api-docs/#user-content-getting-the-authenticated-users-details
+
+ Example user::
+ {
+ 'imageUrl': 'https://cdn-images-1.medium.com/fit/c/200/200/0*4dsrv3pwIJfFraSz.jpeg',
+ 'url': 'https://medium.com/@snarfed',
+ 'name': 'Ryan Barrett',
+ 'username': 'snarfed',
+ 'id': '113863a5ca2ab60671e8c9fe089e59c07acbf8137c51523605dc55528516c0d7e'
+ }
+
+ Example publication::
+ {
+ 'id': 'b45573563f5a',
+ 'name': 'Developers',
+ 'description': "Medium's Developer resources",
+ 'url': 'https://medium.com/developers',
+ 'imageUrl': 'https://cdn-images-1.medium.com/fit/c/200/200/1*ccokMT4VXmDDO1EoQQHkzg@2x.png'
+ }
+ """
+ id = self.key_id().lstrip("@")
+
+ user = json_loads(auth_entity.user_json).get("data")
+ if user.get("username").lstrip("@") == id:
+ return user
+
+ for pub in json_loads(auth_entity.publications_json).get("data", []):
+ if pub.get("id") == id:
+ return pub
+
+ def _urls_and_domains(self, auth_entity, user_url):
+ if self.url:
+ return [self.url], [util.domain_from_link(self.url)]
+
+ return [], []
+
+
+@app.route("/medium/add", methods=["POST"])
def medium_add():
- auth_entity = ndb.Key(urlsafe=request.values['auth_entity_key']).get()
- util.maybe_add_or_delete_source(Medium, auth_entity, request.values['state'],
- id=request.values['blog'])
+ auth_entity = ndb.Key(urlsafe=request.values["auth_entity_key"]).get()
+ util.maybe_add_or_delete_source(
+ Medium, auth_entity, request.values["state"], id=request.values["blog"]
+ )
class ChooseBlog(oauth_medium.Callback):
- def finish(self, auth_entity, state=None):
- if not auth_entity:
- util.maybe_add_or_delete_source(Medium, auth_entity, state)
- return
-
- user = json_loads(auth_entity.user_json)['data']
- username = user['username']
- if not username.startswith('@'):
- username = '@' + username
-
- # fetch publications this user contributes or subscribes to.
- # (sadly medium's API doesn't tell us the difference unless we fetch each
- # pub's metadata separately.)
- # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
- auth_entity.publications_json = auth_entity.get(
- oauth_medium.API_BASE + 'users/%s/publications' % user['id']).text
- auth_entity.put()
- pubs = json_loads(auth_entity.publications_json).get('data')
- if not pubs:
- util.maybe_add_or_delete_source(Medium, auth_entity, state,
- id=username)
- return
-
- # add user profile to start of pubs list
- user['id'] = username
- pubs.insert(0, user)
-
- vars = {
- 'action': '/medium/add',
- 'state': state,
- 'auth_entity_key': auth_entity.key.urlsafe().decode(),
- 'blogs': [{
- 'id': p['id'],
- 'title': p.get('name', ''),
- 'url': p.get('url', ''),
- 'pretty_url': util.pretty_link(str(p.get('url', ''))),
- 'image': p.get('imageUrl', ''),
- } for p in pubs if p.get('id')],
- }
- logging.info(f'Rendering choose_blog.html with {vars}')
- return render_template('choose_blog.html', **vars)
+ def finish(self, auth_entity, state=None):
+ if not auth_entity:
+ util.maybe_add_or_delete_source(Medium, auth_entity, state)
+ return
+
+ user = json_loads(auth_entity.user_json)["data"]
+ username = user["username"]
+ if not username.startswith("@"):
+ username = "@" + username
+
+    # fetch publications this user contributes to or subscribes to.
+ # (sadly medium's API doesn't tell us the difference unless we fetch each
+ # pub's metadata separately.)
+ # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
+ auth_entity.publications_json = auth_entity.get(
+ oauth_medium.API_BASE + "users/%s/publications" % user["id"]
+ ).text
+ auth_entity.put()
+ pubs = json_loads(auth_entity.publications_json).get("data")
+ if not pubs:
+ util.maybe_add_or_delete_source(Medium, auth_entity, state, id=username)
+ return
+
+ # add user profile to start of pubs list
+ user["id"] = username
+ pubs.insert(0, user)
+
+ vars = {
+ "action": "/medium/add",
+ "state": state,
+ "auth_entity_key": auth_entity.key.urlsafe().decode(),
+ "blogs": [
+ {
+ "id": p["id"],
+ "title": p.get("name", ""),
+ "url": p.get("url", ""),
+ "pretty_url": util.pretty_link(str(p.get("url", ""))),
+ "image": p.get("imageUrl", ""),
+ }
+ for p in pubs
+ if p.get("id")
+ ],
+ }
+ logging.info(f"Rendering choose_blog.html with {vars}")
+ return render_template("choose_blog.html", **vars)
class SuperfeedrNotify(superfeedr.Notify):
- SOURCE_CLS = Medium
+ SOURCE_CLS = Medium
# https://github.com/Medium/medium-api-docs#user-content-21-browser-based-authentication
start = util.oauth_starter(oauth_medium.Start).as_view(
- 'medium_start', '/medium/choose_blog', scopes=('basicProfile', 'listPublications'))
-app.add_url_rule('/medium/start', view_func=start, methods=['POST'])
-app.add_url_rule('/medium/choose_blog', view_func=ChooseBlog.as_view(
- 'medium_choose_blog'), methods=['POST'])
-app.add_url_rule('/medium/delete/finish', view_func=oauth_medium.Callback.as_view(
- 'medium_delete', '/delete/finish')),
-app.add_url_rule('/medium/notify/', view_func=SuperfeedrNotify.as_view('medium_notify'), methods=['POST'])
+ "medium_start", "/medium/choose_blog", scopes=("basicProfile", "listPublications")
+)
+app.add_url_rule("/medium/start", view_func=start, methods=["POST"])
+app.add_url_rule(
+ "/medium/choose_blog",
+ view_func=ChooseBlog.as_view("medium_choose_blog"),
+ methods=["POST"],
+)
+app.add_url_rule(
+ "/medium/delete/finish",
+ view_func=oauth_medium.Callback.as_view("medium_delete", "/delete/finish"),
+)
+app.add_url_rule(
+ "/medium/notify/",
+ view_func=SuperfeedrNotify.as_view("medium_notify"),
+ methods=["POST"],
+)
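The `feed_url()` rewrite above is a plain string substitution; a quick check against the example profile and publication URLs from the `_data()` docstring:

```python
# Medium.feed_url() rewrites a profile or publication URL into its RSS feed
# URL by inserting the /feed/ path segment.
def feed_url(url):
    return url.replace("medium.com/", "medium.com/feed/")

assert feed_url("https://medium.com/@snarfed") == "https://medium.com/feed/@snarfed"
assert feed_url("https://medium.com/developers") == "https://medium.com/feed/developers"
```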
diff --git a/meetup.py b/meetup.py
index 03ef9d71..dc6c5cc4 100644
--- a/meetup.py
+++ b/meetup.py
@@ -11,54 +11,71 @@
# We don't support listen
LISTEN_SCOPES = []
PUBLISH_SCOPES = [
- 'rsvp',
+ "rsvp",
]
class Meetup(Source):
- GR_CLASS = gr_meetup.Meetup
- OAUTH_START = oauth_meetup.Start
- SHORT_NAME = 'meetup'
- BACKFEED_REQUIRES_SYNDICATION_LINK = True
- CAN_LISTEN = False
- CAN_PUBLISH = True
- URL_CANONICALIZER = util.UrlCanonicalizer(
- domain=GR_CLASS.DOMAIN,
- headers=util.REQUEST_HEADERS)
+ GR_CLASS = gr_meetup.Meetup
+ OAUTH_START = oauth_meetup.Start
+ SHORT_NAME = "meetup"
+ BACKFEED_REQUIRES_SYNDICATION_LINK = True
+ CAN_LISTEN = False
+ CAN_PUBLISH = True
+ URL_CANONICALIZER = util.UrlCanonicalizer(
+ domain=GR_CLASS.DOMAIN, headers=util.REQUEST_HEADERS
+ )
- @staticmethod
- def new(auth_entity=None, **kwargs):
- """Creates and returns a :class:`Meetup` for the logged in user.
+ @staticmethod
+ def new(auth_entity=None, **kwargs):
+ """Creates and returns a :class:`Meetup` for the logged in user.
- Args:
- auth_entity: :class:`oauth_dropins.meetup.MeetupAuth`
- kwargs: property values
- """
- user = json_loads(auth_entity.user_json)
- gr_source = gr_meetup.Meetup(access_token=auth_entity.access_token())
- actor = gr_source.user_to_actor(user)
- return Meetup(id=auth_entity.key.id(),
- auth_entity=auth_entity.key,
- name=actor.get('displayName'),
- picture=actor.get('image', {}).get('url'),
- url=actor.get('url'),
- **kwargs)
+ Args:
+ auth_entity: :class:`oauth_dropins.meetup.MeetupAuth`
+ kwargs: property values
+ """
+ user = json_loads(auth_entity.user_json)
+ gr_source = gr_meetup.Meetup(access_token=auth_entity.access_token())
+ actor = gr_source.user_to_actor(user)
+ return Meetup(
+ id=auth_entity.key.id(),
+ auth_entity=auth_entity.key,
+ name=actor.get("displayName"),
+ picture=actor.get("image", {}).get("url"),
+ url=actor.get("url"),
+ **kwargs
+ )
- def silo_url(self):
- """Returns the Meetup account URL, e.g. https://meetup.com/members/...."""
- return self.gr_source.user_url(self.key.id())
+ def silo_url(self):
+ """Returns the Meetup account URL, e.g. https://meetup.com/members/...."""
+ return self.gr_source.user_url(self.key.id())
- def label_name(self):
- """Returns the username."""
- return self.name
+ def label_name(self):
+ """Returns the username."""
+ return self.name
class Callback(oauth_meetup.Callback):
- def finish(self, auth_entity, state=None):
- util.maybe_add_or_delete_source(Meetup, auth_entity, state)
+ def finish(self, auth_entity, state=None):
+ util.maybe_add_or_delete_source(Meetup, auth_entity, state)
-app.add_url_rule('/meetup/start', view_func=util.oauth_starter(oauth_meetup.Start).as_view('meetup_start', '/meetup/add', scopes=PUBLISH_SCOPES), methods=['POST']) # we don't support listen
-app.add_url_rule('/meetup/add', view_func=Callback.as_view('meetup_add', 'unused'))
-app.add_url_rule('/meetup/delete/finish', view_func=oauth_meetup.Callback.as_view('meetup_delete_finish', '/delete/finish'))
-app.add_url_rule('/meetup/publish/start', view_func=oauth_meetup.Start.as_view('meetup_publish_finish', '/meetup/publish/finish', scopes=PUBLISH_SCOPES), methods=['POST'])
+app.add_url_rule(
+ "/meetup/start",
+ view_func=util.oauth_starter(oauth_meetup.Start).as_view(
+ "meetup_start", "/meetup/add", scopes=PUBLISH_SCOPES
+ ),
+ methods=["POST"],
+) # we don't support listen
+app.add_url_rule("/meetup/add", view_func=Callback.as_view("meetup_add", "unused"))
+app.add_url_rule(
+ "/meetup/delete/finish",
+ view_func=oauth_meetup.Callback.as_view("meetup_delete_finish", "/delete/finish"),
+)
+app.add_url_rule(
+ "/meetup/publish/start",
+ view_func=oauth_meetup.Start.as_view(
+ "meetup_publish_finish", "/meetup/publish/finish", scopes=PUBLISH_SCOPES
+ ),
+ methods=["POST"],
+)
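Meetup is the leanest silo here: publish-only, with no polling. A minimal sketch of the Source-subclass surface such a silo provides (all names below are hypothetical, for illustration only):

```python
# Hypothetical publish-only silo, mirroring the Meetup class above.
class FakeSilo(Source):
    GR_CLASS = gr_fakesilo.FakeSilo     # granary class (hypothetical)
    OAUTH_START = oauth_fakesilo.Start  # oauth-dropins Start view (hypothetical)
    SHORT_NAME = "fakesilo"             # registered in models.sources by SourceMeta
    CAN_LISTEN = False                  # no polling/backfeed
    CAN_PUBLISH = True                  # only publish is supported
```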
diff --git a/models.py b/models.py
index 8f679371..b613f034 100644
--- a/models.py
+++ b/models.py
@@ -19,8 +19,8 @@
import superfeedr
import util
-VERB_TYPES = ('post', 'comment', 'like', 'react', 'repost', 'rsvp', 'tag')
-PUBLISH_TYPES = VERB_TYPES + ('preview', 'delete')
+VERB_TYPES = ("post", "comment", "like", "react", "repost", "rsvp", "tag")
+PUBLISH_TYPES = VERB_TYPES + ("preview", "delete")
MAX_AUTHOR_URLS = 5
@@ -32,1129 +32,1222 @@
BLOCKLIST_MAX_IDS = 20000
TWITTER_SCRAPE_HEADERS = json_loads(
- os.getenv('TWITTER_SCRAPE_HEADERS') or
- util.read('twitter_scrape_headers.json') or '{}')
+ os.getenv("TWITTER_SCRAPE_HEADERS")
+ or util.read("twitter_scrape_headers.json")
+ or "{}"
+)
# maps string short name to Source subclass. populated by SourceMeta.
sources = {}
def get_type(obj):
- """Returns the :class:`Response` or :class:`Publish` type for an AS object."""
- type = obj.get('objectType')
- verb = obj.get('verb')
- if type == 'activity' and verb == 'share':
- return 'repost'
- elif type == 'issue':
- return 'post'
- elif verb in gr_source.RSVP_VERB_TO_COLLECTION:
- return 'rsvp'
- elif (type == 'comment' or obj.get('inReplyTo') or
- obj.get('context', {}).get('inReplyTo')):
- return 'comment'
- elif verb in VERB_TYPES:
- return verb
- else:
- return 'post'
+ """Returns the :class:`Response` or :class:`Publish` type for an AS object."""
+ type = obj.get("objectType")
+ verb = obj.get("verb")
+ if type == "activity" and verb == "share":
+ return "repost"
+ elif type == "issue":
+ return "post"
+ elif verb in gr_source.RSVP_VERB_TO_COLLECTION:
+ return "rsvp"
+ elif (
+ type == "comment"
+ or obj.get("inReplyTo")
+ or obj.get("context", {}).get("inReplyTo")
+ ):
+ return "comment"
+ elif verb in VERB_TYPES:
+ return verb
+ else:
+ return "post"
class DisableSource(Exception):
- """Raised when a user has deauthorized our app inside a given platform."""
+ """Raised when a user has deauthorized our app inside a given platform."""
class SourceMeta(ndb.MetaModel):
- """:class:`Source` metaclass. Registers all subclasses in the sources global."""
- def __new__(meta, name, bases, class_dict):
- cls = ndb.MetaModel.__new__(meta, name, bases, class_dict)
- if cls.SHORT_NAME:
- sources[cls.SHORT_NAME] = cls
- return cls
+ """:class:`Source` metaclass. Registers all subclasses in the sources global."""
+ def __new__(meta, name, bases, class_dict):
+ cls = ndb.MetaModel.__new__(meta, name, bases, class_dict)
+ if cls.SHORT_NAME:
+ sources[cls.SHORT_NAME] = cls
+ return cls
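This registration is what lets handlers map a short name like 'mastodon' back to its class: defining any concrete subclass with a `SHORT_NAME` populates the module-level `sources` dict as a side effect. A toy sketch (class name hypothetical):

```python
# Defining a subclass is enough to register it; no explicit call needed,
# since Source subclasses inherit the SourceMeta metaclass.
class FakeSilo(Source):
    SHORT_NAME = "fakesilo"

assert sources["fakesilo"] is FakeSilo
```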
-class Source(StringIdModel, metaclass=SourceMeta):
- """A silo account, e.g. a Facebook or Google+ account.
-
- Each concrete silo class should subclass this class.
- """
-
- # Turn off NDB instance and memcache caching.
- # https://developers.google.com/appengine/docs/python/ndb/cache
- # https://github.com/snarfed/bridgy/issues/558
- # https://github.com/snarfed/bridgy/issues/68
- _use_cache = False
-
- STATUSES = ('enabled', 'disabled')
- POLL_STATUSES = ('ok', 'error', 'polling')
- FEATURES = ('listen', 'publish', 'webmention', 'email')
-
- # short name for this site type. used in URLs, etc.
- SHORT_NAME = None
- # the corresponding granary class
- GR_CLASS = None
- # oauth-dropins Start class
- OAUTH_START = None
- # whether Bridgy supports listen for this silo - this is unlikely, so we default to True
- CAN_LISTEN = True
- # whether Bridgy supports publish for this silo
- CAN_PUBLISH = None
- # whether this source should poll automatically, or only when triggered
- # (eg Instagram)
- AUTO_POLL = True
- # how often to poll for responses
- FAST_POLL = datetime.timedelta(minutes=30)
- # how often to poll sources that have never sent a webmention
- SLOW_POLL = datetime.timedelta(days=1)
- # how often to poll sources that are currently rate limited by their silo
- RATE_LIMITED_POLL = SLOW_POLL
- # how long to wait after signup for a successful webmention before dropping to
- # the lower frequency poll
- FAST_POLL_GRACE_PERIOD = datetime.timedelta(days=7)
- # how often refetch author url to look for updated syndication links
- FAST_REFETCH = datetime.timedelta(hours=6)
- # refetch less often (this often) if it's been >2w since the last synd link
- SLOW_REFETCH = datetime.timedelta(days=2)
- # rate limiting HTTP status codes returned by this silo. e.g. twitter returns
- # 429, instagram 503, google+ 403.
- RATE_LIMIT_HTTP_CODES = ('429',)
- DISABLE_HTTP_CODES = ('401',)
- TRANSIENT_ERROR_HTTP_CODES = ()
- # whether granary supports fetching block lists
- HAS_BLOCKS = False
- # whether to require a u-syndication link for backfeed
- BACKFEED_REQUIRES_SYNDICATION_LINK = False
- # ignore fragments when comparing syndication links in OPD
- IGNORE_SYNDICATION_LINK_FRAGMENTS = False
-
- # Maps Publish.type (e.g. 'like') to source-specific human readable type label
- # (e.g. 'favorite'). Subclasses should override this.
- TYPE_LABELS = {}
-
- # subclasses should override this
- URL_CANONICALIZER = util.UrlCanonicalizer(headers=util.REQUEST_HEADERS)
-
- # Regexps for URL paths that don't accept incoming webmentions. Currently used
- # by Blogger.
- PATH_BLOCKLIST = ()
-
- created = ndb.DateTimeProperty(auto_now_add=True, required=True)
- url = ndb.StringProperty()
- status = ndb.StringProperty(choices=STATUSES, default='enabled')
- poll_status = ndb.StringProperty(choices=POLL_STATUSES, default='ok')
- rate_limited = ndb.BooleanProperty(default=False)
- name = ndb.StringProperty() # full human-readable name
- picture = ndb.StringProperty()
- domains = ndb.StringProperty(repeated=True)
- domain_urls = ndb.StringProperty(repeated=True)
- features = ndb.StringProperty(repeated=True, choices=FEATURES)
- superfeedr_secret = ndb.StringProperty()
- webmention_endpoint = ndb.StringProperty()
-
- # points to an oauth-dropins auth entity. The model class should be a subclass
- # of oauth_dropins.BaseAuth. the token should be generated with the
- # offline_access scope so that it doesn't expire.
- auth_entity = ndb.KeyProperty()
-
- #
- # listen-only properties
- #
- last_polled = ndb.DateTimeProperty(default=util.EPOCH)
- last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH)
- last_webmention_sent = ndb.DateTimeProperty()
- last_public_post = ndb.DateTimeProperty()
- recent_private_posts = ndb.IntegerProperty(default=0)
-
- # the last time we re-fetched the author's url looking for updated
- # syndication links
- last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH)
-
- # the last time we've seen a rel=syndication link for this Source.
- # we won't spend the time to re-fetch and look for updates if there's
- # never been one
- last_syndication_url = ndb.DateTimeProperty()
- # the last time we saw a syndication link in an h-feed, as opposed to just on
- # permalinks. background: https://github.com/snarfed/bridgy/issues/624
- last_feed_syndication_url = ndb.DateTimeProperty()
-
- last_activity_id = ndb.StringProperty()
- last_activities_etag = ndb.StringProperty()
- last_activities_cache_json = ndb.TextProperty()
- seen_responses_cache_json = ndb.TextProperty(compressed=True)
-
- # populated in Poll.poll(), used by handlers
- blocked_ids = ndb.JsonProperty(compressed=True)
-
- # maps updated property names to values that put_updates() writes back to the
- # datastore transactionally. set this to {} before beginning.
- updates = None
-
- # gr_source is *not* set to None by default here, since it needs to be unset
- # for __getattr__ to run when it's accessed.
-
- def __init__(self, *args, id=None, **kwargs):
- """Constructor. Escapes the key string id if it starts with `__`."""
- if id and id.startswith('__'):
- id = '\\' + id
- super().__init__(*args, id=id, **kwargs)
-
- def key_id(self):
- """Returns the key's unescaped string id."""
- id = self.key.id()
- return id[1:] if id[0] == '\\' else id
-
- @classmethod
- def new(cls, **kwargs):
- """Factory method. Creates and returns a new instance for the current user.
-
- To be implemented by subclasses.
- """
- raise NotImplementedError()
- def __getattr__(self, name):
- """Lazily load the auth entity and instantiate :attr:`self.gr_source`.
+class Source(StringIdModel, metaclass=SourceMeta):
+ """A silo account, e.g. a Facebook or Google+ account.
- Once :attr:`self.gr_source` is set, this method will *not* be called;
- :attr:`gr_source` will be returned normally.
+ Each concrete silo class should subclass this class.
"""
- if name == 'gr_source':
- super_attr = getattr(super(), name, None)
- if super_attr:
- return super_attr
- elif not self.auth_entity:
- return None
-
- auth_entity = self.auth_entity.get()
- try:
- refresh_token = auth_entity.refresh_token
- self.gr_source = self.GR_CLASS(refresh_token)
- return self.gr_source
- except AttributeError:
- logging.info('no refresh_token')
- args = auth_entity.access_token()
- if not isinstance(args, tuple):
- args = (args,)
-
- kwargs = {}
- if self.key.kind() == 'FacebookPage' and auth_entity.type == 'user':
- kwargs = {'user_id': self.key_id()}
- elif self.key.kind() == 'Instagram':
- kwargs = {'scrape': True, 'cookie': INSTAGRAM_SESSIONID_COOKIE}
- elif self.key.kind() == 'Mastodon':
- args = (auth_entity.instance(),) + args
- inst = auth_entity.app.get().instance_info
- kwargs = {
- 'user_id': json_loads(auth_entity.user_json).get('id'),
- # https://docs-develop.pleroma.social/backend/API/differences_in_mastoapi_responses/#instance
- 'truncate_text_length':
- json_loads(inst).get('max_toot_chars') if inst else None,
- }
- elif self.key.kind() == 'Twitter':
- kwargs = {'username': self.key_id(), 'scrape_headers': TWITTER_SCRAPE_HEADERS}
-
- self.gr_source = self.GR_CLASS(*args, **kwargs)
- return self.gr_source
-
- return getattr(super(), name)
-
- @classmethod
- def lookup(cls, id):
- """Returns the entity with the given id.
-
- By default, interprets id as just the key id. Subclasses may extend this to
- support usernames, etc.
- """
- if id and id.startswith('__'):
- id = '\\' + id
- return ndb.Key(cls, id).get()
-
- def user_tag_id(self):
- """Returns the tag URI for this source, e.g. 'tag:plus.google.com:123456'."""
- return self.gr_source.tag_uri(self.key_id())
-
- def bridgy_path(self):
- """Returns the Bridgy page URL path for this source."""
- return '/%s/%s' % (self.SHORT_NAME, self.key_id())
- def bridgy_url(self):
- """Returns the Bridgy page URL for this source."""
- return util.host_url(self.bridgy_path())
+ # Turn off NDB instance and memcache caching.
+ # https://developers.google.com/appengine/docs/python/ndb/cache
+ # https://github.com/snarfed/bridgy/issues/558
+ # https://github.com/snarfed/bridgy/issues/68
+ _use_cache = False
+
+ STATUSES = ("enabled", "disabled")
+ POLL_STATUSES = ("ok", "error", "polling")
+ FEATURES = ("listen", "publish", "webmention", "email")
+
+ # short name for this site type. used in URLs, etc.
+ SHORT_NAME = None
+ # the corresponding granary class
+ GR_CLASS = None
+ # oauth-dropins Start class
+ OAUTH_START = None
+    # whether Bridgy supports listen for this silo. most silos do, so we default to True
+ CAN_LISTEN = True
+ # whether Bridgy supports publish for this silo
+ CAN_PUBLISH = None
+ # whether this source should poll automatically, or only when triggered
+ # (eg Instagram)
+ AUTO_POLL = True
+ # how often to poll for responses
+ FAST_POLL = datetime.timedelta(minutes=30)
+ # how often to poll sources that have never sent a webmention
+ SLOW_POLL = datetime.timedelta(days=1)
+ # how often to poll sources that are currently rate limited by their silo
+ RATE_LIMITED_POLL = SLOW_POLL
+ # how long to wait after signup for a successful webmention before dropping to
+ # the lower frequency poll
+ FAST_POLL_GRACE_PERIOD = datetime.timedelta(days=7)
+    # how often to refetch the author's url to look for updated syndication links
+ FAST_REFETCH = datetime.timedelta(hours=6)
+    # refetch less often (at this interval) if it's been >2w since the last synd link
+ SLOW_REFETCH = datetime.timedelta(days=2)
+ # rate limiting HTTP status codes returned by this silo. e.g. twitter returns
+ # 429, instagram 503, google+ 403.
+ RATE_LIMIT_HTTP_CODES = ("429",)
+ DISABLE_HTTP_CODES = ("401",)
+ TRANSIENT_ERROR_HTTP_CODES = ()
+ # whether granary supports fetching block lists
+ HAS_BLOCKS = False
+ # whether to require a u-syndication link for backfeed
+ BACKFEED_REQUIRES_SYNDICATION_LINK = False
+ # ignore fragments when comparing syndication links in OPD
+ IGNORE_SYNDICATION_LINK_FRAGMENTS = False
+
+ # Maps Publish.type (e.g. 'like') to source-specific human readable type label
+ # (e.g. 'favorite'). Subclasses should override this.
+ TYPE_LABELS = {}
+
+ # subclasses should override this
+ URL_CANONICALIZER = util.UrlCanonicalizer(headers=util.REQUEST_HEADERS)
+
+ # Regexps for URL paths that don't accept incoming webmentions. Currently used
+ # by Blogger.
+ PATH_BLOCKLIST = ()
+
+ created = ndb.DateTimeProperty(auto_now_add=True, required=True)
+ url = ndb.StringProperty()
+ status = ndb.StringProperty(choices=STATUSES, default="enabled")
+ poll_status = ndb.StringProperty(choices=POLL_STATUSES, default="ok")
+ rate_limited = ndb.BooleanProperty(default=False)
+ name = ndb.StringProperty() # full human-readable name
+ picture = ndb.StringProperty()
+ domains = ndb.StringProperty(repeated=True)
+ domain_urls = ndb.StringProperty(repeated=True)
+ features = ndb.StringProperty(repeated=True, choices=FEATURES)
+ superfeedr_secret = ndb.StringProperty()
+ webmention_endpoint = ndb.StringProperty()
+
+ # points to an oauth-dropins auth entity. The model class should be a subclass
+ # of oauth_dropins.BaseAuth. the token should be generated with the
+ # offline_access scope so that it doesn't expire.
+ auth_entity = ndb.KeyProperty()
+
+ #
+ # listen-only properties
+ #
+ last_polled = ndb.DateTimeProperty(default=util.EPOCH)
+ last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH)
+ last_webmention_sent = ndb.DateTimeProperty()
+ last_public_post = ndb.DateTimeProperty()
+ recent_private_posts = ndb.IntegerProperty(default=0)
+
+ # the last time we re-fetched the author's url looking for updated
+ # syndication links
+ last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH)
+
+ # the last time we've seen a rel=syndication link for this Source.
+ # we won't spend the time to re-fetch and look for updates if there's
+ # never been one
+ last_syndication_url = ndb.DateTimeProperty()
+ # the last time we saw a syndication link in an h-feed, as opposed to just on
+ # permalinks. background: https://github.com/snarfed/bridgy/issues/624
+ last_feed_syndication_url = ndb.DateTimeProperty()
+
+ last_activity_id = ndb.StringProperty()
+ last_activities_etag = ndb.StringProperty()
+ last_activities_cache_json = ndb.TextProperty()
+ seen_responses_cache_json = ndb.TextProperty(compressed=True)
+
+ # populated in Poll.poll(), used by handlers
+ blocked_ids = ndb.JsonProperty(compressed=True)
+
+ # maps updated property names to values that put_updates() writes back to the
+ # datastore transactionally. set this to {} before beginning.
+ updates = None
+
+ # gr_source is *not* set to None by default here, since it needs to be unset
+ # for __getattr__ to run when it's accessed.
+
+ def __init__(self, *args, id=None, **kwargs):
+ """Constructor. Escapes the key string id if it starts with `__`."""
+ if id and id.startswith("__"):
+ id = "\\" + id
+ super().__init__(*args, id=id, **kwargs)
+
+ def key_id(self):
+ """Returns the key's unescaped string id."""
+ id = self.key.id()
+ return id[1:] if id[0] == "\\" else id
+
+ @classmethod
+ def new(cls, **kwargs):
+ """Factory method. Creates and returns a new instance for the current user.
+
+ To be implemented by subclasses.
+ """
+ raise NotImplementedError()
+
+ def __getattr__(self, name):
+ """Lazily load the auth entity and instantiate :attr:`self.gr_source`.
+
+ Once :attr:`self.gr_source` is set, this method will *not* be called;
+ :attr:`gr_source` will be returned normally.
+ """
+ if name == "gr_source":
+ super_attr = getattr(super(), name, None)
+ if super_attr:
+ return super_attr
+ elif not self.auth_entity:
+ return None
+
+ auth_entity = self.auth_entity.get()
+ try:
+ refresh_token = auth_entity.refresh_token
+ self.gr_source = self.GR_CLASS(refresh_token)
+ return self.gr_source
+ except AttributeError:
+ logging.info("no refresh_token")
+ args = auth_entity.access_token()
+ if not isinstance(args, tuple):
+ args = (args,)
+
+ kwargs = {}
+ if self.key.kind() == "FacebookPage" and auth_entity.type == "user":
+ kwargs = {"user_id": self.key_id()}
+ elif self.key.kind() == "Instagram":
+ kwargs = {"scrape": True, "cookie": INSTAGRAM_SESSIONID_COOKIE}
+ elif self.key.kind() == "Mastodon":
+ args = (auth_entity.instance(),) + args
+ inst = auth_entity.app.get().instance_info
+ kwargs = {
+ "user_id": json_loads(auth_entity.user_json).get("id"),
+ # https://docs-develop.pleroma.social/backend/API/differences_in_mastoapi_responses/#instance
+ "truncate_text_length": json_loads(inst).get("max_toot_chars")
+ if inst
+ else None,
+ }
+ elif self.key.kind() == "Twitter":
+ kwargs = {
+ "username": self.key_id(),
+ "scrape_headers": TWITTER_SCRAPE_HEADERS,
+ }
+
+ self.gr_source = self.GR_CLASS(*args, **kwargs)
+ return self.gr_source
+
+ return getattr(super(), name)
+
+ @classmethod
+ def lookup(cls, id):
+ """Returns the entity with the given id.
+
+ By default, interprets id as just the key id. Subclasses may extend this to
+ support usernames, etc.
+ """
+ if id and id.startswith("__"):
+ id = "\\" + id
+ return ndb.Key(cls, id).get()
+
+ def user_tag_id(self):
+ """Returns the tag URI for this source, e.g. 'tag:plus.google.com:123456'."""
+ return self.gr_source.tag_uri(self.key_id())
+
+ def bridgy_path(self):
+ """Returns the Bridgy page URL path for this source."""
+ return "/%s/%s" % (self.SHORT_NAME, self.key_id())
+
+ def bridgy_url(self):
+ """Returns the Bridgy page URL for this source."""
+ return util.host_url(self.bridgy_path())
+
+ def silo_url(self, handler):
+ """Returns the silo account URL, e.g. https://twitter.com/foo."""
+ raise NotImplementedError()
+
+ def label(self):
+ """Human-readable label for this source."""
+ return "%s (%s)" % (self.label_name(), self.GR_CLASS.NAME)
+
+ def label_name(self):
+ """Human-readable name or username for this source, whichever is preferred."""
+ return self.name or self.key_id()
+
+ @classmethod
+ @ndb.transactional()
+ def put_updates(cls, source):
+ """Writes source.updates to the datastore transactionally.
+
+        Args:
+ source: :class:`Source`
+
+ Returns:
+ the updated :class:`Source`
+ """
+ if not source.updates:
+ return source
+
+ logging.info(
+ "Updating %s %s : %r",
+ source.label(),
+ source.bridgy_path(),
+ {k: v for k, v in source.updates.items() if not k.endswith("_json")},
+ )
+
+ updates = source.updates
+ source = source.key.get()
+ source.updates = updates
+ for name, val in updates.items():
+ setattr(source, name, val)
+
+ source.put()
+ return source
+
+ def poll_period(self):
+ """Returns the poll frequency for this source, as a :class:`datetime.timedelta`.
+
+        Defaults to ~30m, depending on silo. If we've never sent a webmention for
+        this source, or the last one we sent was over a month ago, we drop it down
+        to ~1d after a week-long grace period.
+ """
+ now = datetime.datetime.now()
+ if self.rate_limited:
+ return self.RATE_LIMITED_POLL
+ elif now < self.created + self.FAST_POLL_GRACE_PERIOD:
+ return self.FAST_POLL
+ elif not self.last_webmention_sent:
+ return self.SLOW_POLL
+ elif self.last_webmention_sent > now - datetime.timedelta(days=7):
+ return self.FAST_POLL
+ elif self.last_webmention_sent > now - datetime.timedelta(days=30):
+ return self.FAST_POLL * 10
+ else:
+ return self.SLOW_POLL
+
+ def should_refetch(self):
+ """Returns True if we should run OPD refetch on this source now."""
+ now = datetime.datetime.now()
+ if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER:
+ return True
+ elif not self.last_syndication_url:
+ return False
+
+ period = (
+ self.FAST_REFETCH
+ if self.last_syndication_url > now - datetime.timedelta(days=14)
+ else self.SLOW_REFETCH
+ )
+ return self.last_poll_attempt >= self.last_hfeed_refetch + period
+
+ @classmethod
+ def bridgy_webmention_endpoint(cls, domain="brid.gy"):
+ """Returns the Bridgy webmention endpoint for this source type."""
+ return "https://%s/webmention/%s" % (domain, cls.SHORT_NAME)
+
+ def has_bridgy_webmention_endpoint(self):
+ """Returns True if this source uses Bridgy's webmention endpoint."""
+ return self.webmention_endpoint in (
+ self.bridgy_webmention_endpoint(),
+ self.bridgy_webmention_endpoint(domain="www.brid.gy"),
+ )
+
+ def get_author_urls(self):
+ """Determine the author urls for a particular source.
+
+ In debug mode, replace test domains with localhost.
+
+ Return:
+ a list of string URLs, possibly empty
+ """
+ return [util.replace_test_domains_with_localhost(u) for u in self.domain_urls]
+
+ def search_for_links(self):
+ """Searches for activities with links to any of this source's web sites.
+
+ https://github.com/snarfed/bridgy/issues/456
+ https://github.com/snarfed/bridgy/issues/565
+
+ Returns:
+ sequence of ActivityStreams activity dicts
+ """
+ return []
+
+ def get_activities_response(self, **kwargs):
+ """Returns recent posts and embedded comments for this source.
+
+ May be overridden by subclasses.
+ """
+ kwargs.setdefault("group_id", gr_source.SELF)
+ resp = self.gr_source.get_activities_response(**kwargs)
+ for activity in resp["items"]:
+ self._inject_user_urls(activity)
+ return resp
+
+ def get_activities(self, **kwargs):
+ return self.get_activities_response(**kwargs)["items"]
+
+ def get_comment(self, comment_id, **kwargs):
+ """Returns a comment from this source.
+
+ Passes through to granary by default. May be overridden by subclasses.
+
+ Args:
+ comment_id: string, site-specific comment id
+ kwargs: passed to :meth:`granary.source.Source.get_comment`
+
+ Returns:
+ dict, decoded ActivityStreams comment object, or None
+ """
+ comment = self.gr_source.get_comment(comment_id, **kwargs)
+ if comment:
+ self._inject_user_urls(comment)
+ return comment
+
+ def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs):
+ """Returns an ActivityStreams 'like' activity object.
+
+ Passes through to granary by default. May be overridden
+ by subclasses.
+
+ Args:
+ activity_user_id: string id of the user who posted the original activity
+ activity_id: string activity id
+ like_user_id: string id of the user who liked the activity
+ kwargs: passed to granary.Source.get_comment
+ """
+ return self.gr_source.get_like(
+ activity_user_id, activity_id, like_user_id, **kwargs
+ )
+
+ def _inject_user_urls(self, activity):
+ """Adds this user's web site URLs to their user mentions (in tags), in place."""
+ obj = activity.get("object") or activity
+ user_tag_id = self.user_tag_id()
+ for tag in obj.get("tags", []):
+ if tag.get("id") == user_tag_id:
+ tag.setdefault("urls", []).extend(
+ [{"value": u} for u in self.domain_urls]
+ )
+
+ def create_comment(self, post_url, author_name, author_url, content):
+ """Creates a new comment in the source silo.
+
+ Must be implemented by subclasses.
+
+ Args:
+ post_url: string
+ author_name: string
+ author_url: string
+ content: string
+
+ Returns:
+ response dict with at least 'id' field
+ """
+ raise NotImplementedError()
+
+ def feed_url(self):
+ """Returns the RSS or Atom (or similar) feed URL for this source.
+
+ Must be implemented by subclasses. Currently only implemented by
+ :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.
+
+ Returns:
+ string URL
+ """
+ raise NotImplementedError()
+
+ def edit_template_url(self):
+ """Returns the URL for editing this blog's template HTML.
+
+ Must be implemented by subclasses. Currently only implemented by
+ :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.
+
+ Returns:
+ string URL
+ """
+ raise NotImplementedError()
+
+ @classmethod
+ def button_html(cls, feature, **kwargs):
+ """Returns an HTML string with a login form and button for this site.
+
+ Mostly just passes through to
+ :meth:`oauth_dropins.handlers.Start.button_html`.
+
+ Returns: string, HTML
+ """
+ assert set(feature.split(",")) <= set(cls.FEATURES)
+ form_extra = (
+ kwargs.pop("form_extra", "")
+        + '<input name="feature" type="hidden" value="%s" />' % feature
+ )
+
+ source = kwargs.pop("source", None)
+ if source:
+ form_extra += (
+            '\n<input name="id" type="hidden" value="%s" />' % source.key_id()
+ )
+
+ if cls.OAUTH_START:
+ return cls.OAUTH_START.button_html(
+ "/%s/start" % cls.SHORT_NAME,
+ form_extra=form_extra,
+ image_prefix="/oauth_dropins_static/",
+ **kwargs
+ )
+
+ return ""
+
+ @classmethod
+ @ndb.transactional()
+ def create_new(cls, user_url=None, **kwargs):
+ """Creates and saves a new :class:`Source` and adds a poll task for it.
+
+ Args:
+ user_url: a string, optional. if provided, supersedes other urls when
+ determining the author_url
+ **kwargs: passed to :meth:`new()`
+
+ Returns: newly created :class:`Source`
+ """
+ source = cls.new(**kwargs)
+ if source is None:
+ return None
+
+ if not source.domain_urls: # defer to the source if it already set this
+ auth_entity = kwargs.get("auth_entity")
+ if auth_entity and hasattr(auth_entity, "user_json"):
+ source.domain_urls, source.domains = source._urls_and_domains(
+ auth_entity, user_url
+ )
+ logging.debug("URLs/domains: %s %s", source.domain_urls, source.domains)
+
+ # check if this source already exists
+ existing = source.key.get()
+ if existing:
+ # merge some fields
+ source.features = set(source.features + existing.features)
+ source.populate(
+ **existing.to_dict(
+ include=(
+ "created",
+ "last_hfeed_refetch",
+ "last_poll_attempt",
+ "last_polled",
+ "last_syndication_url",
+ "last_webmention_sent",
+ "superfeedr_secret",
+ "webmention_endpoint",
+ )
+ )
+ )
+ verb = "Updated"
+ else:
+ verb = "Added"
+
+ author_urls = source.get_author_urls()
+ link = (
+ "http://indiewebify.me/send-webmentions/?url=" + author_urls[0]
+ if author_urls
+ else "http://indiewebify.me/#send-webmentions"
+ )
+ feature = source.features[0] if source.features else "listen"
+ blurb = "%s %s. %s" % (
+ verb,
+ source.label(),
+ "Try previewing a post from your web site!"
+ if feature == "publish"
+ else 'Try a webmention!' % link
+ if feature == "webmention"
+ else "Refresh in a minute to see what we've found!",
+ )
+ logging.info("%s %s", blurb, source.bridgy_url())
+
+ source.verify()
+ if source.verified():
+ flash(blurb)
+
+ source.put()
+
+ if "webmention" in source.features:
+ superfeedr.subscribe(source)
+
+ if "listen" in source.features and source.AUTO_POLL:
+ util.add_poll_task(source, now=True)
+ util.add_poll_task(source)
+
+ return source
+
+ def verified(self):
+ """Returns True if this source is ready to be used, false otherwise.
+
+ See :meth:`verify()` for details. May be overridden by subclasses, e.g.
+ :class:`tumblr.Tumblr`.
+ """
+ if not self.domains or not self.domain_urls:
+ return False
+ if "webmention" in self.features and not self.webmention_endpoint:
+ return False
+ if "listen" in self.features and not (
+ self.webmention_endpoint or self.last_webmention_sent
+ ):
+ return False
+ return True
+
+ def verify(self, force=False):
+ """Checks that this source is ready to be used.
+
+ For blog and listen sources, this fetches their front page HTML and
+ discovers their webmention endpoint. For publish sources, this checks that
+ they have a domain.
+
+ May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.
+
+ Args:
+ force: if True, fully verifies (e.g. re-fetches the blog's HTML and
+            performs webmention discovery) even if we already think this source is
+ verified.
+ """
+ author_urls = [
+ u
+ for u, d in zip(self.get_author_urls(), self.domains)
+ if not util.in_webmention_blocklist(d)
+ ]
+ if (
+ (self.verified() and not force)
+ or self.status == "disabled"
+ or not self.features
+ or not author_urls
+ ):
+ return
+
+ author_url = author_urls[0]
+ try:
+ got = webmention.discover(
+ author_url, timeout=util.HTTP_TIMEOUT, headers=util.REQUEST_HEADERS
+ )
+ self.webmention_endpoint = got.endpoint
+ self._fetched_html = got.response.text
+ except BaseException as e:
+ logging.info("Error discovering webmention endpoint", exc_info=e)
+ self.webmention_endpoint = None
+
+ self.put()
+
+ def _urls_and_domains(self, auth_entity, user_url, actor=None):
+ """Returns this user's valid (not webmention-blocklisted) URLs and domains.
+
+ Converts the auth entity's user_json to an ActivityStreams actor and uses
+ its 'urls' and 'url' fields. May be overridden by subclasses.
+
+ Args:
+ auth_entity: :class:`oauth_dropins.models.BaseAuth`
+ user_url: string, optional URL passed in when authorizing
+ actor: dict, optional AS actor for the user. If provided, overrides
+ auth_entity
+
+ Returns:
+ ([string url, ...], [string domain, ...])
+ """
+ if not actor:
+ actor = self.gr_source.user_to_actor(json_loads(auth_entity.user_json))
+ logging.debug(
+ "Extracting URLs and domains from actor: %s", json_dumps(actor, indent=2)
+ )
+
+ candidates = util.trim_nulls(
+ util.uniquify([user_url] + microformats2.object_urls(actor))
+ )
+
+ if len(candidates) > MAX_AUTHOR_URLS:
+ logging.info(
+ "Too many profile links! Only resolving the first %s: %s",
+ MAX_AUTHOR_URLS,
+ candidates,
+ )
+
+ urls = []
+ for i, url in enumerate(candidates):
+ resolved = self.resolve_profile_url(url, resolve=i < MAX_AUTHOR_URLS)
+ if resolved:
+ urls.append(resolved)
+
+ final_urls = []
+ domains = []
+ for url in util.dedupe_urls(urls): # normalizes domains to lower case
+ # skip links on this source's domain itself. only currently needed for
+ # Mastodon; the other silo domains are in the webmention blocklist.
+ domain = util.domain_from_link(url)
+ if domain != self.gr_source.DOMAIN:
+ final_urls.append(url)
+ domains.append(domain)
+
+ return final_urls, domains
+
+ @staticmethod
+ def resolve_profile_url(url, resolve=True):
+ """Resolves a profile URL to be added to a source.
+
+ Args:
+ url: string
+ resolve: boolean, whether to make HTTP requests to follow redirects, etc.
+
+ Returns: string, resolved URL, or None
+ """
+ final, _, ok = util.get_webmention_target(url, resolve=resolve)
+ if not ok:
+ return None
+
+ final = final.lower()
+ if util.schemeless(final).startswith(util.schemeless(url.lower())):
+ # redirected to a deeper path. use the original higher level URL. #652
+ final = url
+
+ # If final has a path segment check if root has a matching rel=me.
+ match = re.match(r"^(https?://[^/]+)/.+", final)
+ if match and resolve:
+ root = match.group(1)
+ try:
+ mf2 = util.fetch_mf2(root)
+ me_urls = mf2["rels"].get("me", [])
+ if final in me_urls:
+ final = root
+ except requests.RequestException:
+ logging.warning(
+ "Couldn't fetch %s, preserving path in %s",
+ root,
+ final,
+ exc_info=True,
+ )
+
+ return final
+
+ def canonicalize_url(self, url, activity=None, **kwargs):
+ """Canonicalizes a post or object URL.
+
+ Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`.
+ """
+ return self.URL_CANONICALIZER(url, **kwargs) if self.URL_CANONICALIZER else url
+
+ def infer_profile_url(self, url):
+ """Given an arbitrary URL representing a person, try to find their
+ profile URL for *this* service.
+
+ Queries Bridgy's registered accounts for users with a particular
+ domain in their silo profile.
+
+ Args:
+ url: string, a person's URL
+
+ Return:
+ a string URL for their profile on this service (or None)
+ """
+ domain = util.domain_from_link(url)
+ if domain == self.gr_source.DOMAIN:
+ return url
+ user = self.__class__.query(self.__class__.domains == domain).get()
+ if user:
+ return self.gr_source.user_url(user.key_id())
+
+ def preprocess_for_publish(self, obj):
+ """Preprocess an object before trying to publish it.
+
+ By default this tries to massage person tags so that the tag's
+ "url" points to the person's profile on this service (as opposed
+ to a person's homepage).
+
+ The object is modified in place.
+
+ Args:
+ obj: ActivityStreams activity or object dict
+ """
+ for tag in obj.get("tags", []):
+ if tag.get("objectType") == "person":
+ silo_url = None
+ for url in microformats2.object_urls(tag):
+ silo_url = url and self.infer_profile_url(url)
+ if silo_url:
+ break
+ if silo_url:
+ tag["url"] = silo_url
+
+ # recurse on contained object(s)
+ for obj in util.get_list(obj, "object"):
+ self.preprocess_for_publish(obj)
+
+ def on_new_syndicated_post(self, syndpost):
+ """Called when a new :class:`SyndicatedPost` is stored for this source.
+
+ Args:
+ syndpost: :class:`SyndicatedPost`
+ """
+ pass
+
+ def is_private(self):
+ """Returns True if this source is private aka protected.
+
+ ...ie their posts are not public.
+ """
+ return False
+
+ def is_activity_public(self, activity):
+ """Returns True if the given activity is public, False otherwise.
+
+ Just wraps :meth:`granary.source.Source.is_public`. Subclasses may override.
+ """
+ return gr_source.Source.is_public(activity)
+
+ def is_beta_user(self):
+ """Returns True if this is a "beta" user opted into new features.
+
+ Beta users come from beta_users.txt.
+ """
+ return self.bridgy_path() in util.BETA_USER_PATHS
+
+ def load_blocklist(self):
+ """Fetches this user's blocklist, if supported, and stores it in the entity."""
+ if not self.HAS_BLOCKS:
+ return
+
+ try:
+ ids = self.gr_source.get_blocklist_ids()
+ except gr_source.RateLimited as e:
+ ids = e.partial or []
+
+ self.blocked_ids = ids[:BLOCKLIST_MAX_IDS]
+ self.put()
- def silo_url(self, handler):
- """Returns the silo account URL, e.g. https://twitter.com/foo."""
- raise NotImplementedError()
+ def is_blocked(self, obj):
+ """Returns True if an object's author is being blocked.
- def label(self):
- """Human-readable label for this source."""
- return '%s (%s)' % (self.label_name(), self.GR_CLASS.NAME)
+ ...ie they're in this user's block list.
- def label_name(self):
- """Human-readable name or username for this source, whichever is preferred."""
- return self.name or self.key_id()
+ Note that this method is tested in test_twitter.py, not test_models.py, for
+ historical reasons.
+ """
+ if not self.blocked_ids:
+ return False
- @classmethod
- @ndb.transactional()
- def put_updates(cls, source):
- """Writes source.updates to the datastore transactionally.
+ for o in [obj] + util.get_list(obj, "object"):
+ for field in "author", "actor":
+ if o.get(field, {}).get("numeric_id") in self.blocked_ids:
+ return True
- Returns:
- source: :class:`Source`
- Returns:
- the updated :class:`Source`
- """
- if not source.updates:
- return source
-
- logging.info('Updating %s %s : %r', source.label(), source.bridgy_path(),
- {k: v for k, v in source.updates.items() if not k.endswith('_json')})
-
- updates = source.updates
- source = source.key.get()
- source.updates = updates
- for name, val in updates.items():
- setattr(source, name, val)
-
- source.put()
- return source
-
- def poll_period(self):
- """Returns the poll frequency for this source, as a :class:`datetime.timedelta`.
+class Webmentions(StringIdModel):
+ """A bundle of links to send webmentions for.
- Defaults to ~15m, depending on silo. If we've never sent a webmention for
- this source, or the last one we sent was over a month ago, we drop them down
- to ~1d after a week long grace period.
- """
- now = datetime.datetime.now()
- if self.rate_limited:
- return self.RATE_LIMITED_POLL
- elif now < self.created + self.FAST_POLL_GRACE_PERIOD:
- return self.FAST_POLL
- elif not self.last_webmention_sent:
- return self.SLOW_POLL
- elif self.last_webmention_sent > now - datetime.timedelta(days=7):
- return self.FAST_POLL
- elif self.last_webmention_sent > now - datetime.timedelta(days=30):
- return self.FAST_POLL * 10
- else:
- return self.SLOW_POLL
-
- def should_refetch(self):
- """Returns True if we should run OPD refetch on this source now."""
- now = datetime.datetime.now()
- if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER:
- return True
- elif not self.last_syndication_url:
- return False
-
- period = (self.FAST_REFETCH
- if self.last_syndication_url > now - datetime.timedelta(days=14)
- else self.SLOW_REFETCH)
- return self.last_poll_attempt >= self.last_hfeed_refetch + period
-
- @classmethod
- def bridgy_webmention_endpoint(cls, domain='brid.gy'):
- """Returns the Bridgy webmention endpoint for this source type."""
- return 'https://%s/webmention/%s' % (domain, cls.SHORT_NAME)
-
- def has_bridgy_webmention_endpoint(self):
- """Returns True if this source uses Bridgy's webmention endpoint."""
- return self.webmention_endpoint in (
- self.bridgy_webmention_endpoint(),
- self.bridgy_webmention_endpoint(domain='www.brid.gy'))
-
- def get_author_urls(self):
- """Determine the author urls for a particular source.
-
- In debug mode, replace test domains with localhost.
-
- Return:
- a list of string URLs, possibly empty
+ Use the :class:`Response` and :class:`BlogPost` concrete subclasses below.
"""
- return [util.replace_test_domains_with_localhost(u) for u in self.domain_urls]
-
- def search_for_links(self):
- """Searches for activities with links to any of this source's web sites.
- https://github.com/snarfed/bridgy/issues/456
- https://github.com/snarfed/bridgy/issues/565
+ STATUSES = ("new", "processing", "complete", "error")
+
+ # Turn off instance and memcache caching. See Source for details.
+ _use_cache = False
+ _use_memcache = False
+
+ source = ndb.KeyProperty()
+ status = ndb.StringProperty(choices=STATUSES, default="new")
+ leased_until = ndb.DateTimeProperty()
+ created = ndb.DateTimeProperty(auto_now_add=True)
+ updated = ndb.DateTimeProperty(auto_now=True)
+
+ # Original post links, ie webmention targets
+ sent = ndb.StringProperty(repeated=True)
+ unsent = ndb.StringProperty(repeated=True)
+ error = ndb.StringProperty(repeated=True)
+ failed = ndb.StringProperty(repeated=True)
+ skipped = ndb.StringProperty(repeated=True)
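+    # the propagate task moves each target between these lists as webmention
+    # sends succeed, error, permanently fail, or get skipped; a given URL
+    # should live in only one list at a time.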
+
+ def label(self):
+ """Returns a human-readable string description for use in log messages.
+
+ To be implemented by subclasses.
+ """
+ raise NotImplementedError()
+
+ def add_task(self):
+ """Adds a propagate task for this entity.
+
+ To be implemented by subclasses.
+ """
+ raise NotImplementedError()
+
+ @ndb.transactional()
+ def get_or_save(self):
+ entity = existing = self.key.get()
+
+ propagate = False
+ if entity:
+ # merge targets
+ urls = set(
+ entity.sent
+ + entity.unsent
+ + entity.error
+ + entity.failed
+ + entity.skipped
+ )
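+            # e.g. if entity.sent is ['http://a/'] and self.unsent is
+            # ['http://a/', 'http://b/'], only 'http://b/' is appended to
+            # entity.unsent below, and propagate is set since it's a new
+            # unsent target.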
+ for field in ("sent", "unsent", "error", "failed", "skipped"):
+ entity_urls = getattr(entity, field)
+ new_urls = set(getattr(self, field)) - urls
+ entity_urls += new_urls
+ if new_urls and field in ("unsent", "error"):
+ propagate = True
+ else:
+ entity = self
+ propagate = self.unsent or self.error
+
+ if propagate:
+ logging.debug("New webmentions to propagate! %s", entity.label())
+ entity.add_task()
+ elif not existing:
+ entity.status = "complete"
+
+ entity.put()
+ return entity
+
+ def restart(self):
+ """Moves status and targets to 'new' and adds a propagate task."""
+ self.status = "new"
+ self.unsent = util.dedupe_urls(
+ self.unsent + self.sent + self.error + self.failed + self.skipped
+ )
+ self.sent = self.error = self.failed = self.skipped = []
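+        # util.dedupe_urls also normalizes, e.g. collapsing http/https and
+        # trailing slash variants into a single target.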
+
+ # clear any cached webmention endpoints
+ with util.webmention_endpoint_cache_lock:
+ for url in self.unsent:
+ util.webmention_endpoint_cache.pop(
+ util.webmention_endpoint_cache_key(url), None
+ )
+
+ # this datastore put and task add should be transactional, but Cloud Tasks
+ # doesn't support that :(
+ # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available
+ self.put()
+ self.add_task()
- Returns:
- sequence of ActivityStreams activity dicts
- """
- return []
- def get_activities_response(self, **kwargs):
- """Returns recent posts and embedded comments for this source.
+class Response(Webmentions):
+ """A comment, like, or repost to be propagated.
- May be overridden by subclasses.
+ The key name is the comment object id as a tag URI.
"""
- kwargs.setdefault('group_id', gr_source.SELF)
- resp = self.gr_source.get_activities_response(**kwargs)
- for activity in resp['items']:
- self._inject_user_urls(activity)
- return resp
-
- def get_activities(self, **kwargs):
- return self.get_activities_response(**kwargs)['items']
-
- def get_comment(self, comment_id, **kwargs):
- """Returns a comment from this source.
-
- Passes through to granary by default. May be overridden by subclasses.
- Args:
- comment_id: string, site-specific comment id
- kwargs: passed to :meth:`granary.source.Source.get_comment`
+ # ActivityStreams JSON activity and comment, like, or repost
+ type = ndb.StringProperty(choices=VERB_TYPES, default="comment")
+ # These are TextProperty, and not JsonProperty, so that their plain text is
+ # visible in the App Engine admin console. (JsonProperty uses a blob. :/)
+ activities_json = ndb.TextProperty(repeated=True)
+ response_json = ndb.TextProperty()
+ # Old values for response_json. Populated when the silo reports that the
+ # response has changed, e.g. the user edited a comment or changed their RSVP
+ # to an event.
+ old_response_jsons = ndb.TextProperty(repeated=True)
+ # JSON dict mapping original post url to activity index in activities_json.
+ # only set when there's more than one activity.
+ urls_to_activity = ndb.TextProperty()
+ # Original post links found by original post discovery
+ original_posts = ndb.StringProperty(repeated=True)
+
+ def label(self):
+ return " ".join(
+ (
+ self.key.kind(),
+ self.type,
+ self.key.id(),
+ json_loads(self.response_json).get("url", "[no url]"),
+ )
+ )
+
+ def add_task(self):
+ util.add_propagate_task(self)
+
+ @staticmethod
+ def get_type(obj):
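+        # thin wrapper around the module-level get_type(); e.g. a plain 'like'
+        # verb maps straight through, while anything outside VERB_TYPES falls
+        # back to 'comment'.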
+ type = get_type(obj)
+ return type if type in VERB_TYPES else "comment"
+
+ def get_or_save(self, source, restart=False):
+ resp = super().get_or_save()
+
+ if self.type != resp.type or source.gr_source.activity_changed(
+ json_loads(resp.response_json), json_loads(self.response_json), log=True
+ ):
+        logging.info("Response changed! Re-propagating. Original: %s", resp)
+
+ resp.old_response_jsons = resp.old_response_jsons[:10] + [
+ resp.response_json
+ ]
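+            # the history is capped: only the first ten stored old values are
+            # kept before appending the value being replaced.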
+
+ response_json_to_append = json_loads(self.response_json)
+ source.gr_source.append_in_reply_to(
+ json_loads(resp.response_json), response_json_to_append
+ )
+ self.response_json = json_dumps(util.trim_nulls(response_json_to_append))
+ resp.response_json = self.response_json
+ resp.restart(source)
+ elif restart and resp is not self: # ie it already existed
+ resp.restart(source)
+
+ return resp
+
+ def restart(self, source=None):
+ """Moves status and targets to 'new' and adds a propagate task."""
+ # add original posts with syndication URLs
+ # TODO: unify with Poll.repropagate_old_responses()
+ if not source:
+ source = self.source.get()
+
+ synd_urls = set()
+ for activity_json in self.activities_json:
+ activity = json_loads(activity_json)
+ url = activity.get("url") or activity.get("object", {}).get("url")
+ if url:
+ url = source.canonicalize_url(url, activity=activity)
+ if url:
+ synd_urls.add(url)
+
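+        # re-add any originals that original post discovery previously mapped
+        # to these syndication URLs, so they get webmentions again on restart.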
+ if synd_urls:
+ self.unsent += [
+ synd.original
+ for synd in SyndicatedPost.query(
+ SyndicatedPost.syndication.IN(synd_urls)
+ )
+ if synd.original
+ ]
+
+ return super().restart()
- Returns:
- dict, decoded ActivityStreams comment object, or None
- """
- comment = self.gr_source.get_comment(comment_id, **kwargs)
- if comment:
- self._inject_user_urls(comment)
- return comment
-
- def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs):
- """Returns an ActivityStreams 'like' activity object.
-
- Passes through to granary by default. May be overridden
- by subclasses.
-
- Args:
- activity_user_id: string id of the user who posted the original activity
- activity_id: string activity id
- like_user_id: string id of the user who liked the activity
- kwargs: passed to granary.Source.get_comment
- """
- return self.gr_source.get_like(activity_user_id, activity_id, like_user_id,
- **kwargs)
-
- def _inject_user_urls(self, activity):
- """Adds this user's web site URLs to their user mentions (in tags), in place."""
- obj = activity.get('object') or activity
- user_tag_id = self.user_tag_id()
- for tag in obj.get('tags', []):
- if tag.get('id') == user_tag_id:
- tag.setdefault('urls', []).extend([{'value': u} for u in self.domain_urls])
-
- def create_comment(self, post_url, author_name, author_url, content):
- """Creates a new comment in the source silo.
-
- Must be implemented by subclasses.
-
- Args:
- post_url: string
- author_name: string
- author_url: string
- content: string
-
- Returns:
- response dict with at least 'id' field
- """
- raise NotImplementedError()
- def feed_url(self):
- """Returns the RSS or Atom (or similar) feed URL for this source.
+class Activity(StringIdModel):
+ """An activity with responses to be propagated.
- Must be implemented by subclasses. Currently only implemented by
- :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.
+ The key name is the activity id as a tag URI.
- Returns:
- string URL
+ Currently only used for posts sent to us by the browser extension.
"""
- raise NotImplementedError()
- def edit_template_url(self):
- """Returns the URL for editing this blog's template HTML.
-
- Must be implemented by subclasses. Currently only implemented by
- :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.
-
- Returns:
- string URL
- """
- raise NotImplementedError()
+ source = ndb.KeyProperty()
+ created = ndb.DateTimeProperty(auto_now_add=True)
+ updated = ndb.DateTimeProperty(auto_now=True)
+ activity_json = ndb.TextProperty()
+ html = ndb.TextProperty()
- @classmethod
- def button_html(cls, feature, **kwargs):
- """Returns an HTML string with a login form and button for this site.
- Mostly just passes through to
- :meth:`oauth_dropins.handlers.Start.button_html`.
+class BlogPost(Webmentions):
+ """A blog post to be processed for links to send webmentions to.
- Returns: string, HTML
- """
- assert set(feature.split(',')) <= set(cls.FEATURES)
-    form_extra = (kwargs.pop('form_extra', '') +
-                  '<input name="feature" type="hidden" value="%s" />' % feature)
-
- source = kwargs.pop('source', None)
- if source:
-      form_extra += ('\n<input name="id" type="hidden" value="%s" />' %
-                     source.key_id())
-
- if cls.OAUTH_START:
- return cls.OAUTH_START.button_html(
- '/%s/start' % cls.SHORT_NAME,
- form_extra=form_extra,
- image_prefix='/oauth_dropins_static/',
- **kwargs)
-
- return ''
-
- @classmethod
- @ndb.transactional()
- def create_new(cls, user_url=None, **kwargs):
- """Creates and saves a new :class:`Source` and adds a poll task for it.
-
- Args:
- user_url: a string, optional. if provided, supersedes other urls when
- determining the author_url
- **kwargs: passed to :meth:`new()`
-
- Returns: newly created :class:`Source`
+ The key name is the URL.
"""
- source = cls.new(**kwargs)
- if source is None:
- return None
-
- if not source.domain_urls: # defer to the source if it already set this
- auth_entity = kwargs.get('auth_entity')
- if auth_entity and hasattr(auth_entity, 'user_json'):
- source.domain_urls, source.domains = source._urls_and_domains(
- auth_entity, user_url)
- logging.debug('URLs/domains: %s %s', source.domain_urls, source.domains)
-
- # check if this source already exists
- existing = source.key.get()
- if existing:
- # merge some fields
- source.features = set(source.features + existing.features)
- source.populate(**existing.to_dict(include=(
- 'created', 'last_hfeed_refetch', 'last_poll_attempt', 'last_polled',
- 'last_syndication_url', 'last_webmention_sent', 'superfeedr_secret',
- 'webmention_endpoint')))
- verb = 'Updated'
- else:
- verb = 'Added'
-
- author_urls = source.get_author_urls()
- link = ('http://indiewebify.me/send-webmentions/?url=' + author_urls[0]
- if author_urls else 'http://indiewebify.me/#send-webmentions')
- feature = source.features[0] if source.features else 'listen'
- blurb = '%s %s. %s' % (
- verb, source.label(),
- 'Try previewing a post from your web site!' if feature == 'publish'
-      else '<a href="%s">Try a webmention!</a>' % link if feature == 'webmention'
- else "Refresh in a minute to see what we've found!")
- logging.info('%s %s', blurb, source.bridgy_url())
-
- source.verify()
- if source.verified():
- flash(blurb)
-
- source.put()
-
- if 'webmention' in source.features:
- superfeedr.subscribe(source)
- if 'listen' in source.features and source.AUTO_POLL:
- util.add_poll_task(source, now=True)
- util.add_poll_task(source)
+ feed_item = ndb.JsonProperty(compressed=True) # from Superfeedr
- return source
-
- def verified(self):
- """Returns True if this source is ready to be used, false otherwise.
-
- See :meth:`verify()` for details. May be overridden by subclasses, e.g.
- :class:`tumblr.Tumblr`.
- """
- if not self.domains or not self.domain_urls:
- return False
- if 'webmention' in self.features and not self.webmention_endpoint:
- return False
- if ('listen' in self.features and
- not (self.webmention_endpoint or self.last_webmention_sent)):
- return False
- return True
-
- def verify(self, force=False):
- """Checks that this source is ready to be used.
-
- For blog and listen sources, this fetches their front page HTML and
- discovers their webmention endpoint. For publish sources, this checks that
- they have a domain.
-
- May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.
-
- Args:
- force: if True, fully verifies (e.g. re-fetches the blog's HTML and
- performs webmention discovery) even we already think this source is
- verified.
- """
- author_urls = [u for u, d in zip(self.get_author_urls(), self.domains)
- if not util.in_webmention_blocklist(d)]
- if ((self.verified() and not force) or self.status == 'disabled' or
- not self.features or not author_urls):
- return
-
- author_url = author_urls[0]
- try:
- got = webmention.discover(author_url, timeout=util.HTTP_TIMEOUT,
- headers=util.REQUEST_HEADERS)
- self.webmention_endpoint = got.endpoint
- self._fetched_html = got.response.text
- except BaseException as e:
- logging.info('Error discovering webmention endpoint', exc_info=e)
- self.webmention_endpoint = None
-
- self.put()
-
- def _urls_and_domains(self, auth_entity, user_url, actor=None):
- """Returns this user's valid (not webmention-blocklisted) URLs and domains.
-
- Converts the auth entity's user_json to an ActivityStreams actor and uses
- its 'urls' and 'url' fields. May be overridden by subclasses.
-
- Args:
- auth_entity: :class:`oauth_dropins.models.BaseAuth`
- user_url: string, optional URL passed in when authorizing
- actor: dict, optional AS actor for the user. If provided, overrides
- auth_entity
-
- Returns:
- ([string url, ...], [string domain, ...])
- """
- if not actor:
- actor = self.gr_source.user_to_actor(json_loads(auth_entity.user_json))
- logging.debug('Extracting URLs and domains from actor: %s',
- json_dumps(actor, indent=2))
-
- candidates = util.trim_nulls(util.uniquify(
- [user_url] + microformats2.object_urls(actor)))
-
- if len(candidates) > MAX_AUTHOR_URLS:
- logging.info('Too many profile links! Only resolving the first %s: %s',
- MAX_AUTHOR_URLS, candidates)
-
- urls = []
- for i, url in enumerate(candidates):
- resolved = self.resolve_profile_url(url, resolve=i < MAX_AUTHOR_URLS)
- if resolved:
- urls.append(resolved)
-
- final_urls = []
- domains = []
- for url in util.dedupe_urls(urls): # normalizes domains to lower case
- # skip links on this source's domain itself. only currently needed for
- # Mastodon; the other silo domains are in the webmention blocklist.
- domain = util.domain_from_link(url)
- if domain != self.gr_source.DOMAIN:
- final_urls.append(url)
- domains.append(domain)
-
- return final_urls, domains
-
- @staticmethod
- def resolve_profile_url(url, resolve=True):
- """Resolves a profile URL to be added to a source.
-
- Args:
- url: string
- resolve: boolean, whether to make HTTP requests to follow redirects, etc.
-
- Returns: string, resolved URL, or None
- """
- final, _, ok = util.get_webmention_target(url, resolve=resolve)
- if not ok:
- return None
-
- final = final.lower()
- if util.schemeless(final).startswith(util.schemeless(url.lower())):
- # redirected to a deeper path. use the original higher level URL. #652
- final = url
-
- # If final has a path segment check if root has a matching rel=me.
- match = re.match(r'^(https?://[^/]+)/.+', final)
- if match and resolve:
- root = match.group(1)
- try:
- mf2 = util.fetch_mf2(root)
- me_urls = mf2['rels'].get('me', [])
- if final in me_urls:
- final = root
- except requests.RequestException:
- logging.warning("Couldn't fetch %s, preserving path in %s",
- root, final, exc_info=True)
-
- return final
-
- def canonicalize_url(self, url, activity=None, **kwargs):
- """Canonicalizes a post or object URL.
-
- Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`.
- """
- return self.URL_CANONICALIZER(url, **kwargs) if self.URL_CANONICALIZER else url
+ def label(self):
+ url = None
+ if self.feed_item:
+ url = self.feed_item.get("permalinkUrl")
+ return " ".join((self.key.kind(), self.key.id(), url or "[no url]"))
- def infer_profile_url(self, url):
- """Given an arbitrary URL representing a person, try to find their
- profile URL for *this* service.
+ def add_task(self):
+ util.add_propagate_blogpost_task(self)
- Queries Bridgy's registered accounts for users with a particular
- domain in their silo profile.
- Args:
- url: string, a person's URL
+class PublishedPage(StringIdModel):
+ """Minimal root entity for :class:`Publish` children with the same source URL.
- Return:
- a string URL for their profile on this service (or None)
+ Key id is the string source URL.
"""
- domain = util.domain_from_link(url)
- if domain == self.gr_source.DOMAIN:
- return url
- user = self.__class__.query(self.__class__.domains == domain).get()
- if user:
- return self.gr_source.user_url(user.key_id())
-
- def preprocess_for_publish(self, obj):
- """Preprocess an object before trying to publish it.
-
- By default this tries to massage person tags so that the tag's
- "url" points to the person's profile on this service (as opposed
- to a person's homepage).
-
- The object is modified in place.
- Args:
- obj: ActivityStreams activity or object dict
- """
- for tag in obj.get('tags', []):
- if tag.get('objectType') == 'person':
- silo_url = None
- for url in microformats2.object_urls(tag):
- silo_url = url and self.infer_profile_url(url)
- if silo_url:
- break
- if silo_url:
- tag['url'] = silo_url
-
- # recurse on contained object(s)
- for obj in util.get_list(obj, 'object'):
- self.preprocess_for_publish(obj)
-
- def on_new_syndicated_post(self, syndpost):
- """Called when a new :class:`SyndicatedPost` is stored for this source.
-
- Args:
- syndpost: :class:`SyndicatedPost`
- """
pass
- def is_private(self):
- """Returns True if this source is private aka protected.
- ...ie their posts are not public.
- """
- return False
-
- def is_activity_public(self, activity):
- """Returns True if the given activity is public, False otherwise.
-
- Just wraps :meth:`granary.source.Source.is_public`. Subclasses may override.
- """
- return gr_source.Source.is_public(activity)
-
- def is_beta_user(self):
- """Returns True if this is a "beta" user opted into new features.
-
- Beta users come from beta_users.txt.
- """
- return self.bridgy_path() in util.BETA_USER_PATHS
-
- def load_blocklist(self):
- """Fetches this user's blocklist, if supported, and stores it in the entity."""
- if not self.HAS_BLOCKS:
- return
-
- try:
- ids = self.gr_source.get_blocklist_ids()
- except gr_source.RateLimited as e:
- ids = e.partial or []
-
- self.blocked_ids = ids[:BLOCKLIST_MAX_IDS]
- self.put()
-
- def is_blocked(self, obj):
- """Returns True if an object's author is being blocked.
-
- ...ie they're in this user's block list.
+class Publish(ndb.Model):
+ """A comment, like, repost, or RSVP published into a silo.
- Note that this method is tested in test_twitter.py, not test_models.py, for
- historical reasons.
+ Child of a :class:`PublishedPage` entity.
"""
- if not self.blocked_ids:
- return False
-
- for o in [obj] + util.get_list(obj, 'object'):
- for field in 'author', 'actor':
- if o.get(field, {}).get('numeric_id') in self.blocked_ids:
- return True
+ STATUSES = ("new", "complete", "failed", "deleted")
-class Webmentions(StringIdModel):
- """A bundle of links to send webmentions for.
-
- Use the :class:`Response` and :class:`BlogPost` concrete subclasses below.
- """
- STATUSES = ('new', 'processing', 'complete', 'error')
+ # Turn off instance and memcache caching. See Source for details.
+ _use_cache = False
+ _use_memcache = False
- # Turn off instance and memcache caching. See Source for details.
- _use_cache = False
- _use_memcache = False
+ type = ndb.StringProperty(choices=PUBLISH_TYPES)
+ status = ndb.StringProperty(choices=STATUSES, default="new")
+ source = ndb.KeyProperty()
+ html = ndb.TextProperty() # raw HTML fetched from source
+ published = ndb.JsonProperty(compressed=True)
+ created = ndb.DateTimeProperty(auto_now_add=True)
+ updated = ndb.DateTimeProperty(auto_now=True)
- source = ndb.KeyProperty()
- status = ndb.StringProperty(choices=STATUSES, default='new')
- leased_until = ndb.DateTimeProperty()
- created = ndb.DateTimeProperty(auto_now_add=True)
- updated = ndb.DateTimeProperty(auto_now=True)
+ def type_label(self):
+ """Returns silo-specific string type, e.g. 'favorite' instead of 'like'."""
+ for cls in sources.values(): # global
+ if cls.__name__ == self.source.kind():
+ return cls.TYPE_LABELS.get(self.type, self.type)
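+        # no registered source class matched this entity's kind; fall through
+        # to the raw type below.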
- # Original post links, ie webmention targets
- sent = ndb.StringProperty(repeated=True)
- unsent = ndb.StringProperty(repeated=True)
- error = ndb.StringProperty(repeated=True)
- failed = ndb.StringProperty(repeated=True)
- skipped = ndb.StringProperty(repeated=True)
+ return self.type
- def label(self):
- """Returns a human-readable string description for use in log messages.
- To be implemented by subclasses.
- """
- raise NotImplementedError()
+class BlogWebmention(Publish, StringIdModel):
+ """Datastore entity for webmentions for hosted blog providers.
- def add_task(self):
- """Adds a propagate task for this entity.
+    Key id is the source URL and target URL concatenated with a space, ie 'SOURCE
+ TARGET'. The source URL is *always* the URL given in the webmention HTTP
+ request. If the source page has a u-url, that's stored in the u_url property.
+ The target URL is always the final URL, after any redirects.
- To be implemented by subclasses.
+ Reuses :class:`Publish`'s fields, but otherwise unrelated.
"""
- raise NotImplementedError()
-
- @ndb.transactional()
- def get_or_save(self):
- entity = existing = self.key.get()
-
- propagate = False
- if entity:
- # merge targets
- urls = set(entity.sent + entity.unsent + entity.error +
- entity.failed + entity.skipped)
- for field in ('sent', 'unsent', 'error', 'failed', 'skipped'):
- entity_urls = getattr(entity, field)
- new_urls = set(getattr(self, field)) - urls
- entity_urls += new_urls
- if new_urls and field in ('unsent', 'error'):
- propagate = True
- else:
- entity = self
- propagate = self.unsent or self.error
-
- if propagate:
- logging.debug('New webmentions to propagate! %s', entity.label())
- entity.add_task()
- elif not existing:
- entity.status = 'complete'
-
- entity.put()
- return entity
-
- def restart(self):
- """Moves status and targets to 'new' and adds a propagate task."""
- self.status = 'new'
- self.unsent = util.dedupe_urls(self.unsent + self.sent + self.error +
- self.failed + self.skipped)
- self.sent = self.error = self.failed = self.skipped = []
-
- # clear any cached webmention endpoints
- with util.webmention_endpoint_cache_lock:
- for url in self.unsent:
- util.webmention_endpoint_cache.pop(util.webmention_endpoint_cache_key(url), None)
-
- # this datastore put and task add should be transactional, but Cloud Tasks
- # doesn't support that :(
- # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available
- self.put()
- self.add_task()
-
-
-class Response(Webmentions):
- """A comment, like, or repost to be propagated.
-
- The key name is the comment object id as a tag URI.
- """
- # ActivityStreams JSON activity and comment, like, or repost
- type = ndb.StringProperty(choices=VERB_TYPES, default='comment')
- # These are TextProperty, and not JsonProperty, so that their plain text is
- # visible in the App Engine admin console. (JsonProperty uses a blob. :/)
- activities_json = ndb.TextProperty(repeated=True)
- response_json = ndb.TextProperty()
- # Old values for response_json. Populated when the silo reports that the
- # response has changed, e.g. the user edited a comment or changed their RSVP
- # to an event.
- old_response_jsons = ndb.TextProperty(repeated=True)
- # JSON dict mapping original post url to activity index in activities_json.
- # only set when there's more than one activity.
- urls_to_activity = ndb.TextProperty()
- # Original post links found by original post discovery
- original_posts = ndb.StringProperty(repeated=True)
-
- def label(self):
- return ' '.join((self.key.kind(), self.type, self.key.id(),
- json_loads(self.response_json).get('url', '[no url]')))
-
- def add_task(self):
- util.add_propagate_task(self)
-
- @staticmethod
- def get_type(obj):
- type = get_type(obj)
- return type if type in VERB_TYPES else 'comment'
-
- def get_or_save(self, source, restart=False):
- resp = super().get_or_save()
-
- if (self.type != resp.type or
- source.gr_source.activity_changed(json_loads(resp.response_json),
- json_loads(self.response_json),
- log=True)):
- logging.info('Response changed! Re-propagating. Original: %s' % resp)
-
- resp.old_response_jsons = resp.old_response_jsons[:10] + [resp.response_json]
-
- response_json_to_append = json_loads(self.response_json)
- source.gr_source.append_in_reply_to(json_loads(resp.response_json), response_json_to_append)
- self.response_json = json_dumps(util.trim_nulls(response_json_to_append))
- resp.response_json = self.response_json
- resp.restart(source)
- elif restart and resp is not self: # ie it already existed
- resp.restart(source)
-
- return resp
-
- def restart(self, source=None):
- """Moves status and targets to 'new' and adds a propagate task."""
- # add original posts with syndication URLs
- # TODO: unify with Poll.repropagate_old_responses()
- if not source:
- source = self.source.get()
-
- synd_urls = set()
- for activity_json in self.activities_json:
- activity = json_loads(activity_json)
- url = activity.get('url') or activity.get('object', {}).get('url')
- if url:
- url = source.canonicalize_url(url, activity=activity)
- if url:
- synd_urls.add(url)
-
- if synd_urls:
- self.unsent += [synd.original for synd in
- SyndicatedPost.query(SyndicatedPost.syndication.IN(synd_urls))
- if synd.original]
-
- return super().restart()
-
-
-class Activity(StringIdModel):
- """An activity with responses to be propagated.
-
- The key name is the activity id as a tag URI.
-
- Currently only used for posts sent to us by the browser extension.
- """
- source = ndb.KeyProperty()
- created = ndb.DateTimeProperty(auto_now_add=True)
- updated = ndb.DateTimeProperty(auto_now=True)
- activity_json = ndb.TextProperty()
- html = ndb.TextProperty()
-
-
-class BlogPost(Webmentions):
- """A blog post to be processed for links to send webmentions to.
-
- The key name is the URL.
- """
- feed_item = ndb.JsonProperty(compressed=True) # from Superfeedr
-
- def label(self):
- url = None
- if self.feed_item:
- url = self.feed_item.get('permalinkUrl')
- return ' '.join((self.key.kind(), self.key.id(), url or '[no url]'))
-
- def add_task(self):
- util.add_propagate_blogpost_task(self)
-
-
-class PublishedPage(StringIdModel):
- """Minimal root entity for :class:`Publish` children with the same source URL.
- Key id is the string source URL.
- """
- pass
+ # If the source page has a u-url, it's stored here and overrides the source
+ # URL in the key id.
+ u_url = ndb.StringProperty()
+ # Any initial target URLs that redirected to the final target URL, in redirect
+ # order.
+ redirected_target_urls = ndb.StringProperty(repeated=True)
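+    # the key id is 'SOURCE TARGET', space-separated, e.g. a hypothetical
+    # 'http://alice.example/post http://blog.example/page'; source_url() and
+    # target_url() below just split it.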
-class Publish(ndb.Model):
- """A comment, like, repost, or RSVP published into a silo.
-
- Child of a :class:`PublishedPage` entity.
- """
- STATUSES = ('new', 'complete', 'failed', 'deleted')
-
- # Turn off instance and memcache caching. See Source for details.
- _use_cache = False
- _use_memcache = False
-
- type = ndb.StringProperty(choices=PUBLISH_TYPES)
- status = ndb.StringProperty(choices=STATUSES, default='new')
- source = ndb.KeyProperty()
- html = ndb.TextProperty() # raw HTML fetched from source
- published = ndb.JsonProperty(compressed=True)
- created = ndb.DateTimeProperty(auto_now_add=True)
- updated = ndb.DateTimeProperty(auto_now=True)
+ def source_url(self):
+ return self.u_url or self.key.id().split()[0]
- def type_label(self):
- """Returns silo-specific string type, e.g. 'favorite' instead of 'like'."""
- for cls in sources.values(): # global
- if cls.__name__ == self.source.kind():
- return cls.TYPE_LABELS.get(self.type, self.type)
+ def target_url(self):
+ return self.key.id().split()[1]
- return self.type
-
-
-class BlogWebmention(Publish, StringIdModel):
- """Datastore entity for webmentions for hosted blog providers.
-  Key id is the source URL and target URL concatenated with a space, ie 'SOURCE
- TARGET'. The source URL is *always* the URL given in the webmention HTTP
- request. If the source page has a u-url, that's stored in the u_url property.
- The target URL is always the final URL, after any redirects.
-
- Reuses :class:`Publish`'s fields, but otherwise unrelated.
- """
- # If the source page has a u-url, it's stored here and overrides the source
- # URL in the key id.
- u_url = ndb.StringProperty()
-
- # Any initial target URLs that redirected to the final target URL, in redirect
- # order.
- redirected_target_urls = ndb.StringProperty(repeated=True)
-
- def source_url(self):
- return self.u_url or self.key.id().split()[0]
-
- def target_url(self):
- return self.key.id().split()[1]
+class SyndicatedPost(ndb.Model):
+ """Represents a syndicated post and its discovered original (or not
+ if we found no original post). We discover the relationship by
+ following rel=syndication links on the author's h-feed.
+ See :mod:`original_post_discovery`.
-class SyndicatedPost(ndb.Model):
- """Represents a syndicated post and its discovered original (or not
- if we found no original post). We discover the relationship by
- following rel=syndication links on the author's h-feed.
-
- See :mod:`original_post_discovery`.
-
- When a :class:`SyndicatedPost` entity is about to be stored,
- :meth:`source.Source.on_new_syndicated_post()` is called before it's stored.
- """
-
- # Turn off instance and memcache caching. See Response for details.
- _use_cache = False
- _use_memcache = False
-
- syndication = ndb.StringProperty()
- original = ndb.StringProperty()
- created = ndb.DateTimeProperty(auto_now_add=True)
- updated = ndb.DateTimeProperty(auto_now=True)
-
- @classmethod
- @ndb.transactional()
- def insert_original_blank(cls, source, original):
- """Insert a new original -> None relationship. Does a check-and-set to
- make sure no previous relationship exists for this original. If
- there is, nothing will be added.
-
- Args:
- source: :class:`Source` subclass
- original: string
- """
- if cls.query(cls.original == original, ancestor=source.key).get():
- return
- cls(parent=source.key, original=original, syndication=None).put()
-
- @classmethod
- @ndb.transactional()
- def insert_syndication_blank(cls, source, syndication):
- """Insert a new syndication -> None relationship. Does a check-and-set
- to make sure no previous relationship exists for this
- syndication. If there is, nothing will be added.
-
- Args:
- source: :class:`Source` subclass
-      syndication: string
+ When a :class:`SyndicatedPost` entity is about to be stored,
+ :meth:`source.Source.on_new_syndicated_post()` is called before it's stored.
"""
- if cls.query(cls.syndication == syndication, ancestor=source.key).get():
- return
- cls(parent=source.key, original=None, syndication=syndication).put()
+ # Turn off instance and memcache caching. See Response for details.
+ _use_cache = False
+ _use_memcache = False
+
+ syndication = ndb.StringProperty()
+ original = ndb.StringProperty()
+ created = ndb.DateTimeProperty(auto_now_add=True)
+ updated = ndb.DateTimeProperty(auto_now=True)
+
+ @classmethod
+ @ndb.transactional()
+ def insert_original_blank(cls, source, original):
+ """Insert a new original -> None relationship. Does a check-and-set to
+ make sure no previous relationship exists for this original. If
+ there is, nothing will be added.
+
+ Args:
+ source: :class:`Source` subclass
+ original: string
+ """
+ if cls.query(cls.original == original, ancestor=source.key).get():
+ return
+ cls(parent=source.key, original=original, syndication=None).put()
+
+ @classmethod
+ @ndb.transactional()
+ def insert_syndication_blank(cls, source, syndication):
+ """Insert a new syndication -> None relationship. Does a check-and-set
+ to make sure no previous relationship exists for this
+ syndication. If there is, nothing will be added.
+
+ Args:
+ source: :class:`Source` subclass
+            syndication: string
+ """
+ if cls.query(cls.syndication == syndication, ancestor=source.key).get():
+ return
+ cls(parent=source.key, original=None, syndication=syndication).put()
+
+ @classmethod
+ @ndb.transactional()
+ def insert(cls, source, syndication, original):
+ """Insert a new (non-blank) syndication -> original relationship.
+
+ This method does a check-and-set within transaction to avoid
+ including duplicate relationships.
+
+ If blank entries exists for the syndication or original URL
+ (i.e. syndication -> None or original -> None), they will first be
+ removed. If non-blank relationships exist, they will be retained.
+
+ Args:
+ source: :class:`Source` subclass
+ syndication: string (not None)
+ original: string (not None)
+
+ Returns:
+ SyndicatedPost: newly created or preexisting entity
+ """
+ # check for an exact match
+ duplicate = cls.query(
+ cls.syndication == syndication,
+ cls.original == original,
+ ancestor=source.key,
+ ).get()
+ if duplicate:
+ return duplicate
+
+ # delete blanks (expect at most 1 of each)
+ for filter in (
+ ndb.AND(cls.syndication == syndication, cls.original == None),
+ ndb.AND(cls.original == original, cls.syndication == None),
+ ):
+ for synd in cls.query(filter, ancestor=source.key).fetch(keys_only=True):
+ synd.delete()
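+        # note: the keys_only fetch yields ndb.Key objects, so delete() above
+        # is Key.delete(), issued inside this method's transaction.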
+
+ r = cls(parent=source.key, original=original, syndication=syndication)
+ r.put()
+ return r
- @classmethod
- @ndb.transactional()
- def insert(cls, source, syndication, original):
- """Insert a new (non-blank) syndication -> original relationship.
- This method does a check-and-set within transaction to avoid
- including duplicate relationships.
-
- If blank entries exists for the syndication or original URL
- (i.e. syndication -> None or original -> None), they will first be
- removed. If non-blank relationships exist, they will be retained.
+class Domain(StringIdModel):
+ """A domain owned by a user.
- Args:
- source: :class:`Source` subclass
- syndication: string (not None)
- original: string (not None)
+ Ownership is proven via IndieAuth. Supports secret tokens associated with each
+ domain. Clients can include a token with requests that operate on a given
+ domain, eg sending posts and responses from the browser extension.
- Returns:
- SyndicatedPost: newly created or preexisting entity
+ Key id is the string domain, eg 'example.com'.
"""
- # check for an exact match
- duplicate = cls.query(cls.syndication == syndication,
- cls.original == original,
- ancestor=source.key).get()
- if duplicate:
- return duplicate
-
- # delete blanks (expect at most 1 of each)
- for filter in (ndb.AND(cls.syndication == syndication, cls.original == None),
- ndb.AND(cls.original == original, cls.syndication == None)):
- for synd in cls.query(filter, ancestor=source.key).fetch(keys_only=True):
- synd.delete()
- r = cls(parent=source.key, original=original, syndication=syndication)
- r.put()
- return r
-
-
-class Domain(StringIdModel):
- """A domain owned by a user.
-
- Ownership is proven via IndieAuth. Supports secret tokens associated with each
- domain. Clients can include a token with requests that operate on a given
- domain, eg sending posts and responses from the browser extension.
-
- Key id is the string domain, eg 'example.com'.
- """
- tokens = ndb.StringProperty(repeated=True)
- auth = ndb.KeyProperty(IndieAuth)
- created = ndb.DateTimeProperty(auto_now_add=True)
- updated = ndb.DateTimeProperty(auto_now=True)
+ tokens = ndb.StringProperty(repeated=True)
+ auth = ndb.KeyProperty(IndieAuth)
+ created = ndb.DateTimeProperty(auto_now_add=True)
+ updated = ndb.DateTimeProperty(auto_now=True)
diff --git a/original_post_discovery.py b/original_post_discovery.py
index 3e461c54..c68b5334 100644
--- a/original_post_discovery.py
+++ b/original_post_discovery.py
@@ -45,552 +45,629 @@
MAX_ALLOWABLE_QUERIES = 30
-def discover(source, activity, fetch_hfeed=True, include_redirect_sources=True,
- already_fetched_hfeeds=None):
- """Augments the standard original_post_discovery algorithm with a
- reverse lookup that supports posts without a backlink or citation.
-
- If fetch_hfeed is False, then we will check the db for previously found
- :class:`models.SyndicatedPost`\ s but will not do posse-post-discovery to find
- new ones.
-
- Args:
- source: :class:`models.Source` subclass. Changes to property values (e.g.
- domains, domain_urls, last_syndication_url) are stored in source.updates;
- they should be updated transactionally later.
- activity: activity dict
- fetch_hfeed: boolean
- include_redirect_sources: boolean, whether to include URLs that redirect as
- well as their final destination URLs
- already_fetched_hfeeds: set, URLs that we have already fetched and run
- posse-post-discovery on, so we can avoid running it multiple times
-
- Returns:
- (set(string original post URLs), set(string mention URLs)) tuple
-
- """
- logging.debug('discovering original posts for: %s',
- activity.get('url') or activity.get('id'))
-
- if not source.updates:
- source.updates = {}
-
- if already_fetched_hfeeds is None:
- already_fetched_hfeeds = set()
-
- originals, mentions = gr_source.Source.original_post_discovery(
- activity, domains=source.domains,
- include_redirect_sources=include_redirect_sources,
- include_reserved_hosts=DEBUG,
- headers=util.request_headers(source=source))
-
- # only include mentions of the author themselves.
- # (mostly just for Mastodon; other silos' domains are all in the blocklist, so
- # their mention URLs get dropped later anyway.)
- # (these are originally added in Source._inject_user_urls() and in poll step 2.)
- obj = activity.get('object', {})
- other_user_mentions = set(
- t.get('url') for t in obj.get('tags', [])
- if t.get('objectType') == 'person' and t.get('url') not in source.domain_urls)
- originals -= other_user_mentions
- mentions -= other_user_mentions
-
- # original posts are only from the author themselves
- obj_author = obj.get('author', {})
- activity_author = activity.get('actor', {})
- author_id = obj_author.get('id') or activity_author.get('id')
- author_username = obj_author.get('username') or activity_author.get('username')
- if (author_id and author_id != source.user_tag_id() and
- author_username != source.key.id()):
- logging.info(f"Demoting original post links because user {source.user_tag_id()} doesn't match author id {author_id} username {author_username}")
- # this is someone else's post, so all links must be mentions
- mentions.update(originals)
- originals = set()
-
- # look for original URL of attachments (e.g. quote tweets)
- for att in obj.get('attachments', []):
- if (att.get('objectType') in ('note', 'article')
- and att.get('author', {}).get('id') == source.user_tag_id()):
- logging.debug('running original post discovery on attachment: %s',
- att.get('id'))
- att_origs, _ = discover(
- source, att, include_redirect_sources=include_redirect_sources)
- logging.debug('original post discovery found originals for attachment, %s',
- att_origs)
- mentions.update(att_origs)
-
- def resolve(urls):
- resolved = set()
- for url in urls:
- final, domain, send = util.get_webmention_target(url)
- if send and domain != source.gr_source.DOMAIN:
- resolved.add(final)
- if include_redirect_sources:
- resolved.add(url)
- return resolved
-
- originals = resolve(originals)
- mentions = resolve(mentions)
-
- if not source.get_author_urls():
- logging.debug('no author url(s), cannot find h-feed')
- return ((originals, mentions) if not source.BACKFEED_REQUIRES_SYNDICATION_LINK
- else (set(), set()))
-
- # TODO possible optimization: if we've discovered a backlink to a post on the
- # author's domain (i.e., it included a link or citation), then skip the rest
- # of this.
- syndicated = []
- syndication_url = obj.get('url') or activity.get('url')
- if syndication_url:
- # use the canonical syndication url on both sides, so that we have
- # the best chance of finding a match. Some silos allow several
- # different permalink formats to point to the same place.
- syndication_url = source.canonicalize_url(syndication_url)
+def discover(
+ source,
+ activity,
+ fetch_hfeed=True,
+ include_redirect_sources=True,
+ already_fetched_hfeeds=None,
+):
+ """Augments the standard original_post_discovery algorithm with a
+ reverse lookup that supports posts without a backlink or citation.
+
+ If fetch_hfeed is False, then we will check the db for previously found
+ :class:`models.SyndicatedPost`\ s but will not do posse-post-discovery to find
+ new ones.
+
+ Args:
+ source: :class:`models.Source` subclass. Changes to property values (e.g.
+ domains, domain_urls, last_syndication_url) are stored in source.updates;
+ they should be updated transactionally later.
+ activity: activity dict
+ fetch_hfeed: boolean
+ include_redirect_sources: boolean, whether to include URLs that redirect as
+ well as their final destination URLs
+ already_fetched_hfeeds: set, URLs that we have already fetched and run
+ posse-post-discovery on, so we can avoid running it multiple times
+
+ Returns:
+ (set(string original post URLs), set(string mention URLs)) tuple
+
+ """
+ logging.debug(
+ "discovering original posts for: %s", activity.get("url") or activity.get("id")
+ )
+
+ if not source.updates:
+ source.updates = {}
+
+ if already_fetched_hfeeds is None:
+ already_fetched_hfeeds = set()
+
+ originals, mentions = gr_source.Source.original_post_discovery(
+ activity,
+ domains=source.domains,
+ include_redirect_sources=include_redirect_sources,
+ include_reserved_hosts=DEBUG,
+ headers=util.request_headers(source=source),
+ )
+
+ # only include mentions of the author themselves.
+ # (mostly just for Mastodon; other silos' domains are all in the blocklist, so
+ # their mention URLs get dropped later anyway.)
+ # (these are originally added in Source._inject_user_urls() and in poll step 2.)
+ obj = activity.get("object", {})
+ other_user_mentions = set(
+ t.get("url")
+ for t in obj.get("tags", [])
+ if t.get("objectType") == "person" and t.get("url") not in source.domain_urls
+ )
+ originals -= other_user_mentions
+ mentions -= other_user_mentions
+
+ # original posts are only from the author themselves
+ obj_author = obj.get("author", {})
+ activity_author = activity.get("actor", {})
+ author_id = obj_author.get("id") or activity_author.get("id")
+ author_username = obj_author.get("username") or activity_author.get("username")
+ if (
+ author_id
+ and author_id != source.user_tag_id()
+ and author_username != source.key.id()
+ ):
+ logging.info(
+ f"Demoting original post links because user {source.user_tag_id()} doesn't match author id {author_id} username {author_username}"
+ )
+ # this is someone else's post, so all links must be mentions
+ mentions.update(originals)
+ originals = set()
+
+ # look for original URL of attachments (e.g. quote tweets)
+ for att in obj.get("attachments", []):
+ if (
+ att.get("objectType") in ("note", "article")
+ and att.get("author", {}).get("id") == source.user_tag_id()
+ ):
+ logging.debug(
+ "running original post discovery on attachment: %s", att.get("id")
+ )
+ att_origs, _ = discover(
+ source, att, include_redirect_sources=include_redirect_sources
+ )
+ logging.debug(
+ "original post discovery found originals for attachment, %s", att_origs
+ )
+ mentions.update(att_origs)
+
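+    # helper: follow redirects and drop targets on this silo's own domain;
+    # with include_redirect_sources, the pre-redirect URL is kept as a target
+    # too (useful for shortened links, e.g. t.co).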
+ def resolve(urls):
+ resolved = set()
+ for url in urls:
+ final, domain, send = util.get_webmention_target(url)
+ if send and domain != source.gr_source.DOMAIN:
+ resolved.add(final)
+ if include_redirect_sources:
+ resolved.add(url)
+ return resolved
+
+ originals = resolve(originals)
+ mentions = resolve(mentions)
+
+ if not source.get_author_urls():
+ logging.debug("no author url(s), cannot find h-feed")
+ return (
+ (originals, mentions)
+ if not source.BACKFEED_REQUIRES_SYNDICATION_LINK
+ else (set(), set())
+ )
+
+ # TODO possible optimization: if we've discovered a backlink to a post on the
+ # author's domain (i.e., it included a link or citation), then skip the rest
+ # of this.
+ syndicated = []
+ syndication_url = obj.get("url") or activity.get("url")
if syndication_url:
- syndicated = _posse_post_discovery(source, activity, syndication_url,
- fetch_hfeed, already_fetched_hfeeds)
- originals.update(syndicated)
- originals = set(util.dedupe_urls(originals))
-
- if not syndication_url:
- logging.debug('no %s syndication url, cannot process h-entries', source.SHORT_NAME)
-
- return ((originals, mentions) if not source.BACKFEED_REQUIRES_SYNDICATION_LINK
- else (set(syndicated), set()))
+ # use the canonical syndication url on both sides, so that we have
+ # the best chance of finding a match. Some silos allow several
+ # different permalink formats to point to the same place.
+ syndication_url = source.canonicalize_url(syndication_url)
+ if syndication_url:
+ syndicated = _posse_post_discovery(
+ source, activity, syndication_url, fetch_hfeed, already_fetched_hfeeds
+ )
+ originals.update(syndicated)
+ originals = set(util.dedupe_urls(originals))
+
+ if not syndication_url:
+ logging.debug(
+ "no %s syndication url, cannot process h-entries", source.SHORT_NAME
+ )
+
+ return (
+ (originals, mentions)
+ if not source.BACKFEED_REQUIRES_SYNDICATION_LINK
+ else (set(syndicated), set())
+ )
def refetch(source):
- """Refetch the author's URLs and look for new or updated syndication
- links that might not have been there the first time we looked.
-
- Args:
- source: :class:`models.Source` subclass. Changes to property values (e.g.
- domains, domain_urls, last_syndication_url) are stored in source.updates;
- they should be updated transactionally later.
-
- Returns:
- dict: mapping syndicated_url to a list of new :class:`models.SyndicatedPost`\ s
- """
- logging.debug('attempting to refetch h-feed for %s', source.label())
-
- if not source.updates:
- source.updates = {}
+ """Refetch the author's URLs and look for new or updated syndication
+ links that might not have been there the first time we looked.
- results = {}
- for url in _get_author_urls(source):
- results.update(_process_author(source, url, refetch=True))
+ Args:
+ source: :class:`models.Source` subclass. Changes to property values (e.g.
+ domains, domain_urls, last_syndication_url) are stored in source.updates;
+ they should be updated transactionally later.
- return results
+ Returns:
+ dict: mapping syndicated_url to a list of new :class:`models.SyndicatedPost`\ s
+ """
+ logging.debug("attempting to refetch h-feed for %s", source.label())
+ if not source.updates:
+ source.updates = {}
-def targets_for_response(resp, originals, mentions):
- """Returns the URLs that we should send webmentions to for a given response.
-
- ...specifically, all responses except posts get sent to original post URLs,
- but only posts and comments get sent to mentioned URLs.
-
- Args:
- resp: ActivityStreams response object
- originals, mentions: sequence of string URLs
-
- Returns:
- set of string URLs
- """
- type = models.Response.get_type(resp)
- targets = set()
- if type != 'post':
- targets |= originals
- if type in ('post', 'comment'):
- targets |= mentions
- return targets
-
-
-def _posse_post_discovery(source, activity, syndication_url, fetch_hfeed,
- already_fetched_hfeeds):
-  """Performs the actual meat of posse-post-discovery.
-
- Args:
- source: :class:`models.Source` subclass
- activity: activity dict
- syndication_url: url of the syndicated copy for which we are
- trying to find an original
- fetch_hfeed: boolean, whether or not to fetch and parse the
- author's feed if we don't have a previously stored
- relationship
- already_fetched_hfeeds: set, URLs we've already fetched in a
- previous iteration
-
- Return:
- sequence of string original post urls, possibly empty
- """
- logging.info('starting posse post discovery with syndicated %s',
- syndication_url)
-
- relationships = SyndicatedPost.query(
- SyndicatedPost.syndication == syndication_url,
- ancestor=source.key).fetch()
-
- if source.IGNORE_SYNDICATION_LINK_FRAGMENTS:
- relationships += SyndicatedPost.query(
- # prefix search to find any instances of this synd link with a fragment
- SyndicatedPost.syndication > f'{syndication_url}#',
- SyndicatedPost.syndication < f'{syndication_url}#\ufffd',
- ancestor=source.key).fetch()
-
- if not relationships and fetch_hfeed:
- # a syndicated post we haven't seen before! fetch the author's URLs to see
- # if we can find it.
- #
- # TODO: Consider using the actor's url, with get_author_urls() as the
- # fallback in the future to support content from non-Bridgy users.
results = {}
for url in _get_author_urls(source):
- if url not in already_fetched_hfeeds:
- results.update(_process_author(source, url))
- already_fetched_hfeeds.add(url)
- else:
- logging.debug('skipping %s, already fetched this round', url)
+ results.update(_process_author(source, url, refetch=True))
- relationships = results.get(syndication_url, [])
+ return results
- if not relationships:
- # No relationships were found. Remember that we've seen this
- # syndicated post to avoid reprocessing it every time
- logging.debug('posse post discovery found no relationship for %s',
- syndication_url)
- if fetch_hfeed:
- SyndicatedPost.insert_syndication_blank(source, syndication_url)
- originals = [r.original for r in relationships if r.original]
- if originals:
- logging.debug('posse post discovery found relationship(s) %s -> %s',
- syndication_url, originals)
- return originals
+def targets_for_response(resp, originals, mentions):
+ """Returns the URLs that we should send webmentions to for a given response.
+
+ ...specifically, all responses except posts get sent to original post URLs,
+ but only posts and comments get sent to mentioned URLs.
+
+ Args:
+ resp: ActivityStreams response object
+ originals, mentions: sequence of string URLs
+
+ Returns:
+ set of string URLs
+ """
+ type = models.Response.get_type(resp)
+ targets = set()
+ if type != "post":
+ targets |= originals
+ if type in ("post", "comment"):
+ targets |= mentions
+ return targets
+
+
+def _posse_post_discovery(
+ source, activity, syndication_url, fetch_hfeed, already_fetched_hfeeds
+):
+ """Performs the actual meat of the posse-post-discover.
+
+ Args:
+ source: :class:`models.Source` subclass
+ activity: activity dict
+ syndication_url: url of the syndicated copy for which we are
+ trying to find an original
+ fetch_hfeed: boolean, whether or not to fetch and parse the
+ author's feed if we don't have a previously stored
+ relationship
+ already_fetched_hfeeds: set, URLs we've already fetched in a
+ previous iteration
+
+ Return:
+ sequence of string original post urls, possibly empty
+ """
+ logging.info("starting posse post discovery with syndicated %s", syndication_url)
+
+ relationships = SyndicatedPost.query(
+ SyndicatedPost.syndication == syndication_url, ancestor=source.key
+ ).fetch()
+
+ if source.IGNORE_SYNDICATION_LINK_FRAGMENTS:
+ relationships += SyndicatedPost.query(
+ # prefix search to find any instances of this synd link with a fragment
+ SyndicatedPost.syndication > f"{syndication_url}#",
+ SyndicatedPost.syndication < f"{syndication_url}#\ufffd",
+ ancestor=source.key,
+ ).fetch()
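+        # the inequality pair emulates a prefix match: every stored URL
+        # starting with '{syndication_url}#' sorts between '#' and '#\ufffd'.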
+
+ if not relationships and fetch_hfeed:
+ # a syndicated post we haven't seen before! fetch the author's URLs to see
+ # if we can find it.
+ #
+ # TODO: Consider using the actor's url, with get_author_urls() as the
+ # fallback in the future to support content from non-Bridgy users.
+ results = {}
+ for url in _get_author_urls(source):
+ if url not in already_fetched_hfeeds:
+ results.update(_process_author(source, url))
+ already_fetched_hfeeds.add(url)
+ else:
+ logging.debug("skipping %s, already fetched this round", url)
+
+ relationships = results.get(syndication_url, [])
+
+ if not relationships:
+ # No relationships were found. Remember that we've seen this
+ # syndicated post to avoid reprocessing it every time
+ logging.debug(
+ "posse post discovery found no relationship for %s", syndication_url
+ )
+ if fetch_hfeed:
+ SyndicatedPost.insert_syndication_blank(source, syndication_url)
+
+ originals = [r.original for r in relationships if r.original]
+ if originals:
+ logging.debug(
+ "posse post discovery found relationship(s) %s -> %s",
+ syndication_url,
+ originals,
+ )
+ return originals
def _process_author(source, author_url, refetch=False, store_blanks=True):
- """Fetch the author's domain URL, and look for syndicated posts.
-
- Args:
- source: a subclass of :class:`models.Source`
- author_url: the author's homepage URL
- refetch: boolean, whether to refetch and process entries we've seen before
- store_blanks: boolean, whether we should store blank
- :class:`models.SyndicatedPost`\ s when we don't find a relationship
-
- Return:
- a dict of syndicated_url to a list of new :class:`models.SyndicatedPost`\ s
- """
- # for now use whether the url is a valid webmention target
- # as a proxy for whether it's worth searching it.
- author_url, _, ok = util.get_webmention_target(author_url)
- if not ok:
- return {}
-
- logging.debug('fetching author url %s', author_url)
- try:
- author_mf2 = util.fetch_mf2(author_url)
- except AssertionError:
- raise # for unit tests
- except BaseException:
- # TODO limit allowed failures, cache the author's h-feed url
- # or the # of times we've failed to fetch it
- logging.info('Could not fetch author url %s', author_url, exc_info=True)
- return {}
-
- feeditems = _find_feed_items(author_mf2)
-
- # try rel=feeds
- feed_urls = set()
- for feed_url in author_mf2['rels'].get('feed', []):
- # check that it's html, not too big, etc
- feed_url, _, feed_ok = util.get_webmention_target(feed_url)
- if feed_url == author_url:
- logging.debug('author url is the feed url, ignoring')
- elif not feed_ok:
- logging.debug("skipping feed since it's not HTML or otherwise bad")
- else:
- feed_urls.add(feed_url)
-
- for feed_url in feed_urls:
+ """Fetch the author's domain URL, and look for syndicated posts.
+
+ Args:
+ source: a subclass of :class:`models.Source`
+ author_url: the author's homepage URL
+ refetch: boolean, whether to refetch and process entries we've seen before
+ store_blanks: boolean, whether we should store blank
+ :class:`models.SyndicatedPost`\ s when we don't find a relationship
+
+ Return:
+ a dict of syndicated_url to a list of new :class:`models.SyndicatedPost`\ s
+ """
+ # for now use whether the url is a valid webmention target
+ # as a proxy for whether it's worth searching it.
+ author_url, _, ok = util.get_webmention_target(author_url)
+ if not ok:
+ return {}
+
+ logging.debug("fetching author url %s", author_url)
try:
- logging.debug("fetching author's rel-feed %s", feed_url)
- feed_mf2 = util.fetch_mf2(feed_url)
- feeditems = _merge_hfeeds(feeditems, _find_feed_items(feed_mf2))
- domain = util.domain_from_link(feed_url)
- if source.updates is not None and domain not in source.domains:
- domains = source.updates.setdefault('domains', source.domains)
- if domain not in domains:
- logging.info('rel-feed found new domain %s! adding to source', domain)
- domains.append(domain)
-
+ author_mf2 = util.fetch_mf2(author_url)
except AssertionError:
- raise # reraise assertions for unit tests
+ raise # for unit tests
except BaseException:
- logging.info('Could not fetch h-feed url %s.', feed_url, exc_info=True)
-
- # sort by dt-updated/dt-published
- def updated_or_published(item):
- props = microformats2.first_props(item.get('properties'))
- return props.get('updated') or props.get('published') or ''
-
- feeditems.sort(key=updated_or_published, reverse=True)
-
- permalink_to_entry = collections.OrderedDict()
- for child in feeditems:
- if 'h-entry' in child['type']:
- permalinks = child['properties'].get('url', [])
- if not permalinks:
- logging.debug('ignoring h-entry with no u-url!')
- for permalink in permalinks:
- if isinstance(permalink, str):
- permalink_to_entry[permalink] = child
+ # TODO limit allowed failures, cache the author's h-feed url
+ # or the # of times we've failed to fetch it
+ logging.info("Could not fetch author url %s", author_url, exc_info=True)
+ return {}
+
+ feeditems = _find_feed_items(author_mf2)
+
+ # try rel=feeds
+ feed_urls = set()
+ for feed_url in author_mf2["rels"].get("feed", []):
+ # check that it's html, not too big, etc
+ feed_url, _, feed_ok = util.get_webmention_target(feed_url)
+ if feed_url == author_url:
+ logging.debug("author url is the feed url, ignoring")
+ elif not feed_ok:
+ logging.debug("skipping feed since it's not HTML or otherwise bad")
else:
- logging.warning('unexpected non-string "url" property: %s', permalink)
-
- max = (MAX_PERMALINK_FETCHES_BETA if source.is_beta_user()
- else MAX_PERMALINK_FETCHES)
- if len(permalink_to_entry) >= max:
- logging.info('Hit cap of %d permalinks. Stopping.', max)
- break
-
- # query all preexisting permalinks at once, instead of once per link
- permalinks_list = list(permalink_to_entry.keys())
- # fetch the maximum allowed entries (currently 30) at a time
- preexisting_list = itertools.chain.from_iterable(
- SyndicatedPost.query(
- SyndicatedPost.original.IN(permalinks_list[i:i + MAX_ALLOWABLE_QUERIES]),
- ancestor=source.key)
- for i in range(0, len(permalinks_list), MAX_ALLOWABLE_QUERIES))
- preexisting = {}
- for r in preexisting_list:
- preexisting.setdefault(r.original, []).append(r)
-
- results = {}
- for permalink, entry in permalink_to_entry.items():
- logging.debug('processing permalink: %s', permalink)
- new_results = process_entry(
- source, permalink, entry, refetch, preexisting.get(permalink, []),
- store_blanks=store_blanks)
- for key, value in new_results.items():
- results.setdefault(key, []).extend(value)
-
- if source.updates is not None and results:
- # keep track of the last time we've seen rel=syndication urls for
- # this author. this helps us decide whether to refetch periodically
- # and look for updates.
- # Source will be saved at the end of each round of polling
- source.updates['last_syndication_url'] = util.now_fn()
-
- return results
+ feed_urls.add(feed_url)
+
+ for feed_url in feed_urls:
+ try:
+ logging.debug("fetching author's rel-feed %s", feed_url)
+ feed_mf2 = util.fetch_mf2(feed_url)
+ feeditems = _merge_hfeeds(feeditems, _find_feed_items(feed_mf2))
+ domain = util.domain_from_link(feed_url)
+ if source.updates is not None and domain not in source.domains:
+ domains = source.updates.setdefault("domains", source.domains)
+ if domain not in domains:
+ logging.info(
+ "rel-feed found new domain %s! adding to source", domain
+ )
+ domains.append(domain)
+
+ except AssertionError:
+ raise # reraise assertions for unit tests
+ except BaseException:
+ logging.info("Could not fetch h-feed url %s.", feed_url, exc_info=True)
+
+ # sort by dt-updated/dt-published
+ def updated_or_published(item):
+ props = microformats2.first_props(item.get("properties"))
+ return props.get("updated") or props.get("published") or ""
+
+ feeditems.sort(key=updated_or_published, reverse=True)
+
+ permalink_to_entry = collections.OrderedDict()
+ for child in feeditems:
+ if "h-entry" in child["type"]:
+ permalinks = child["properties"].get("url", [])
+ if not permalinks:
+ logging.debug("ignoring h-entry with no u-url!")
+ for permalink in permalinks:
+ if isinstance(permalink, str):
+ permalink_to_entry[permalink] = child
+ else:
+ logging.warning(
+ 'unexpected non-string "url" property: %s', permalink
+ )
+
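+ # cap the number of permalinks fetched per author page; beta users get a
+ # higher limit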
+ max = (
+ MAX_PERMALINK_FETCHES_BETA
+ if source.is_beta_user()
+ else MAX_PERMALINK_FETCHES
+ )
+ if len(permalink_to_entry) >= max:
+ logging.info("Hit cap of %d permalinks. Stopping.", max)
+ break
+
+ # query all preexisting permalinks at once, instead of once per link
+ permalinks_list = list(permalink_to_entry.keys())
+ # fetch the maximum allowed entries (currently 30) at a time
+ preexisting_list = itertools.chain.from_iterable(
+ SyndicatedPost.query(
+ SyndicatedPost.original.IN(permalinks_list[i : i + MAX_ALLOWABLE_QUERIES]),
+ ancestor=source.key,
+ )
+ for i in range(0, len(permalinks_list), MAX_ALLOWABLE_QUERIES)
+ )
+ preexisting = {}
+ for r in preexisting_list:
+ preexisting.setdefault(r.original, []).append(r)
+
+ results = {}
+ for permalink, entry in permalink_to_entry.items():
+ logging.debug("processing permalink: %s", permalink)
+ new_results = process_entry(
+ source,
+ permalink,
+ entry,
+ refetch,
+ preexisting.get(permalink, []),
+ store_blanks=store_blanks,
+ )
+ for key, value in new_results.items():
+ results.setdefault(key, []).extend(value)
+
+ if source.updates is not None and results:
+ # keep track of the last time we've seen rel=syndication urls for
+ # this author. this helps us decide whether to refetch periodically
+ # and look for updates.
+ # Source will be saved at the end of each round of polling
+ source.updates["last_syndication_url"] = util.now_fn()
+
+ return results
-def _merge_hfeeds(feed1, feed2):
- """Merge items from two h-feeds into a composite feed. Skips items in
- feed2 that are already represented in feed1, based on the "url" property.
-
- Args:
- feed1: a list of dicts
- feed2: a list of dicts
-
- Returns:
- a list of dicts
- """
- seen = set()
- for item in feed1:
- for url in item.get('properties', {}).get('url', []):
- if isinstance(url, str):
- seen.add(url)
- return feed1 + [item for item in feed2 if all(
- (url not in seen) for url in item.get('properties', {}).get('url', []) if isinstance(url, str))]
+def _merge_hfeeds(feed1, feed2):
+ """Merge items from two h-feeds into a composite feed. Skips items in
+ feed2 that are already represented in feed1, based on the "url" property.
+
+ Args:
+ feed1: a list of dicts
+ feed2: a list of dicts
+
+ Returns:
+ a list of dicts
+ """
+ seen = set()
+ for item in feed1:
+ for url in item.get("properties", {}).get("url", []):
+ if isinstance(url, str):
+ seen.add(url)
+
+ return feed1 + [
+ item
+ for item in feed2
+ if all(
+ (url not in seen)
+ for url in item.get("properties", {}).get("url", [])
+ if isinstance(url, str)
+ )
+ ]
def _find_feed_items(mf2):
- """Extract feed items from given microformats2 data.
-
- If the top-level h-* item is an h-feed, return its children. Otherwise,
- returns the top-level items.
-
- Args:
- mf2: dict, parsed mf2 data
-
- Returns: list of dicts, each one representing an mf2 h-* item
- """
- feeditems = mf2['items']
- hfeeds = mf2util.find_all_entries(mf2, ('h-feed',))
- if hfeeds:
- feeditems = list(itertools.chain.from_iterable(
- hfeed.get('children', []) for hfeed in hfeeds))
- else:
- logging.debug('No h-feed found, fallback to top-level h-entrys.')
-
- if len(feeditems) > MAX_FEED_ENTRIES:
- logging.info('Feed has %s entries! only processing the first %s.',
- len(feeditems), MAX_FEED_ENTRIES)
- feeditems = feeditems[:MAX_FEED_ENTRIES]
-
- return feeditems
-
-
-def process_entry(source, permalink, feed_entry, refetch, preexisting,
- store_blanks=True):
- """Fetch and process an h-entry and save a new :class:`models.SyndicatedPost`.
-
- Args:
- source:
- permalink: url of the unprocessed post
- feed_entry: the h-feed version of the h-entry dict, often contains
- a partial version of the h-entry at the permalink
- refetch: boolean, whether to refetch and process entries we've seen before
- preexisting: list of previously discovered :class:`models.SyndicatedPost`\ s
- for this permalink
- store_blanks: boolean, whether we should store blank
- :class:`models.SyndicatedPost`\ s when we don't find a relationship
-
- Returns:
- a dict from syndicated url to a list of new :class:`models.SyndicatedPost`\ s
- """
- # if the post has already been processed, do not add to the results
- # since this method only returns *newly* discovered relationships.
- if preexisting:
- # if we're refetching and this one is blank, do not return.
- # if there is a blank entry, it should be the one and only entry,
- # but go ahead and check 'all' of them to be safe.
- if not refetch:
- return {}
- synds = [s.syndication for s in preexisting if s.syndication]
- if synds:
- logging.debug('previously found relationship(s) for original %s: %s',
- permalink, synds)
-
- # first try with the h-entry from the h-feed. if we find the syndication url
- # we're looking for, we don't have to fetch the permalink
- permalink, _, type_ok = util.get_webmention_target(permalink)
- usynd = feed_entry.get('properties', {}).get('syndication', [])
- if usynd:
- logging.debug('u-syndication links on the h-feed h-entry: %s', usynd)
- results = _process_syndication_urls(source, permalink, set(
- url for url in usynd if isinstance(url, str)), preexisting)
- success = True
-
- if results:
- source.updates['last_feed_syndication_url'] = util.now_fn()
- elif not source.last_feed_syndication_url or not feed_entry:
- # fetch the full permalink page if we think it might have more details
- mf2 = None
- try:
- if type_ok:
- logging.debug('fetching post permalink %s', permalink)
- mf2 = util.fetch_mf2(permalink)
- except AssertionError:
- raise # for unit tests
- except BaseException:
- # TODO limit the number of allowed failures
- logging.info('Could not fetch permalink %s', permalink, exc_info=True)
- success = False
-
- if mf2:
- syndication_urls = set()
- relsynd = mf2['rels'].get('syndication', [])
- if relsynd:
- logging.debug('rel-syndication links: %s', relsynd)
- syndication_urls.update(url for url in relsynd
- if isinstance(url, str))
- # there should only be one h-entry on a permalink page, but
- # we'll check all of them just in case.
- for hentry in (item for item in mf2['items']
- if 'h-entry' in item['type']):
- usynd = hentry.get('properties', {}).get('syndication', [])
- if usynd:
- logging.debug('u-syndication links: %s', usynd)
- syndication_urls.update(url for url in usynd
- if isinstance(url, str))
- results = _process_syndication_urls(
- source, permalink, syndication_urls, preexisting)
-
- # detect and delete SyndicatedPosts that were removed from the site
- if success:
- result_syndposts = list(itertools.chain(*results.values()))
- for syndpost in preexisting:
- if syndpost.syndication and syndpost not in result_syndposts:
- logging.info('deleting relationship that disappeared: %s', syndpost)
- syndpost.key.delete()
- preexisting.remove(syndpost)
-
- if not results:
- logging.debug('no syndication links from %s to current source %s.',
- permalink, source.label())
+ """Extract feed items from given microformats2 data.
+
+ If the top-level h-* item is an h-feed, return its children. Otherwise,
+ returns the top-level items.
+
+ Args:
+ mf2: dict, parsed mf2 data
+
+ Returns: list of dicts, each one representing an mf2 h-* item
+ """
+ feeditems = mf2["items"]
+ hfeeds = mf2util.find_all_entries(mf2, ("h-feed",))
+ if hfeeds:
+ feeditems = list(
+ itertools.chain.from_iterable(hfeed.get("children", []) for hfeed in hfeeds)
+ )
+ else:
+ logging.debug("No h-feed found, fallback to top-level h-entrys.")
+
+ if len(feeditems) > MAX_FEED_ENTRIES:
+ logging.info(
+ "Feed has %s entries! only processing the first %s.",
+ len(feeditems),
+ MAX_FEED_ENTRIES,
+ )
+ feeditems = feeditems[:MAX_FEED_ENTRIES]
+
+ return feeditems
+
+
+def process_entry(
+ source, permalink, feed_entry, refetch, preexisting, store_blanks=True
+):
+ """Fetch and process an h-entry and save a new :class:`models.SyndicatedPost`.
+
+ Args:
+ source: a subclass of :class:`models.Source`
+ permalink: url of the unprocessed post
+ feed_entry: the h-feed version of the h-entry dict, often contains
+ a partial version of the h-entry at the permalink
+ refetch: boolean, whether to refetch and process entries we've seen before
+ preexisting: list of previously discovered :class:`models.SyndicatedPost`\ s
+ for this permalink
+ store_blanks: boolean, whether we should store blank
+ :class:`models.SyndicatedPost`\ s when we don't find a relationship
+
+ Returns:
+ a dict from syndicated url to a list of new :class:`models.SyndicatedPost`\ s
+ """
+ # if the post has already been processed, do not add to the results
+ # since this method only returns *newly* discovered relationships.
+ if preexisting:
+ # if we're refetching and this one is blank, do not return.
+ # if there is a blank entry, it should be the one and only entry,
+ # but go ahead and check 'all' of them to be safe.
+ if not refetch:
+ return {}
+ synds = [s.syndication for s in preexisting if s.syndication]
+ if synds:
+ logging.debug(
+ "previously found relationship(s) for original %s: %s", permalink, synds
+ )
+
+ # first try with the h-entry from the h-feed. if we find the syndication url
+ # we're looking for, we don't have to fetch the permalink
+ permalink, _, type_ok = util.get_webmention_target(permalink)
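+ # type_ok means the permalink looks like a fetchable webmention target
+ # (HTML, not too big, etc); it gates the full page fetch below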
+ usynd = feed_entry.get("properties", {}).get("syndication", [])
+ if usynd:
+ logging.debug("u-syndication links on the h-feed h-entry: %s", usynd)
+ results = _process_syndication_urls(
+ source,
+ permalink,
+ set(url for url in usynd if isinstance(url, str)),
+ preexisting,
+ )
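+ # success stays True only if every fetch below works; stale SyndicatedPosts
+ # are pruned at the end only when it does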
+ success = True
+
+ if results:
+ source.updates["last_feed_syndication_url"] = util.now_fn()
+ elif not source.last_feed_syndication_url or not feed_entry:
+ # fetch the full permalink page if we think it might have more details
+ mf2 = None
+ try:
+ if type_ok:
+ logging.debug("fetching post permalink %s", permalink)
+ mf2 = util.fetch_mf2(permalink)
+ except AssertionError:
+ raise # for unit tests
+ except BaseException:
+ # TODO limit the number of allowed failures
+ logging.info("Could not fetch permalink %s", permalink, exc_info=True)
+ success = False
+
+ if mf2:
+ syndication_urls = set()
+ relsynd = mf2["rels"].get("syndication", [])
+ if relsynd:
+ logging.debug("rel-syndication links: %s", relsynd)
+ syndication_urls.update(url for url in relsynd if isinstance(url, str))
+ # there should only be one h-entry on a permalink page, but
+ # we'll check all of them just in case.
+ for hentry in (item for item in mf2["items"] if "h-entry" in item["type"]):
+ usynd = hentry.get("properties", {}).get("syndication", [])
+ if usynd:
+ logging.debug("u-syndication links: %s", usynd)
+ syndication_urls.update(url for url in usynd if isinstance(url, str))
+ results = _process_syndication_urls(
+ source, permalink, syndication_urls, preexisting
+ )
+
+ # detect and delete SyndicatedPosts that were removed from the site
+ if success:
+ result_syndposts = list(itertools.chain(*results.values()))
+ for syndpost in preexisting:
+ if syndpost.syndication and syndpost not in result_syndposts:
+ logging.info("deleting relationship that disappeared: %s", syndpost)
+ syndpost.key.delete()
+ preexisting.remove(syndpost)
+
+ if not results:
+ logging.debug(
+ "no syndication links from %s to current source %s.",
+ permalink,
+ source.label(),
+ )
+ results = {}
+ if store_blanks and not preexisting:
+ # remember that this post doesn't have syndication links for this
+ # particular source
+ logging.debug(
+ "saving empty relationship so that %s will not be " "searched again",
+ permalink,
+ )
+ SyndicatedPost.insert_original_blank(source, permalink)
+
+ # only return results that are not in the preexisting list
+ new_results = {}
+ for syndurl, syndposts_for_url in results.items():
+ for syndpost in syndposts_for_url:
+ if syndpost not in preexisting:
+ new_results.setdefault(syndurl, []).append(syndpost)
+
+ if new_results:
+ logging.debug("discovered relationships %s", new_results)
+ return new_results
+
+
+def _process_syndication_urls(source, permalink, syndication_urls, preexisting):
+ """Process a list of syndication URLs looking for one that matches the
+ current source. If one is found, stores a new :class:`models.SyndicatedPost`
+ in the db.
+
+ Args:
+ source: a :class:`models.Source` subclass
+ permalink: a string. the current h-entry permalink
+ syndication_urls: a collection of strings. the unfiltered list
+ of syndication urls
+ preexisting: a list of previously discovered :class:`models.SyndicatedPost`\ s
+
+ Returns:
+ dict mapping string syndication url to list of :class:`models.SyndicatedPost`\ s
+ """
results = {}
- if store_blanks and not preexisting:
- # remember that this post doesn't have syndication links for this
- # particular source
- logging.debug('saving empty relationship so that %s will not be '
- 'searched again', permalink)
- SyndicatedPost.insert_original_blank(source, permalink)
-
- # only return results that are not in the preexisting list
- new_results = {}
- for syndurl, syndposts_for_url in results.items():
- for syndpost in syndposts_for_url:
- if syndpost not in preexisting:
- new_results.setdefault(syndurl, []).append(syndpost)
-
- if new_results:
- logging.debug('discovered relationships %s', new_results)
- return new_results
-
-
-def _process_syndication_urls(source, permalink, syndication_urls,
- preexisting):
- """Process a list of syndication URLs looking for one that matches the
- current source. If one is found, stores a new :class:`models.SyndicatedPost`
- in the db.
-
- Args:
- source: a :class:`models.Source` subclass
- permalink: a string. the current h-entry permalink
- syndication_urls: a collection of strings. the unfitered list
- of syndication urls
- preexisting: a list of previously discovered :class:`models.SyndicatedPost`\ s
-
- Returns:
- dict mapping string syndication url to list of :class:`models.SyndicatedPost`\ s
- """
- results = {}
- # save the results (or lack thereof) to the db, and put them in a
- # map for immediate use
- for url in syndication_urls:
- # source-specific logic to standardize the URL
- url = source.canonicalize_url(url)
- if not url:
- continue
-
- # TODO: save future lookups by saving results for other sources too (note:
- # query the appropriate source subclass by author.domains, rather than
- # author.domain_urls)
- #
- # we may have already seen this relationship, save a DB lookup by
- # finding it in the preexisting list
- relationship = next((sp for sp in preexisting
- if sp.syndication == url
- and sp.original == permalink), None)
- if not relationship:
- logging.debug('saving discovered relationship %s -> %s', url, permalink)
- relationship = SyndicatedPost.insert(
- source, syndication=url, original=permalink)
- results.setdefault(url, []).append(relationship)
-
- return results
+ # save the results (or lack thereof) to the db, and put them in a
+ # map for immediate use
+ for url in syndication_urls:
+ # source-specific logic to standardize the URL
+ url = source.canonicalize_url(url)
+ if not url:
+ continue
+
+ # TODO: save future lookups by saving results for other sources too (note:
+ # query the appropriate source subclass by author.domains, rather than
+ # author.domain_urls)
+ #
+ # we may have already seen this relationship, save a DB lookup by
+ # finding it in the preexisting list
+ relationship = next(
+ (
+ sp
+ for sp in preexisting
+ if sp.syndication == url and sp.original == permalink
+ ),
+ None,
+ )
+ if not relationship:
+ logging.debug("saving discovered relationship %s -> %s", url, permalink)
+ relationship = SyndicatedPost.insert(
+ source, syndication=url, original=permalink
+ )
+ results.setdefault(url, []).append(relationship)
+
+ return results
def _get_author_urls(source):
- max = models.MAX_AUTHOR_URLS
- urls = source.get_author_urls()
- if len(urls) > max:
- logging.warning('user has over %d URLs! only running PPD on %s. skipping %s.',
- max, urls[:max], urls[max:])
- urls = urls[:max]
-
- return urls
+ max = models.MAX_AUTHOR_URLS
+ urls = source.get_author_urls()
+ if len(urls) > max:
+ logging.warning(
+ "user has over %d URLs! only running PPD on %s. skipping %s.",
+ max,
+ urls[:max],
+ urls[max:],
+ )
+ urls = urls[:max]
+
+ return urls
diff --git a/pages.py b/pages.py
index dc3db228..603b171c 100644
--- a/pages.py
+++ b/pages.py
@@ -25,545 +25,635 @@
# populate models.sources
import blogger, facebook, flickr, github, indieauth, instagram, mastodon, medium, meetup, reddit, tumblr, twitter, wordpress_rest
-SITES = ','.join(list(models.sources.keys()) + ['fake']) # for unit tests
+SITES = ",".join(list(models.sources.keys()) + ["fake"]) # for unit tests
RECENT_PRIVATE_POSTS_THRESHOLD = 5
-@app.route('/', methods=['HEAD'])
-@app.route('/users', methods=['HEAD'])
-@app.route(f'/<any({SITES}):site>/<id>/', methods=['HEAD'])
-@app.route('/about', methods=['HEAD'])
+@app.route("/", methods=["HEAD"])
+@app.route("/users", methods=["HEAD"])
+@app.route(f"//", methods=["HEAD"])
+@app.route("/about", methods=["HEAD"])
def head(site=None, id=None):
- """Return an empty 200 with no caching directives."""
- if site and site not in models.sources:
- return '', 404
+ """Return an empty 200 with no caching directives."""
+ if site and site not in models.sources:
+ return "", 404
- return ''
+ return ""
-@app.route('/')
+@app.route("/")
@flask_util.cached(cache, datetime.timedelta(days=1))
def front_page():
- """View for the front page."""
- return render_template('index.html')
+ """View for the front page."""
+ return render_template("index.html")
-@app.route('/about')
+@app.route("/about")
def about():
- return render_template('about.html')
+ return render_template("about.html")
-@app.route('/users')
+@app.route("/users")
@flask_util.cached(cache, datetime.timedelta(hours=1))
def users():
- """View for /users.
-
- Semi-optimized. Pages by source name. Queries each source type for results
- with name greater than the start_name query param, then merge sorts the
- results and truncates at PAGE_SIZE.
-
- The start_name param is expected to be capitalized because capital letters
- sort lexicographically before lower case letters. An alternative would be to
- store a lower cased version of the name in another property and query on that.
- """
- PAGE_SIZE = 50
-
- start_name = request.values.get('start_name', '')
- queries = [cls.query(cls.name >= start_name).fetch_async(PAGE_SIZE)
- for cls in models.sources.values()]
-
- sources = sorted(itertools.chain(*[q.get_result() for q in queries]),
- key=lambda s: (s.name.lower(), s.GR_CLASS.NAME))
- sources = [util.preprocess_source(s) for s in sources
- if s.name.lower() >= start_name.lower() and s.features
- and s.status != 'disabled'
- ][:PAGE_SIZE]
-
- return render_template('users.html', PAGE_SIZE=PAGE_SIZE, sources=sources)
-
-
-@app.route(f'/<any({SITES}):site>/<id>/')
+ """View for /users.
+
+ Semi-optimized. Pages by source name. Queries each source type for results
+ with name greater than the start_name query param, then merge sorts the
+ results and truncates at PAGE_SIZE.
+
+ The start_name param is expected to be capitalized because capital letters
+ sort lexicographically before lower case letters. An alternative would be to
+ store a lower cased version of the name in another property and query on that.
+ """
+ PAGE_SIZE = 50
+
+ start_name = request.values.get("start_name", "")
+ queries = [
+ cls.query(cls.name >= start_name).fetch_async(PAGE_SIZE)
+ for cls in models.sources.values()
+ ]
+
+ sources = sorted(
+ itertools.chain(*[q.get_result() for q in queries]),
+ key=lambda s: (s.name.lower(), s.GR_CLASS.NAME),
+ )
+ sources = [
+ util.preprocess_source(s)
+ for s in sources
+ if s.name.lower() >= start_name.lower()
+ and s.features
+ and s.status != "disabled"
+ ][:PAGE_SIZE]
+
+ return render_template("users.html", PAGE_SIZE=PAGE_SIZE, sources=sources)
+
+
+@app.route(f"//")
def user(site, id):
- """View for a user page."""
- cls = models.sources.get(site)
- if not cls:
- return render_template('user_not_found.html'), 404
-
- source = cls.lookup(id)
-
- if not source:
- key = cls.query(ndb.OR(*[ndb.GenericProperty(prop) == id for prop in
- ('domains', 'inferred_username', 'name', 'username')])
- ).get(keys_only=True)
- if key:
- return redirect(cls(key=key).bridgy_path(), code=301)
-
- if not source or not source.features:
- return render_template('user_not_found.html'), 404
-
- source.verify()
- source = util.preprocess_source(source)
-
- vars = {
- 'source': source,
- 'logs': logs,
- 'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
- 'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
- }
-
- # Blog webmention promos
- if 'webmention' not in source.features:
- if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
- vars[source.SHORT_NAME + '_promo'] = True
- else:
- for domain in source.domains:
- if ('.blogspot.' in domain and # Blogger uses country TLDs
- not Blogger.query(Blogger.domains == domain).get()):
- vars['blogger_promo'] = True
- elif (util.domain_or_parent_in(domain, ['tumblr.com']) and
- not Tumblr.query(Tumblr.domains == domain).get()):
- vars['tumblr_promo'] = True
- elif (util.domain_or_parent_in(domain, 'wordpress.com') and
- not WordPress.query(WordPress.domains == domain).get()):
- vars['wordpress_promo'] = True
-
- # Responses
- if 'listen' in source.features or 'email' in source.features:
- vars['responses'] = []
- query = Response.query().filter(Response.source == source.key)
-
- # if there's a paging param (responses_before or responses_after), update
- # query with it
- def get_paging_param(param):
- val = request.values.get(param)
- try:
- return util.parse_iso8601(val) if val else None
- except BaseException:
- error(f"Couldn't parse {param}, {val!r} as ISO8601")
-
- before = get_paging_param('responses_before')
- after = get_paging_param('responses_after')
- if before and after:
- error("can't handle both responses_before and responses_after")
- elif after:
- query = query.filter(Response.updated > after).order(Response.updated)
- elif before:
- query = query.filter(Response.updated < before).order(-Response.updated)
- else:
- query = query.order(-Response.updated)
-
- query_iter = query.iter()
- for i, r in enumerate(query_iter):
- r.response = json_loads(r.response_json)
- r.activities = [json_loads(a) for a in r.activities_json]
-
- if (not source.is_activity_public(r.response) or
- not all(source.is_activity_public(a) for a in r.activities)):
- continue
- elif r.type == 'post':
- r.activities = []
-
- verb = r.response.get('verb')
- r.actor = (r.response.get('object') if verb == 'invite'
- else r.response.get('author') or r.response.get('actor')
- ) or {}
-
- activity_content = ''
- for a in r.activities + [r.response]:
- if not a.get('content'):
- obj = a.get('object', {})
- a['content'] = activity_content = (
- obj.get('content') or obj.get('displayName') or
- # historical, from a Reddit bug fixed in granary@4f9df7c
- obj.get('name') or '')
-
- response_content = r.response.get('content')
- phrases = {
- 'like': 'liked this',
- 'repost': 'reposted this',
- 'rsvp-yes': 'is attending',
- 'rsvp-no': 'is not attending',
- 'rsvp-maybe': 'might attend',
- 'rsvp-interested': 'is interested',
- 'invite': 'is invited',
- }
- phrase = phrases.get(r.type) or phrases.get(verb)
- if phrase and (r.type != 'repost' or
- activity_content.startswith(response_content)):
- r.response['content'] = '%s %s.' % (
- r.actor.get('displayName') or '', phrase)
-
- # convert image URL to https if we're serving over SSL
- image_url = r.actor.setdefault('image', {}).get('url')
- if image_url:
- r.actor['image']['url'] = util.update_scheme(image_url, request)
-
- # generate original post links
- r.links = process_webmention_links(r)
- r.original_links = [util.pretty_link(url, new_tab=True)
- for url in r.original_posts]
-
- vars['responses'].append(r)
- if len(vars['responses']) >= 10 or i > 200:
- break
-
- vars['responses'].sort(key=lambda r: r.updated, reverse=True)
-
- # calculate new paging param(s)
- new_after = (
- before if before else
- vars['responses'][0].updated if
- vars['responses'] and query_iter.probably_has_next() and (before or after)
- else None)
- if new_after:
- vars['responses_after_link'] = ('?responses_after=%s#responses' %
- new_after.isoformat())
-
- new_before = (
- after if after else
- vars['responses'][-1].updated if
- vars['responses'] and query_iter.probably_has_next()
- else None)
- if new_before:
- vars['responses_before_link'] = ('?responses_before=%s#responses' %
- new_before.isoformat())
-
- vars['next_poll'] = max(
- source.last_poll_attempt + source.poll_period(),
- # lower bound is 1 minute from now
- util.now_fn() + datetime.timedelta(seconds=90))
-
- # Publishes
- if 'publish' in source.features:
- publishes = Publish.query().filter(Publish.source == source.key)\
- .order(-Publish.updated)\
- .fetch(10)
- for p in publishes:
- p.pretty_page = util.pretty_link(
- p.key.parent().id(),
- attrs={'class': 'original-post u-url u-name'},
- new_tab=True)
-
- vars['publishes'] = publishes
-
- if 'webmention' in source.features:
- # Blog posts
- blogposts = BlogPost.query().filter(BlogPost.source == source.key)\
- .order(-BlogPost.created)\
- .fetch(10)
- for b in blogposts:
- b.links = process_webmention_links(b)
- try:
- text = b.feed_item.get('title')
- except ValueError:
- text = None
- b.pretty_url = util.pretty_link(
- b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
- max_length=40, new_tab=True)
-
- # Blog webmentions
- webmentions = BlogWebmention.query()\
- .filter(BlogWebmention.source == source.key)\
- .order(-BlogWebmention.updated)\
- .fetch(10)
- for w in webmentions:
- w.pretty_source = util.pretty_link(
- w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
- try:
- target_is_source = (urllib.parse.urlparse(w.target_url()).netloc in
- source.domains)
- except BaseException:
- target_is_source = False
- w.pretty_target = util.pretty_link(
- w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
- keep_host=target_is_source)
-
- vars.update({'blogposts': blogposts, 'webmentions': webmentions})
-
- return render_template(f'{source.SHORT_NAME}_user.html', **vars)
+ """View for a user page."""
+ cls = models.sources.get(site)
+ if not cls:
+ return render_template("user_not_found.html"), 404
+
+ source = cls.lookup(id)
+
+ if not source:
+ key = cls.query(
+ ndb.OR(
+ *[
+ ndb.GenericProperty(prop) == id
+ for prop in ("domains", "inferred_username", "name", "username")
+ ]
+ )
+ ).get(keys_only=True)
+ if key:
+ return redirect(cls(key=key).bridgy_path(), code=301)
+
+ if not source or not source.features:
+ return render_template("user_not_found.html"), 404
+
+ source.verify()
+ source = util.preprocess_source(source)
+
+ vars = {
+ "source": source,
+ "logs": logs,
+ "REFETCH_HFEED_TRIGGER": models.REFETCH_HFEED_TRIGGER,
+ "RECENT_PRIVATE_POSTS_THRESHOLD": RECENT_PRIVATE_POSTS_THRESHOLD,
+ }
+
+ # Blog webmention promos
+ if "webmention" not in source.features:
+ if source.SHORT_NAME in ("blogger", "medium", "tumblr", "wordpress"):
+ vars[source.SHORT_NAME + "_promo"] = True
+ else:
+ for domain in source.domains:
+ # Blogger uses country TLDs
+ if (
+ ".blogspot." in domain
+ and not Blogger.query(Blogger.domains == domain).get()
+ ):
+ vars["blogger_promo"] = True
+ elif (
+ util.domain_or_parent_in(domain, ["tumblr.com"])
+ and not Tumblr.query(Tumblr.domains == domain).get()
+ ):
+ vars["tumblr_promo"] = True
+ elif (
+ util.domain_or_parent_in(domain, "wordpress.com")
+ and not WordPress.query(WordPress.domains == domain).get()
+ ):
+ vars["wordpress_promo"] = True
+
+ # Responses
+ if "listen" in source.features or "email" in source.features:
+ vars["responses"] = []
+ query = Response.query().filter(Response.source == source.key)
+
+ # if there's a paging param (responses_before or responses_after), update
+ # query with it
+ def get_paging_param(param):
+ val = request.values.get(param)
+ try:
+ return util.parse_iso8601(val) if val else None
+ except BaseException:
+ error(f"Couldn't parse {param}, {val!r} as ISO8601")
+
+ before = get_paging_param("responses_before")
+ after = get_paging_param("responses_after")
+ if before and after:
+ error("can't handle both responses_before and responses_after")
+ elif after:
+ query = query.filter(Response.updated > after).order(Response.updated)
+ elif before:
+ query = query.filter(Response.updated < before).order(-Response.updated)
+ else:
+ query = query.order(-Response.updated)
+
+ query_iter = query.iter()
+ for i, r in enumerate(query_iter):
+ r.response = json_loads(r.response_json)
+ r.activities = [json_loads(a) for a in r.activities_json]
+
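+ # responses to private activities are skipped; only public ones are shown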
+ if not source.is_activity_public(r.response) or not all(
+ source.is_activity_public(a) for a in r.activities
+ ):
+ continue
+ elif r.type == "post":
+ r.activities = []
+
+ verb = r.response.get("verb")
+ r.actor = (
+ r.response.get("object")
+ if verb == "invite"
+ else r.response.get("author") or r.response.get("actor")
+ ) or {}
+
+ activity_content = ""
+ for a in r.activities + [r.response]:
+ if not a.get("content"):
+ obj = a.get("object", {})
+ a["content"] = activity_content = (
+ obj.get("content")
+ or obj.get("displayName")
+ or
+ # historical, from a Reddit bug fixed in granary@4f9df7c
+ obj.get("name")
+ or ""
+ )
+
+ response_content = r.response.get("content")
+ phrases = {
+ "like": "liked this",
+ "repost": "reposted this",
+ "rsvp-yes": "is attending",
+ "rsvp-no": "is not attending",
+ "rsvp-maybe": "might attend",
+ "rsvp-interested": "is interested",
+ "invite": "is invited",
+ }
+ phrase = phrases.get(r.type) or phrases.get(verb)
+ if phrase and (
+ r.type != "repost" or activity_content.startswith(response_content)
+ ):
+ r.response["content"] = "%s %s." % (
+ r.actor.get("displayName") or "",
+ phrase,
+ )
+
+ # convert image URL to https if we're serving over SSL
+ image_url = r.actor.setdefault("image", {}).get("url")
+ if image_url:
+ r.actor["image"]["url"] = util.update_scheme(image_url, request)
+
+ # generate original post links
+ r.links = process_webmention_links(r)
+ r.original_links = [
+ util.pretty_link(url, new_tab=True) for url in r.original_posts
+ ]
+
+ vars["responses"].append(r)
+ if len(vars["responses"]) >= 10 or i > 200:
+ break
+
+ vars["responses"].sort(key=lambda r: r.updated, reverse=True)
+
+ # calculate new paging param(s)
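+ # responses_after pages back toward newer responses, responses_before toward
+ # older ones; each link is generated only when more results likely exist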
+ new_after = (
+ before
+ if before
+ else vars["responses"][0].updated
+ if vars["responses"]
+ and query_iter.probably_has_next()
+ and (before or after)
+ else None
+ )
+ if new_after:
+ vars["responses_after_link"] = (
+ "?responses_after=%s#responses" % new_after.isoformat()
+ )
+
+ new_before = (
+ after
+ if after
+ else vars["responses"][-1].updated
+ if vars["responses"] and query_iter.probably_has_next()
+ else None
+ )
+ if new_before:
+ vars["responses_before_link"] = (
+ "?responses_before=%s#responses" % new_before.isoformat()
+ )
+
+ vars["next_poll"] = max(
+ source.last_poll_attempt + source.poll_period(),
+ # lower bound is 90 seconds from now
+ util.now_fn() + datetime.timedelta(seconds=90),
+ )
+
+ # Publishes
+ if "publish" in source.features:
+ publishes = (
+ Publish.query()
+ .filter(Publish.source == source.key)
+ .order(-Publish.updated)
+ .fetch(10)
+ )
+ for p in publishes:
+ p.pretty_page = util.pretty_link(
+ p.key.parent().id(),
+ attrs={"class": "original-post u-url u-name"},
+ new_tab=True,
+ )
+
+ vars["publishes"] = publishes
+
+ if "webmention" in source.features:
+ # Blog posts
+ blogposts = (
+ BlogPost.query()
+ .filter(BlogPost.source == source.key)
+ .order(-BlogPost.created)
+ .fetch(10)
+ )
+ for b in blogposts:
+ b.links = process_webmention_links(b)
+ try:
+ text = b.feed_item.get("title")
+ except ValueError:
+ text = None
+ b.pretty_url = util.pretty_link(
+ b.key.id(),
+ text=text,
+ attrs={"class": "original-post u-url u-name"},
+ max_length=40,
+ new_tab=True,
+ )
+
+ # Blog webmentions
+ webmentions = (
+ BlogWebmention.query()
+ .filter(BlogWebmention.source == source.key)
+ .order(-BlogWebmention.updated)
+ .fetch(10)
+ )
+ for w in webmentions:
+ w.pretty_source = util.pretty_link(
+ w.source_url(), attrs={"class": "original-post"}, new_tab=True
+ )
+ try:
+ target_is_source = (
+ urllib.parse.urlparse(w.target_url()).netloc in source.domains
+ )
+ except BaseException:
+ target_is_source = False
+ w.pretty_target = util.pretty_link(
+ w.target_url(),
+ attrs={"class": "original-post"},
+ new_tab=True,
+ keep_host=target_is_source,
+ )
+
+ vars.update({"blogposts": blogposts, "webmentions": webmentions})
+
+ return render_template(f"{source.SHORT_NAME}_user.html", **vars)
def process_webmention_links(e):
- """Generates pretty HTML for the links in a :class:`Webmentions` entity.
-
- Args:
- e: :class:`Webmentions` subclass (:class:`Response` or :class:`BlogPost`)
- """
- def link(url, g):
- return util.pretty_link(
- url, glyphicon=g, attrs={'class': 'original-post u-bridgy-target'},
- new_tab=True)
-
- return util.trim_nulls({
- 'Failed': set(link(url, 'exclamation-sign') for url in e.error + e.failed),
- 'Sending': set(link(url, 'transfer') for url in e.unsent
- if url not in e.error),
- 'Sent': set(link(url, None) for url in e.sent
- if url not in (e.error + e.unsent)),
- 'No webmention '
- 'support': set(link(url, None) for url in e.skipped),
- })
-
-
-@app.route('/delete/start', methods=['POST'])
+ """Generates pretty HTML for the links in a :class:`Webmentions` entity.
+
+ Args:
+ e: :class:`Webmentions` subclass (:class:`Response` or :class:`BlogPost`)
+ """
+
+ def link(url, g):
+ return util.pretty_link(
+ url,
+ glyphicon=g,
+ attrs={"class": "original-post u-bridgy-target"},
+ new_tab=True,
+ )
+
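+ # Sending and Sent exclude URLs already listed in an earlier, more severe
+ # bucket, so each URL shows up under at most one status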
+ return util.trim_nulls(
+ {
+ "Failed": set(link(url, "exclamation-sign") for url in e.error + e.failed),
+ "Sending": set(
+ link(url, "transfer") for url in e.unsent if url not in e.error
+ ),
+ "Sent": set(
+ link(url, None) for url in e.sent if url not in (e.error + e.unsent)
+ ),
"No webmention "
+ "support": set(link(url, None) for url in e.skipped),
+ }
+ )
+
+
+@app.route("/delete/start", methods=["POST"])
def delete_start():
- source = util.load_source()
- kind = source.key.kind()
- feature = request.form['feature']
- state = util.encode_oauth_state({
- 'operation': 'delete',
- 'feature': feature,
- 'source': source.key.urlsafe().decode(),
- 'callback': request.values.get('callback'),
- })
-
- # Blogger don't support redirect_url() yet
- if kind == 'Blogger':
- return redirect('/blogger/delete/start?state=%s' % state)
-
- path = ('/reddit/callback' if kind == 'Reddit'
- else '/wordpress/add' if kind == 'WordPress'
- else f'/{source.SHORT_NAME}/delete/finish')
- kwargs = {}
- if kind == 'Twitter':
- kwargs['access_type'] = 'read' if feature == 'listen' else 'write'
-
- try:
- return redirect(source.OAUTH_START(path).redirect_url(state=state))
- except werkzeug.exceptions.HTTPException:
- # raised by us, probably via self.error()
- raise
- except Exception as e:
- code, body = util.interpret_http_exception(e)
- if not code and util.is_connection_failure(e):
- code = '-'
- body = str(e)
- if code:
- flash(f'{source.GR_CLASS.NAME} API error {code}: {body}')
- return redirect(source.bridgy_url())
- else:
- raise
-
+ source = util.load_source()
+ kind = source.key.kind()
+ feature = request.form["feature"]
+ state = util.encode_oauth_state(
+ {
+ "operation": "delete",
+ "feature": feature,
+ "source": source.key.urlsafe().decode(),
+ "callback": request.values.get("callback"),
+ }
+ )
+
+ # Blogger doesn't support redirect_url() yet
+ if kind == "Blogger":
+ return redirect("/blogger/delete/start?state=%s" % state)
+
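+ # Reddit and WordPress reuse their existing OAuth callback paths; other
+ # silos get a dedicated /SITE/delete/finish handler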
+ path = (
+ "/reddit/callback"
+ if kind == "Reddit"
+ else "/wordpress/add"
+ if kind == "WordPress"
+ else f"/{source.SHORT_NAME}/delete/finish"
+ )
+ kwargs = {}
+ if kind == "Twitter":
+ kwargs["access_type"] = "read" if feature == "listen" else "write"
-@app.route('/delete/finish')
+ try:
+ return redirect(source.OAUTH_START(path).redirect_url(state=state))
+ except werkzeug.exceptions.HTTPException:
+ # raised by us, probably via self.error()
+ raise
+ except Exception as e:
+ code, body = util.interpret_http_exception(e)
+ if not code and util.is_connection_failure(e):
+ code = "-"
+ body = str(e)
+ if code:
+ flash(f"{source.GR_CLASS.NAME} API error {code}: {body}")
+ return redirect(source.bridgy_url())
+ else:
+ raise
+
+
+@app.route("/delete/finish")
def delete_finish():
- parts = util.decode_oauth_state(request.values.get('state') or '')
- callback = parts and parts.get('callback')
-
- if request.values.get('declined'):
- # disable declined means no change took place
- if callback:
- callback = util.add_query_params(callback, {'result': 'declined'})
- return redirect(callback)
- else:
- flash('If you want to disable, please approve the prompt.')
- return redirect('/')
- return
-
- if not parts or 'feature' not in parts or 'source' not in parts:
- error('state query parameter must include "feature" and "source"')
-
- feature = parts['feature']
- if feature not in (Source.FEATURES):
- error('cannot delete unknown feature %s' % feature)
-
- logged_in_as = ndb.Key(urlsafe=request.args['auth_entity']).get()
- source = ndb.Key(urlsafe=parts['source']).get()
-
- logins = None
- if logged_in_as and logged_in_as.is_authority_for(source.auth_entity):
- # TODO: remove credentials
- if feature in source.features:
- source.features.remove(feature)
- source.put()
-
- # remove login cookie
- logins = util.get_logins()
- login = util.Login(path=source.bridgy_path(), site=source.SHORT_NAME,
- name=source.label_name())
- if login in logins:
- logins.remove(login)
-
- noun = 'webmentions' if feature == 'webmention' else feature + 'ing'
- if callback:
- callback = util.add_query_params(callback, {
- 'result': 'success',
- 'user': source.bridgy_url(),
- 'key': source.key.urlsafe().decode(),
- })
+ parts = util.decode_oauth_state(request.values.get("state") or "")
+ callback = parts and parts.get("callback")
+
+ if request.values.get("declined"):
+ # the disable was declined, so no change took place
+ if callback:
+ callback = util.add_query_params(callback, {"result": "declined"})
+ return redirect(callback)
+ else:
+ flash("If you want to disable, please approve the prompt.")
+ return redirect("/")
+ return
+
+ if not parts or "feature" not in parts or "source" not in parts:
+ error('state query parameter must include "feature" and "source"')
+
+ feature = parts["feature"]
+ if feature not in (Source.FEATURES):
+ error("cannot delete unknown feature %s" % feature)
+
+ logged_in_as = ndb.Key(urlsafe=request.args["auth_entity"]).get()
+ source = ndb.Key(urlsafe=parts["source"]).get()
+
+ logins = None
+ if logged_in_as and logged_in_as.is_authority_for(source.auth_entity):
+ # TODO: remove credentials
+ if feature in source.features:
+ source.features.remove(feature)
+ source.put()
+
+ # remove login cookie
+ logins = util.get_logins()
+ login = util.Login(
+ path=source.bridgy_path(),
+ site=source.SHORT_NAME,
+ name=source.label_name(),
+ )
+ if login in logins:
+ logins.remove(login)
+
+ noun = "webmentions" if feature == "webmention" else feature + "ing"
+ if callback:
+ callback = util.add_query_params(
+ callback,
+ {
+ "result": "success",
+ "user": source.bridgy_url(),
+ "key": source.key.urlsafe().decode(),
+ },
+ )
+ else:
+ msg = f"Disabled {noun} for {source.label()}."
+ if not source.features:
+ msg += " Sorry to see you go!"
+ flash(msg)
else:
- msg = f'Disabled {noun} for {source.label()}.'
- if not source.features:
- msg += ' Sorry to see you go!'
- flash(msg)
- else:
- if callback:
- callback = util.add_query_params(callback, {'result': 'failure'})
- else:
- flash(f'Please log into {source.GR_CLASS.NAME} as {source.name} to disable it here.')
+ if callback:
+ callback = util.add_query_params(callback, {"result": "failure"})
+ else:
+ flash(
+ f"Please log into {source.GR_CLASS.NAME} as {source.name} to disable it here."
+ )
- url = callback if callback else source.bridgy_url() if source.features else '/'
- return redirect(url, logins=logins)
+ url = callback if callback else source.bridgy_url() if source.features else "/"
+ return redirect(url, logins=logins)
-@app.route('/poll-now', methods=['POST'])
+@app.route("/poll-now", methods=["POST"])
def poll_now():
- source = util.load_source()
- util.add_poll_task(source, now=True)
- flash("Polling now. Refresh in a minute to see what's new!")
- return redirect(source.bridgy_url())
+ source = util.load_source()
+ util.add_poll_task(source, now=True)
+ flash("Polling now. Refresh in a minute to see what's new!")
+ return redirect(source.bridgy_url())
-@app.route('/crawl-now', methods=['POST'])
+@app.route("/crawl-now", methods=["POST"])
def crawl_now():
- source = None
-
- @ndb.transactional()
- def setup_refetch_hfeed():
- nonlocal source
- source = util.load_source()
- source.last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER
- source.last_feed_syndication_url = None
- source.put()
+ source = None
+
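+ # reset the refetch triggers in a transaction so the next poll re-crawls
+ # the author's h-feed from scratch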
+ @ndb.transactional()
+ def setup_refetch_hfeed():
+ nonlocal source
+ source = util.load_source()
+ source.last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER
+ source.last_feed_syndication_url = None
+ source.put()
- setup_refetch_hfeed()
- util.add_poll_task(source, now=True)
- flash("Crawling now. Refresh in a minute to see what's new!")
- return redirect(source.bridgy_url())
+ setup_refetch_hfeed()
+ util.add_poll_task(source, now=True)
+ flash("Crawling now. Refresh in a minute to see what's new!")
+ return redirect(source.bridgy_url())
-@app.route('/retry', methods=['POST'])
+@app.route("/retry", methods=["POST"])
def retry():
- entity = util.load_source()
- if not isinstance(entity, Webmentions):
- error(f'Unexpected key kind {entity.key.kind()}')
-
- source = entity.source.get()
+ entity = util.load_source()
+ if not isinstance(entity, Webmentions):
+ error(f"Unexpected key kind {entity.key.kind()}")
- # run OPD to pick up any new SyndicatedPosts. note that we don't refetch
- # their h-feed, so if they've added a syndication URL since we last crawled,
- # retry won't make us pick it up. background in #524.
- if entity.key.kind() == 'Response':
source = entity.source.get()
- for activity in [json_loads(a) for a in entity.activities_json]:
- originals, mentions = original_post_discovery.discover(
- source, activity, fetch_hfeed=False, include_redirect_sources=False)
- entity.unsent += original_post_discovery.targets_for_response(
- json_loads(entity.response_json), originals=originals, mentions=mentions)
- entity.restart()
- flash('Retrying. Refresh in a minute to see the results!')
- return redirect(request.values.get('redirect_to') or source.bridgy_url())
+ # run OPD to pick up any new SyndicatedPosts. note that we don't refetch
+ # their h-feed, so if they've added a syndication URL since we last crawled,
+ # retry won't make us pick it up. background in #524.
+ if entity.key.kind() == "Response":
+ source = entity.source.get()
+ for activity in [json_loads(a) for a in entity.activities_json]:
+ originals, mentions = original_post_discovery.discover(
+ source, activity, fetch_hfeed=False, include_redirect_sources=False
+ )
+ entity.unsent += original_post_discovery.targets_for_response(
+ json_loads(entity.response_json), originals=originals, mentions=mentions
+ )
+
+ entity.restart()
+ flash("Retrying. Refresh in a minute to see the results!")
+ return redirect(request.values.get("redirect_to") or source.bridgy_url())
+
+
+@app.route("/discover", methods=["POST"])
+def discover():
+ source = util.load_source()
+ # validate URL, find silo post
+ url = request.form["url"]
+ domain = util.domain_from_link(url)
+ path = urllib.parse.urlparse(url).path
+ msg = "Discovering now. Refresh in a minute to see the results!"
+
+ gr_source = source.gr_source
+ if domain == gr_source.DOMAIN:
+ post_id = gr_source.post_id(url)
+ if post_id:
+ type = "event" if path.startswith("/events/") else None
+ util.add_discover_task(source, post_id, type=type)
+ else:
+ msg = "Sorry, that doesn't look like a %s post URL." % gr_source.NAME
+
+ elif util.domain_or_parent_in(domain, source.domains):
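+ # the URL is on the user's own site: run posse-post-discovery on it
+ # directly to find silo syndication links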
+ synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
+ if synd_links:
+ for link in synd_links:
+ util.add_discover_task(source, gr_source.post_id(link))
+ source.updates = {"last_syndication_url": util.now_fn()}
+ models.Source.put_updates(source)
+ else:
+ msg = "Failed to fetch %s or find a %s syndication link." % (
+ util.pretty_link(url),
+ gr_source.NAME,
+ )
-@app.route('/discover', methods=['POST'])
-def discover():
- source = util.load_source()
-
- # validate URL, find silo post
- url = request.form['url']
- domain = util.domain_from_link(url)
- path = urllib.parse.urlparse(url).path
- msg = 'Discovering now. Refresh in a minute to see the results!'
-
- gr_source = source.gr_source
- if domain == gr_source.DOMAIN:
- post_id = gr_source.post_id(url)
- if post_id:
- type = 'event' if path.startswith('/events/') else None
- util.add_discover_task(source, post_id, type=type)
- else:
- msg = "Sorry, that doesn't look like a %s post URL." % gr_source.NAME
-
- elif util.domain_or_parent_in(domain, source.domains):
- synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
- if synd_links:
- for link in synd_links:
- util.add_discover_task(source, gr_source.post_id(link))
- source.updates = {'last_syndication_url': util.now_fn()}
- models.Source.put_updates(source)
else:
- msg = 'Failed to fetch %s or find a %s syndication link.' % (
- util.pretty_link(url), gr_source.NAME)
+ msg = "Please enter a URL on either your web site or %s." % gr_source.NAME
- else:
- msg = 'Please enter a URL on either your web site or %s.' % gr_source.NAME
+ flash(msg)
+ return redirect(source.bridgy_url())
- flash(msg)
- return redirect(source.bridgy_url())
-
-@app.route('/edit-websites', methods=['GET'])
+@app.route("/edit-websites", methods=["GET"])
def edit_websites_get():
- return render_template('edit_websites.html',
- source=util.preprocess_source(util.load_source()))
+ return render_template(
+ "edit_websites.html", source=util.preprocess_source(util.load_source())
+ )
-@app.route('/edit-websites', methods=['POST'])
+@app.route("/edit-websites", methods=["POST"])
def edit_websites_post():
- source = util.load_source()
- redirect_url = '%s?%s' % (request.path, urllib.parse.urlencode({
- 'source_key': source.key.urlsafe().decode(),
- }))
-
- add = request.values.get('add')
- delete = request.values.get('delete')
- if (add and delete) or (not add and not delete):
- error('Either add or delete param (but not both) required')
-
- link = util.pretty_link(add or delete)
-
- if add:
- resolved = Source.resolve_profile_url(add)
- if resolved:
- if resolved in source.domain_urls:
- flash('%s already exists.' % link)
- else:
- source.domain_urls.append(resolved)
- domain = util.domain_from_link(resolved)
- source.domains.append(domain)
- source.put()
- flash('Added %s.' % link)
- else:
- flash("%s doesn't look like your web site. Try again?" % link)
+ source = util.load_source()
+ redirect_url = "%s?%s" % (
+ request.path,
+ urllib.parse.urlencode(
+ {
+ "source_key": source.key.urlsafe().decode(),
+ }
+ ),
+ )
+
+ add = request.values.get("add")
+ delete = request.values.get("delete")
+ if (add and delete) or (not add and not delete):
+ error("Either add or delete param (but not both) required")
+
+ link = util.pretty_link(add or delete)
+
+ if add:
+ resolved = Source.resolve_profile_url(add)
+ if resolved:
+ if resolved in source.domain_urls:
+ flash("%s already exists." % link)
+ else:
+ source.domain_urls.append(resolved)
+ domain = util.domain_from_link(resolved)
+ source.domains.append(domain)
+ source.put()
+ flash("Added %s." % link)
+ else:
+ flash("%s doesn't look like your web site. Try again?" % link)
- else:
- assert delete
- try:
- source.domain_urls.remove(delete)
- except ValueError:
- error(f"{delete} not found in {source.label()}'s current web sites")
- domain = util.domain_from_link(delete)
- if domain not in set(util.domain_from_link(url) for url in source.domain_urls):
- source.domains.remove(domain)
- source.put()
- flash(f'Removed {link}.')
+ else:
+ assert delete
+ try:
+ source.domain_urls.remove(delete)
+ except ValueError:
+ error(f"{delete} not found in {source.label()}'s current web sites")
+ domain = util.domain_from_link(delete)
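+ # only drop the domain if no other registered URL still maps to it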
+ if domain not in set(util.domain_from_link(url) for url in source.domain_urls):
+ source.domains.remove(domain)
+ source.put()
+ flash(f"Removed {link}.")
- return redirect(redirect_url)
+ return redirect(redirect_url)
-@app.route('/<any(listen, publish):_>', methods=('GET', 'HEAD'))
+@app.route("/", methods=("GET", "HEAD"))
def redirect_to_front_page(_):
- """Redirect to the front page."""
- return redirect(util.add_query_params('/', request.values.items()), code=301)
+ """Redirect to the front page."""
+ return redirect(util.add_query_params("/", request.values.items()), code=301)
-@app.route('/logout')
+@app.route("/logout")
def logout():
- """Redirect to the front page."""
- flash('Logged out.')
- return redirect('/', logins=[])
+ """Redirect to the front page."""
+ flash("Logged out.")
+ return redirect("/", logins=[])
-@app.route('/csp-report')
+@app.route("/csp-report")
def csp_report():
- """Log Content-Security-Policy reports. https://content-security-policy.com/"""
- logging.info(request.values.get_data(as_text=True))
- return 'OK'
+ """Log Content-Security-Policy reports. https://content-security-policy.com/"""
+ logging.info(request.values.get_data(as_text=True))
+ return "OK"
-@app.route('/log')
+@app.route("/log")
@flask_util.cached(cache, logs.CACHE_TIME)
def log():
return logs.log()
diff --git a/publish.py b/publish.py
index 6a409e5b..ad074cb3 100644
--- a/publish.py
+++ b/publish.py
@@ -16,11 +16,11 @@
from granary import source as gr_source
import grpc
from oauth_dropins import (
- flickr as oauth_flickr,
- github as oauth_github,
- mastodon as oauth_mastodon,
- meetup as oauth_meetup,
- twitter as oauth_twitter,
+ flickr as oauth_flickr,
+ github as oauth_github,
+ mastodon as oauth_mastodon,
+ meetup as oauth_meetup,
+ twitter as oauth_twitter,
)
from oauth_dropins.webutil import appengine_info
from oauth_dropins.webutil import flask_util
@@ -47,728 +47,873 @@
# image URLs matching this regexp should be ignored.
# (This matches Wordpress Jetpack lazy loaded image placeholders.)
# https://github.com/snarfed/bridgy/issues/798
-IGNORE_IMAGE_RE = re.compile(r'.*/lazy-images/images/1x1\.trans\.gif$')
-
-PUBLISHABLE_TYPES = frozenset((
- 'h-checkin',
- 'h-entry',
- 'h-event',
- 'h-geo',
- 'h-item',
- 'h-listing',
- 'h-product',
- 'h-recipe',
- 'h-resume',
- 'h-review',
-))
+IGNORE_IMAGE_RE = re.compile(r".*/lazy-images/images/1x1\.trans\.gif$")
+
+PUBLISHABLE_TYPES = frozenset(
+ (
+ "h-checkin",
+ "h-entry",
+ "h-event",
+ "h-geo",
+ "h-item",
+ "h-listing",
+ "h-product",
+ "h-recipe",
+ "h-resume",
+ "h-review",
+ )
+)
class CollisionError(RuntimeError):
- """Multiple publish requests for the same page at the same time."""
- pass
+ """Multiple publish requests for the same page at the same time."""
+
+ pass
class PublishBase(webmention.Webmention):
- """Base handler for both previews and publishes.
+ """Base handler for both previews and publishes.
- Subclasses must set the :attr:`PREVIEW` attribute to True or False. They may
- also override other methods.
+ Subclasses must set the :attr:`PREVIEW` attribute to True or False. They may
+ also override other methods.
- Attributes:
- fetched: :class:`requests.Response` from fetching source_url
- shortlink: rel-shortlink found in the original post, if any
- """
- PREVIEW = None
+ Attributes:
+ fetched: :class:`requests.Response` from fetching source_url
+ shortlink: rel-shortlink found in the original post, if any
+ """
- shortlink = None
- source = None
+ PREVIEW = None
- def authorize(self):
- """Returns True if the current user is authorized for this request.
+ shortlink = None
+ source = None
- Otherwise, should call :meth:`self.error()` to provide an appropriate
- error message.
- """
- return True
-
- def source_url(self):
- return request.values['source'].strip()
-
- def target_url(self):
- return request.values['target'].strip()
-
- def include_link(self, item):
- val = request.values.get('bridgy_omit_link', None)
-
- if val is None:
- # _run has already parsed and validated the target URL
- vals = urllib.parse.parse_qs(urllib.parse.urlparse(self.target_url()).query)\
- .get('bridgy_omit_link')
- val = vals[0] if vals else None
-
- if val is None:
- vals = item.get('properties', {}).get('bridgy-omit-link')
- val = vals[0] if vals else None
-
- result = (gr_source.INCLUDE_LINK if val is None or val.lower() == 'false'
- else gr_source.INCLUDE_IF_TRUNCATED if val.lower() == 'maybe'
- else gr_source.OMIT_LINK)
-
- return result
-
- def ignore_formatting(self, item):
- val = request.values.get('bridgy_ignore_formatting', None)
-
- if val is None:
- # _run has already parsed and validated the target URL
- vals = urllib.parse.parse_qs(urllib.parse.urlparse(self.target_url()).query)\
- .get('bridgy_ignore_formatting')
- val = vals[0] if vals else None
-
- if val is not None:
- return val.lower() in ('', 'true')
-
- return 'bridgy-ignore-formatting' in item.get('properties', {})
-
- def maybe_inject_silo_content(self, item):
- props = item.setdefault('properties', {})
- silo_content = props.get('bridgy-%s-content' % self.source.SHORT_NAME, [])
- if silo_content:
- props['content'] = silo_content
- props.pop('name', None)
- props.pop('summary', None)
-
- def _run(self):
- """Returns CreationResult on success, None otherwise."""
- logging.info('Params: %s', list(request.values.items()))
- assert self.PREVIEW in (True, False)
-
- # parse and validate target URL
- try:
- parsed = urllib.parse.urlparse(self.target_url())
- except BaseException:
- self.error(f'Could not parse target URL {self.target_url()}')
-
- domain = parsed.netloc
- path_parts = parsed.path.rsplit('/', 1)
- source_cls = SOURCE_NAMES.get(path_parts[-1])
- if (domain not in util.DOMAINS or
- len(path_parts) != 2 or path_parts[0] != '/publish' or not source_cls):
- self.error(
- 'Target must be brid.gy/publish/{flickr,github,mastodon,meetup,twitter}')
- elif source_cls == Instagram:
- self.error(f'Sorry, {source_cls.GR_CLASS.NAME} is not supported.')
-
- # resolve source URL
- source_url = self.source_url()
- resolved_url, domain, ok = util.get_webmention_target(
- source_url, replace_test_domains=False)
- # show nice error message if they're trying to publish a silo post
- if domain in SOURCE_DOMAINS:
- return self.error(
- "Looks like that's a %s URL. Try one from your web site instead!" %
- SOURCE_DOMAINS[domain].GR_CLASS.NAME)
- elif not ok:
- return self.error('Unsupported source URL %s' % resolved_url)
- elif not domain:
- return self.error('Could not parse source URL %s' % resolved_url)
-
- # look up source by domain
- self.source = self._find_source(source_cls, resolved_url, domain)
-
- content_param = 'bridgy_%s_content' % self.source.SHORT_NAME
- if content_param in request.values:
- return self.error('The %s parameter is not supported' % content_param)
-
- # show nice error message if they're trying to publish their home page
- for domain_url in self.source.domain_urls:
- domain_url_parts = urllib.parse.urlparse(domain_url)
- for check_url in resolved_url, source_url:
- parts = urllib.parse.urlparse(check_url)
- if (parts.netloc == domain_url_parts.netloc and
- parts.path.strip('/') == domain_url_parts.path.strip('/') and
- not parts.query):
- return self.error(
- "Looks like that's your home page. Try one of your posts instead!")
-
- # done with the sanity checks, create the Publish entity
- self.entity = self.get_or_add_publish_entity(resolved_url)
- if not self.entity:
- return None
-
- # fetch the source page!
- fragment = urllib.parse.urlparse(source_url).fragment
- try:
- resp = self.fetch_mf2(resolved_url, id=fragment, raise_errors=True)
- except HTTPException:
- # raised by us, probably via self.error()
- raise
- except BaseException as e:
- status, body = util.interpret_http_exception(e)
- if status == '410':
- return self.delete(resolved_url)
- return self.error('Could not fetch source URL %s' % resolved_url)
-
- if not resp:
- return
- self.fetched, mf2 = resp
-
- # check that we haven't already published this URL. (we can't do this before
- # fetching because it might be a 410 delete, which we only know by fetching.)
- if (self.entity.status == 'complete' and self.entity.type != 'preview' and
- not self.PREVIEW and not appengine_info.LOCAL):
- return self.error("Sorry, you've already published that page, and Bridgy Publish doesn't support updating existing posts. Details: https://github.com/snarfed/bridgy/issues/84",
- extra_json={'original': self.entity.published})
-
- # find rel-shortlink, if any
- # http://microformats.org/wiki/rel-shortlink
- # https://github.com/snarfed/bridgy/issues/173
- shortlinks = mf2['rels'].get('shortlink')
- if shortlinks:
- self.shortlink = urllib.parse.urljoin(resolved_url, shortlinks[0])
-
- # loop through each item and its children and try to preview/create it. if
- # it fails, try the next one. break after the first one that works.
- result = None
- types = set()
- queue = collections.deque(mf2.get('items', []))
- while queue:
- item = queue.popleft()
- item_types = set(item.get('type'))
- if 'h-feed' in item_types and 'h-entry' not in item_types:
- queue.extend(item.get('children', []))
- continue
- elif not item_types & PUBLISHABLE_TYPES:
- types = types.union(item_types)
- continue
-
- try:
- result = self.attempt_single_item(item)
- if self.entity.published:
- break
- if result.abort:
- if result.error_plain:
- self.error(result.error_plain, html=result.error_html, data=item)
- return
- # try the next item
- for embedded in ('rsvp', 'invitee', 'repost', 'repost-of', 'like',
- 'like-of', 'in-reply-to'):
- if embedded in item.get('properties', []):
- item_types.add(embedded)
- logging.info(
- 'Object type(s) %s not supported; error=%s; trying next.',
- item_types, result.error_plain)
- types = types.union(item_types)
- queue.extend(item.get('children', []))
- except HTTPException:
- # raised by us, probably via self.error()
- raise
- except BaseException as e:
- code, body = util.interpret_http_exception(e)
- if code in self.source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
- # the user deauthorized the bridgy app, or the token expired, so
- # disable this source.
- logging.warning(f'Disabling source due to: {e}', exc_info=True)
- self.source.status = 'disabled'
- self.source.put()
- if isinstance(e, (NotImplementedError, ValueError, urllib.error.URLError)):
- code = '400'
- elif not code:
- raise
- msg = 'Error: %s %s' % (body or '', e)
- return self.error(msg, status=code, report=code not in ('400', '404', '502', '503', '504'))
-
- if not self.entity.published: # tried all the items
- types.discard('h-entry')
- types.discard('h-note')
- if types:
- msg = ("%s doesn't support type(s) %s, or no content was found." %
- (source_cls.GR_CLASS.NAME, ' + '.join(types)))
- return self.error(msg, data=mf2)
- else:
- msg = 'Could not find content in h-entry or any other element!'
- return self.error(msg, html=msg, data=mf2)
-
- # write results to datastore, but don't overwrite a previous publish with a
- # preview.
- if not (self.PREVIEW and self.entity.type != 'preview'):
- self.entity.status = 'complete'
- self.entity.put()
-
- return result
-
- def _find_source(self, source_cls, url, domain):
- """Returns the source that should publish a post URL, or None if not found.
-
- Args:
- source_cls: :class:`models.Source` subclass for this silo
- url: string
- domain: string, url's domain
-
- Returns: :class:`models.Source`
- """
- domain = domain.lower()
- sources = source_cls.query().filter(source_cls.domains == domain).fetch(100)
- if not sources:
- msg = "Could not find %(type)s account for %(domain)s. Check that your %(type)s profile has %(domain)s in its web site or link field, then try signing up again." % {'type': source_cls.GR_CLASS.NAME, 'domain': domain}
- return self.error(msg, html=msg)
-
- current_url = ''
- sources_ready = []
- best_match = None
- for source in sources:
- logging.info('Source: %s , features %s, status %s, poll status %s',
- source.bridgy_url(), source.features, source.status,
- source.poll_status)
- if source.status != 'disabled' and 'publish' in source.features:
- # use a source that has a domain_url matching the url provided,
- # including path. find the source with the closest match.
- sources_ready.append(source)
- schemeless_url = util.schemeless(url.lower()).strip('/')
- for domain_url in source.domain_urls:
- schemeless_domain_url = util.schemeless(domain_url.lower()).strip('/')
- if (schemeless_url.startswith(schemeless_domain_url) and
- len(domain_url) > len(current_url)):
- current_url = domain_url
- best_match = source
-
- if best_match:
- return best_match
-
- if sources_ready:
- msg = 'No account found that matches {util.pretty_link(url)}. Check that the web site URL is in your silo profile, then sign up again.'
- else:
- msg = 'Publish is not enabled for your account. Try signing up!'
- self.error(msg, html=msg)
-
- def attempt_single_item(self, item):
- """Attempts to preview or publish a single mf2 item.
-
- Args:
- item: mf2 item dict from mf2py
-
- Returns:
- CreationResult
- """
- self.maybe_inject_silo_content(item)
- obj = microformats2.json_to_object(item)
-
- ignore_formatting = self.ignore_formatting(item)
- if ignore_formatting:
- prop = microformats2.first_props(item.get('properties', {}))
- content = microformats2.get_text(prop.get('content'))
- if content:
- obj['content'] = content.strip()
-
- # which original post URL to include? in order of preference:
- # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
- # 2. original user-provided URL if it redirected
- # 3. u-url if available
- # 4. actual final fetched URL
- if self.shortlink:
- obj['url'] = self.shortlink
- elif self.source_url() != self.fetched.url:
- obj['url'] = self.source_url()
- elif 'url' not in obj:
- obj['url'] = self.fetched.url
- logging.debug('Converted to ActivityStreams object: %s', json_dumps(obj, indent=2))
-
- # posts and comments need content
- obj_type = obj.get('objectType')
- if obj_type in ('note', 'article', 'comment'):
- if (not obj.get('content') and not obj.get('summary') and
- not obj.get('displayName')):
- return gr_source.creation_result(
- abort=False,
- error_plain='Could not find content in %s' % self.fetched.url,
- error_html='Could not find content in %s' % self.fetched.url)
-
- self.preprocess(obj)
-
- include_link = self.include_link(item)
-
- if not self.authorize():
- return gr_source.creation_result(abort=True)
-
- if self.PREVIEW:
- result = self.source.gr_source.preview_create(
- obj, include_link=include_link, ignore_formatting=ignore_formatting)
- previewed = result.content or result.description
- if self.entity.type == 'preview':
- self.entity.published = previewed
- if not previewed:
- return result # there was an error
- return self._render_preview(result, include_link=include_link)
-
- else:
- result = self.source.gr_source.create(
- obj, include_link=include_link, ignore_formatting=ignore_formatting)
- self.entity.published = result.content
- if not result.content:
- return result # there was an error
- if 'url' not in self.entity.published:
- self.entity.published['url'] = obj.get('url')
- self.entity.type = self.entity.published.get('type') or models.get_type(obj)
- logging.info('Returning %s', json_dumps(self.entity.published, indent=2))
- return gr_source.creation_result(
- json_dumps(self.entity.published, indent=2))
-
- def delete(self, source_url):
- """Attempts to delete or preview delete a published post.
-
- Args:
- source_url: string, original post URL
-
- Returns:
- dict response data with at least id and url
- """
- assert self.entity
- if ((self.entity.status != 'complete' or self.entity.type == 'preview') and
- not appengine_info.LOCAL):
- return self.error("Can't delete this post from %s because Bridgy Publish didn't originally POSSE it there" % self.source.gr_source.NAME)
-
- id = self.entity.published.get('id')
- url = self.entity.published.get('url')
- if not id and url:
- id = self.source.gr_source.post_id(url)
-
- if not id:
- return self.error(
- "Bridgy Publish can't find the id of the %s post that it originally published for %s" %
-        (self.source.gr_source.NAME, source_url))
-
- if self.PREVIEW:
- try:
- return self._render_preview(self.source.gr_source.preview_delete(id))
- except NotImplementedError:
- return self.error("Sorry, deleting isn't supported for %s yet" %
- self.source.gr_source.NAME)
-
- logging.info('Deleting silo post id %s', id)
- self.entity = models.Publish(parent=self.entity.key.parent(),
- source=self.source.key, type='delete')
- self.entity.put()
- logging.debug("Publish entity for delete: '%s'",
- self.entity.key.urlsafe().decode())
-
- resp = self.source.gr_source.delete(id)
- resp.content.setdefault('id', id)
- resp.content.setdefault('url', url)
- logging.info(resp.content)
- self.entity.published = resp.content
- self.entity.status = 'deleted'
- self.entity.put()
- return resp
-
- def preprocess(self, activity):
- """Preprocesses an item before trying to publish it.
-
- Specifically, expands inReplyTo/object URLs with rel=syndication URLs.
-
- Args:
- activity: an ActivityStreams activity or object being published
- """
- self.source.preprocess_for_publish(activity)
- self.expand_target_urls(activity)
+ def authorize(self):
+ """Returns True if the current user is authorized for this request.
- activity['image'] = [img for img in util.get_list(activity, 'image')
- if not IGNORE_IMAGE_RE.match(img.get('url', ''))]
- if not activity['image']:
- del activity['image']
+ Otherwise, should call :meth:`self.error()` to provide an appropriate
+ error message.
+ """
+ return True
- def expand_target_urls(self, activity):
- """Expand the inReplyTo or object fields of an ActivityStreams object
- by fetching the original and looking for rel=syndication URLs.
+ def source_url(self):
+ return request.values["source"].strip()
- This method modifies the dict in place.
+ def target_url(self):
+ return request.values["target"].strip()
- Args:
- activity: an ActivityStreams dict of the activity being published
- """
- for field in ('inReplyTo', 'object'):
- # microformats2.json_to_object de-dupes, no need to do it here
- objs = activity.get(field)
- if not objs:
- continue
-
- if isinstance(objs, dict):
- objs = [objs]
-
- augmented = list(objs)
- for obj in objs:
- url = obj.get('url')
- if not url:
- continue
-
- parsed = urllib.parse.urlparse(url)
- # ignore home pages. https://github.com/snarfed/bridgy/issues/760
- if parsed.path in ('', '/'):
- continue
-
- # get_webmention_target weeds out silos and non-HTML targets
- # that we wouldn't want to download and parse
- url, _, ok = util.get_webmention_target(url)
- if not ok:
- continue
-
- logging.debug('expand_target_urls fetching field=%s, url=%s', field, url)
- try:
- mf2 = util.fetch_mf2(url)
- except AssertionError:
- raise # for unit tests
- except HTTPException:
- # raised by us, probably via self.error()
- raise
- except BaseException:
- # it's not a big deal if we can't fetch an in-reply-to url
- logging.info('expand_target_urls could not fetch field=%s, url=%s',
- field, url, exc_info=True)
- continue
+ def include_link(self, item):
+ val = request.values.get("bridgy_omit_link", None)
- synd_urls = mf2['rels'].get('syndication', [])
+ if val is None:
+ # _run has already parsed and validated the target URL
+ vals = urllib.parse.parse_qs(
+ urllib.parse.urlparse(self.target_url()).query
+ ).get("bridgy_omit_link")
+ val = vals[0] if vals else None
- # look for syndication urls in the first h-entry
- queue = collections.deque(mf2.get('items', []))
- while queue:
- item = queue.popleft()
- item_types = set(item.get('type', []))
- if 'h-feed' in item_types and 'h-entry' not in item_types:
- queue.extend(item.get('children', []))
- continue
+ if val is None:
+ vals = item.get("properties", {}).get("bridgy-omit-link")
+ val = vals[0] if vals else None
- # these can be urls or h-cites
- synd_urls += microformats2.get_string_urls(
- item.get('properties', {}).get('syndication', []))
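+        # map the param to granary's link-inclusion modes: absent or "false"
+        # includes the link, "maybe" includes it only if truncated, anything
+        # else omits it.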
+ result = (
+ gr_source.INCLUDE_LINK
+ if val is None or val.lower() == "false"
+ else gr_source.INCLUDE_IF_TRUNCATED
+ if val.lower() == "maybe"
+ else gr_source.OMIT_LINK
+ )
- logging.debug('expand_target_urls found rel=syndication for url=%s: %r', url, synd_urls)
- augmented += [{'url': u} for u in synd_urls]
+ return result
- activity[field] = augmented
+ def ignore_formatting(self, item):
+ val = request.values.get("bridgy_ignore_formatting", None)
- def get_or_add_publish_entity(self, source_url):
- """Creates and stores :class:`models.Publish` entity.
+ if val is None:
+ # _run has already parsed and validated the target URL
+ vals = urllib.parse.parse_qs(
+ urllib.parse.urlparse(self.target_url()).query
+ ).get("bridgy_ignore_formatting")
+ val = vals[0] if vals else None
- ...and if necessary, :class:`models.PublishedPage` entity.
+ if val is not None:
+ return val.lower() in ("", "true")
- Args:
- source_url: string
- """
- try:
- return self._get_or_add_publish_entity(source_url)
- except CollisionError:
- return self.error("You're already publishing that post in another request.",
- status=429)
- except Exception as e:
- code = getattr(e, 'code', None)
- details = getattr(e, 'details', None)
- logging.info((code and code(), details and details()))
- if (code and code() == grpc.StatusCode.ABORTED and
- details and 'too much contention' in details()):
- return self.error("You're already publishing that post in another request.",
- status=429)
- raise
-
- @ndb.transactional()
- def _get_or_add_publish_entity(self, source_url):
- page = PublishedPage.get_or_insert(source_url)
-
- # Detect concurrent publish request for the same page
- # https://github.com/snarfed/bridgy/issues/996
- pending = Publish.query(
- Publish.status == 'new', Publish.type != 'preview',
- Publish.source == self.source.key, ancestor=page.key).get()
- if pending:
- logging.warning(f'Collided with publish: {pending.key.urlsafe().decode()}')
- raise CollisionError()
-
- entity = Publish.query(
- Publish.status == 'complete', Publish.type != 'preview',
- Publish.source == self.source.key, ancestor=page.key).get()
- if entity is None:
- entity = Publish(parent=page.key, source=self.source.key)
- if self.PREVIEW:
- entity.type = 'preview'
- entity.put()
-
- logging.debug("Publish entity: '%s'", entity.key.urlsafe().decode())
- return entity
-
- def _render_preview(self, result, include_link=False):
- """Renders a preview CreationResult as HTML.
-
- Args:
- result: CreationResult
- include_link: boolean
-
- Returns: CreationResult with the rendered HTML in content
- """
- state = {
- 'source_key': self.source.key.urlsafe().decode(),
- 'source_url': self.source_url(),
- 'target_url': self.target_url(),
- 'include_link': include_link,
- }
- vars = {
- 'source': util.preprocess_source(self.source),
- 'preview': result.content,
- 'description': result.description,
- 'webmention_endpoint': util.host_url('/publish/webmention'),
- 'state': util.encode_oauth_state(state),
- **state,
- }
- logging.info(f'Rendering preview with template vars {pprint.pformat(vars)}')
- return gr_source.creation_result(render_template('preview.html', **vars))
+ return "bridgy-ignore-formatting" in item.get("properties", {})
+
+ def maybe_inject_silo_content(self, item):
+ props = item.setdefault("properties", {})
+ silo_content = props.get("bridgy-%s-content" % self.source.SHORT_NAME, [])
+ if silo_content:
+ props["content"] = silo_content
+ props.pop("name", None)
+ props.pop("summary", None)
+
+ def _run(self):
+ """Returns CreationResult on success, None otherwise."""
+ logging.info("Params: %s", list(request.values.items()))
+ assert self.PREVIEW in (True, False)
+
+ # parse and validate target URL
+ try:
+ parsed = urllib.parse.urlparse(self.target_url())
+ except BaseException:
+ self.error(f"Could not parse target URL {self.target_url()}")
+
+ domain = parsed.netloc
+ path_parts = parsed.path.rsplit("/", 1)
+ source_cls = SOURCE_NAMES.get(path_parts[-1])
+ if (
+ domain not in util.DOMAINS
+ or len(path_parts) != 2
+ or path_parts[0] != "/publish"
+ or not source_cls
+ ):
+ self.error(
+ "Target must be brid.gy/publish/{flickr,github,mastodon,meetup,twitter}"
+ )
+ elif source_cls == Instagram:
+ self.error(f"Sorry, {source_cls.GR_CLASS.NAME} is not supported.")
+
+ # resolve source URL
+ source_url = self.source_url()
+ resolved_url, domain, ok = util.get_webmention_target(
+ source_url, replace_test_domains=False
+ )
+ # show nice error message if they're trying to publish a silo post
+ if domain in SOURCE_DOMAINS:
+ return self.error(
+ "Looks like that's a %s URL. Try one from your web site instead!"
+ % SOURCE_DOMAINS[domain].GR_CLASS.NAME
+ )
+ elif not ok:
+ return self.error("Unsupported source URL %s" % resolved_url)
+ elif not domain:
+ return self.error("Could not parse source URL %s" % resolved_url)
+
+ # look up source by domain
+ self.source = self._find_source(source_cls, resolved_url, domain)
+
+ content_param = "bridgy_%s_content" % self.source.SHORT_NAME
+ if content_param in request.values:
+ return self.error("The %s parameter is not supported" % content_param)
+
+ # show nice error message if they're trying to publish their home page
+ for domain_url in self.source.domain_urls:
+ domain_url_parts = urllib.parse.urlparse(domain_url)
+ for check_url in resolved_url, source_url:
+ parts = urllib.parse.urlparse(check_url)
+ if (
+ parts.netloc == domain_url_parts.netloc
+ and parts.path.strip("/") == domain_url_parts.path.strip("/")
+ and not parts.query
+ ):
+ return self.error(
+ "Looks like that's your home page. Try one of your posts instead!"
+ )
+
+ # done with the sanity checks, create the Publish entity
+ self.entity = self.get_or_add_publish_entity(resolved_url)
+ if not self.entity:
+ return None
+
+ # fetch the source page!
+ fragment = urllib.parse.urlparse(source_url).fragment
+ try:
+ resp = self.fetch_mf2(resolved_url, id=fragment, raise_errors=True)
+ except HTTPException:
+ # raised by us, probably via self.error()
+ raise
+ except BaseException as e:
+ status, body = util.interpret_http_exception(e)
+ if status == "410":
+ return self.delete(resolved_url)
+ return self.error("Could not fetch source URL %s" % resolved_url)
+
+ if not resp:
+ return
+ self.fetched, mf2 = resp
+
+ # check that we haven't already published this URL. (we can't do this before
+ # fetching because it might be a 410 delete, which we only know by fetching.)
+ if (
+ self.entity.status == "complete"
+ and self.entity.type != "preview"
+ and not self.PREVIEW
+ and not appengine_info.LOCAL
+ ):
+ return self.error(
+ "Sorry, you've already published that page, and Bridgy Publish doesn't support updating existing posts. Details: https://github.com/snarfed/bridgy/issues/84",
+ extra_json={"original": self.entity.published},
+ )
+
+ # find rel-shortlink, if any
+ # http://microformats.org/wiki/rel-shortlink
+ # https://github.com/snarfed/bridgy/issues/173
+ shortlinks = mf2["rels"].get("shortlink")
+ if shortlinks:
+ self.shortlink = urllib.parse.urljoin(resolved_url, shortlinks[0])
+
+ # loop through each item and its children and try to preview/create it. if
+ # it fails, try the next one. break after the first one that works.
+ result = None
+ types = set()
+ queue = collections.deque(mf2.get("items", []))
+ while queue:
+ item = queue.popleft()
+ item_types = set(item.get("type"))
+ if "h-feed" in item_types and "h-entry" not in item_types:
+ queue.extend(item.get("children", []))
+ continue
+ elif not item_types & PUBLISHABLE_TYPES:
+ types = types.union(item_types)
+ continue
+
+ try:
+ result = self.attempt_single_item(item)
+ if self.entity.published:
+ break
+ if result.abort:
+ if result.error_plain:
+ self.error(
+ result.error_plain, html=result.error_html, data=item
+ )
+ return
+ # try the next item
+ for embedded in (
+ "rsvp",
+ "invitee",
+ "repost",
+ "repost-of",
+ "like",
+ "like-of",
+ "in-reply-to",
+ ):
+ if embedded in item.get("properties", []):
+ item_types.add(embedded)
+ logging.info(
+ "Object type(s) %s not supported; error=%s; trying next.",
+ item_types,
+ result.error_plain,
+ )
+ types = types.union(item_types)
+ queue.extend(item.get("children", []))
+ except HTTPException:
+ # raised by us, probably via self.error()
+ raise
+ except BaseException as e:
+ code, body = util.interpret_http_exception(e)
+ if code in self.source.DISABLE_HTTP_CODES or isinstance(
+ e, models.DisableSource
+ ):
+ # the user deauthorized the bridgy app, or the token expired, so
+ # disable this source.
+ logging.warning(f"Disabling source due to: {e}", exc_info=True)
+ self.source.status = "disabled"
+ self.source.put()
+ if isinstance(
+ e, (NotImplementedError, ValueError, urllib.error.URLError)
+ ):
+ code = "400"
+ elif not code:
+ raise
+ msg = "Error: %s %s" % (body or "", e)
+ return self.error(
+ msg,
+ status=code,
+ report=code not in ("400", "404", "502", "503", "504"),
+ )
+
+ if not self.entity.published: # tried all the items
+ types.discard("h-entry")
+ types.discard("h-note")
+ if types:
+ msg = "%s doesn't support type(s) %s, or no content was found." % (
+ source_cls.GR_CLASS.NAME,
+ " + ".join(types),
+ )
+ return self.error(msg, data=mf2)
+ else:
+            msg = "Could not find content in h-entry or any other element!"
+ return self.error(msg, html=msg, data=mf2)
+
+ # write results to datastore, but don't overwrite a previous publish with a
+ # preview.
+ if not (self.PREVIEW and self.entity.type != "preview"):
+ self.entity.status = "complete"
+ self.entity.put()
+
+ return result
+
+ def _find_source(self, source_cls, url, domain):
+ """Returns the source that should publish a post URL, or None if not found.
+
+ Args:
+ source_cls: :class:`models.Source` subclass for this silo
+ url: string
+ domain: string, url's domain
+
+ Returns: :class:`models.Source`
+ """
+ domain = domain.lower()
+ sources = source_cls.query().filter(source_cls.domains == domain).fetch(100)
+ if not sources:
+ msg = (
+ "Could not find %(type)s account for %(domain)s. Check that your %(type)s profile has %(domain)s in its web site or link field, then try signing up again."
+ % {"type": source_cls.GR_CLASS.NAME, "domain": domain}
+ )
+ return self.error(msg, html=msg)
+
+ current_url = ""
+ sources_ready = []
+ best_match = None
+ for source in sources:
+ logging.info(
+ "Source: %s , features %s, status %s, poll status %s",
+ source.bridgy_url(),
+ source.features,
+ source.status,
+ source.poll_status,
+ )
+ if source.status != "disabled" and "publish" in source.features:
+ # use a source that has a domain_url matching the url provided,
+ # including path. find the source with the closest match.
+ sources_ready.append(source)
+ schemeless_url = util.schemeless(url.lower()).strip("/")
+ for domain_url in source.domain_urls:
+ schemeless_domain_url = util.schemeless(domain_url.lower()).strip(
+ "/"
+ )
+ if schemeless_url.startswith(schemeless_domain_url) and len(
+ domain_url
+ ) > len(current_url):
+ current_url = domain_url
+ best_match = source
+
+ if best_match:
+ return best_match
+
+ if sources_ready:
+            msg = f"No account found that matches {util.pretty_link(url)}. Check that the web site URL is in your silo profile, then sign up again."
+        else:
+            msg = "Publish is not enabled for your account. Try signing up!"
+ self.error(msg, html=msg)
+
+ def attempt_single_item(self, item):
+ """Attempts to preview or publish a single mf2 item.
+
+ Args:
+ item: mf2 item dict from mf2py
+
+ Returns:
+ CreationResult
+ """
+ self.maybe_inject_silo_content(item)
+ obj = microformats2.json_to_object(item)
+
+ ignore_formatting = self.ignore_formatting(item)
+ if ignore_formatting:
+ prop = microformats2.first_props(item.get("properties", {}))
+ content = microformats2.get_text(prop.get("content"))
+ if content:
+ obj["content"] = content.strip()
+
+ # which original post URL to include? in order of preference:
+ # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
+ # 2. original user-provided URL if it redirected
+ # 3. u-url if available
+ # 4. actual final fetched URL
+ if self.shortlink:
+ obj["url"] = self.shortlink
+ elif self.source_url() != self.fetched.url:
+ obj["url"] = self.source_url()
+ elif "url" not in obj:
+ obj["url"] = self.fetched.url
+ logging.debug(
+ "Converted to ActivityStreams object: %s", json_dumps(obj, indent=2)
+ )
+
+ # posts and comments need content
+ obj_type = obj.get("objectType")
+ if obj_type in ("note", "article", "comment"):
+ if (
+ not obj.get("content")
+ and not obj.get("summary")
+ and not obj.get("displayName")
+ ):
+ return gr_source.creation_result(
+ abort=False,
+ error_plain="Could not find content in %s" % self.fetched.url,
+                    error_html="Could not find content in %s" % self.fetched.url,
+ )
+
+ self.preprocess(obj)
+
+ include_link = self.include_link(item)
+
+ if not self.authorize():
+ return gr_source.creation_result(abort=True)
+
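+        # preview renders the would-be silo post as HTML without creating it;
+        # create actually publishes to the silo and records the result.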
+ if self.PREVIEW:
+ result = self.source.gr_source.preview_create(
+ obj, include_link=include_link, ignore_formatting=ignore_formatting
+ )
+ previewed = result.content or result.description
+ if self.entity.type == "preview":
+ self.entity.published = previewed
+ if not previewed:
+ return result # there was an error
+ return self._render_preview(result, include_link=include_link)
+
+ else:
+ result = self.source.gr_source.create(
+ obj, include_link=include_link, ignore_formatting=ignore_formatting
+ )
+ self.entity.published = result.content
+ if not result.content:
+ return result # there was an error
+ if "url" not in self.entity.published:
+ self.entity.published["url"] = obj.get("url")
+ self.entity.type = self.entity.published.get("type") or models.get_type(obj)
+ logging.info("Returning %s", json_dumps(self.entity.published, indent=2))
+ return gr_source.creation_result(
+ json_dumps(self.entity.published, indent=2)
+ )
+
+ def delete(self, source_url):
+ """Attempts to delete or preview delete a published post.
+
+ Args:
+ source_url: string, original post URL
+
+ Returns:
+ dict response data with at least id and url
+ """
+ assert self.entity
+ if (
+ self.entity.status != "complete" or self.entity.type == "preview"
+ ) and not appengine_info.LOCAL:
+ return self.error(
+ "Can't delete this post from %s because Bridgy Publish didn't originally POSSE it there"
+ % self.source.gr_source.NAME
+ )
+
+ id = self.entity.published.get("id")
+ url = self.entity.published.get("url")
+ if not id and url:
+ id = self.source.gr_source.post_id(url)
+
+ if not id:
+            return self.error(
+                "Bridgy Publish can't find the id of the %s post that it originally published for %s"
+                % (self.source.gr_source.NAME, source_url)
+            )
+
+ if self.PREVIEW:
+ try:
+ return self._render_preview(self.source.gr_source.preview_delete(id))
+ except NotImplementedError:
+ return self.error(
+ "Sorry, deleting isn't supported for %s yet"
+ % self.source.gr_source.NAME
+ )
+
+ logging.info("Deleting silo post id %s", id)
+ self.entity = models.Publish(
+ parent=self.entity.key.parent(), source=self.source.key, type="delete"
+ )
+ self.entity.put()
+ logging.debug(
+ "Publish entity for delete: '%s'", self.entity.key.urlsafe().decode()
+ )
+
+ resp = self.source.gr_source.delete(id)
+ resp.content.setdefault("id", id)
+ resp.content.setdefault("url", url)
+ logging.info(resp.content)
+ self.entity.published = resp.content
+ self.entity.status = "deleted"
+ self.entity.put()
+ return resp
+
+ def preprocess(self, activity):
+ """Preprocesses an item before trying to publish it.
+
+ Specifically, expands inReplyTo/object URLs with rel=syndication URLs.
+
+ Args:
+ activity: an ActivityStreams activity or object being published
+ """
+ self.source.preprocess_for_publish(activity)
+ self.expand_target_urls(activity)
+
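+        # drop Wordpress Jetpack lazy-load 1x1 placeholder images (IGNORE_IMAGE_RE).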
+ activity["image"] = [
+ img
+ for img in util.get_list(activity, "image")
+ if not IGNORE_IMAGE_RE.match(img.get("url", ""))
+ ]
+ if not activity["image"]:
+ del activity["image"]
+
+ def expand_target_urls(self, activity):
+ """Expand the inReplyTo or object fields of an ActivityStreams object
+ by fetching the original and looking for rel=syndication URLs.
+
+ This method modifies the dict in place.
+
+ Args:
+ activity: an ActivityStreams dict of the activity being published
+ """
+ for field in ("inReplyTo", "object"):
+ # microformats2.json_to_object de-dupes, no need to do it here
+ objs = activity.get(field)
+ if not objs:
+ continue
+
+ if isinstance(objs, dict):
+ objs = [objs]
+
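+            # start from the original objects, then append any syndication
+            # URLs discovered in their fetched pages.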
+ augmented = list(objs)
+ for obj in objs:
+ url = obj.get("url")
+ if not url:
+ continue
+
+ parsed = urllib.parse.urlparse(url)
+ # ignore home pages. https://github.com/snarfed/bridgy/issues/760
+ if parsed.path in ("", "/"):
+ continue
+
+ # get_webmention_target weeds out silos and non-HTML targets
+ # that we wouldn't want to download and parse
+ url, _, ok = util.get_webmention_target(url)
+ if not ok:
+ continue
+
+ logging.debug(
+ "expand_target_urls fetching field=%s, url=%s", field, url
+ )
+ try:
+ mf2 = util.fetch_mf2(url)
+ except AssertionError:
+ raise # for unit tests
+ except HTTPException:
+ # raised by us, probably via self.error()
+ raise
+ except BaseException:
+ # it's not a big deal if we can't fetch an in-reply-to url
+ logging.info(
+ "expand_target_urls could not fetch field=%s, url=%s",
+ field,
+ url,
+ exc_info=True,
+ )
+ continue
+
+ synd_urls = mf2["rels"].get("syndication", [])
+
+ # look for syndication urls in the first h-entry
+ queue = collections.deque(mf2.get("items", []))
+ while queue:
+ item = queue.popleft()
+ item_types = set(item.get("type", []))
+ if "h-feed" in item_types and "h-entry" not in item_types:
+ queue.extend(item.get("children", []))
+ continue
+
+ # these can be urls or h-cites
+ synd_urls += microformats2.get_string_urls(
+ item.get("properties", {}).get("syndication", [])
+ )
+
+ logging.debug(
+ "expand_target_urls found rel=syndication for url=%s: %r",
+ url,
+ synd_urls,
+ )
+ augmented += [{"url": u} for u in synd_urls]
+
+ activity[field] = augmented
+
+ def get_or_add_publish_entity(self, source_url):
+ """Creates and stores :class:`models.Publish` entity.
+
+ ...and if necessary, :class:`models.PublishedPage` entity.
+
+ Args:
+ source_url: string
+ """
+ try:
+ return self._get_or_add_publish_entity(source_url)
+ except CollisionError:
+ return self.error(
+ "You're already publishing that post in another request.", status=429
+ )
+ except Exception as e:
+ code = getattr(e, "code", None)
+ details = getattr(e, "details", None)
+ logging.info((code and code(), details and details()))
+ if (
+ code
+ and code() == grpc.StatusCode.ABORTED
+ and details
+ and "too much contention" in details()
+ ):
+ return self.error(
+ "You're already publishing that post in another request.",
+ status=429,
+ )
+ raise
+
+ @ndb.transactional()
+ def _get_or_add_publish_entity(self, source_url):
+ page = PublishedPage.get_or_insert(source_url)
+
+ # Detect concurrent publish request for the same page
+ # https://github.com/snarfed/bridgy/issues/996
+ pending = Publish.query(
+ Publish.status == "new",
+ Publish.type != "preview",
+ Publish.source == self.source.key,
+ ancestor=page.key,
+ ).get()
+ if pending:
+ logging.warning(f"Collided with publish: {pending.key.urlsafe().decode()}")
+ raise CollisionError()
+
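+        # reuse an existing completed publish for this page and source, if
+        # any, so _run can detect already-published pages and handle deletes.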
+ entity = Publish.query(
+ Publish.status == "complete",
+ Publish.type != "preview",
+ Publish.source == self.source.key,
+ ancestor=page.key,
+ ).get()
+ if entity is None:
+ entity = Publish(parent=page.key, source=self.source.key)
+ if self.PREVIEW:
+ entity.type = "preview"
+ entity.put()
+
+ logging.debug("Publish entity: '%s'", entity.key.urlsafe().decode())
+ return entity
+
+ def _render_preview(self, result, include_link=False):
+ """Renders a preview CreationResult as HTML.
+
+ Args:
+ result: CreationResult
+ include_link: boolean
+
+ Returns: CreationResult with the rendered HTML in content
+ """
+ state = {
+ "source_key": self.source.key.urlsafe().decode(),
+ "source_url": self.source_url(),
+ "target_url": self.target_url(),
+ "include_link": include_link,
+ }
+ vars = {
+ "source": util.preprocess_source(self.source),
+ "preview": result.content,
+ "description": result.description,
+ "webmention_endpoint": util.host_url("/publish/webmention"),
+ "state": util.encode_oauth_state(state),
+ **state,
+ }
+ logging.info(f"Rendering preview with template vars {pprint.pformat(vars)}")
+ return gr_source.creation_result(render_template("preview.html", **vars))
class Preview(PublishBase):
- """Renders a preview HTML snippet of how a webmention would be handled.
- """
- PREVIEW = True
-
- def dispatch_request(self):
- try:
- result = self._run()
- return result.content if result and result.content else r'¯\_(ツ)_/¯'
- except HTTPException as e:
- return e.description, e.code
-
- def authorize(self):
- from_source = util.load_source()
- if from_source.key != self.source.key:
-      msg = 'Try publishing that page from <a href="%s">%s</a> instead.' % (self.source.bridgy_path(), self.source.label())
- self.error(msg, html=msg)
- return False
-
- return True
-
- def include_link(self, item):
- # always use query param because there's a checkbox in the UI
- val = request.values.get('bridgy_omit_link', None)
- return (gr_source.INCLUDE_LINK if val is None or val.lower() == 'false'
- else gr_source.INCLUDE_IF_TRUNCATED if val.lower() == 'maybe'
- else gr_source.OMIT_LINK)
-
- def error(self, error, html=None, status=400, data=None, report=False, **kwargs):
- error = html if html else util.linkify(error)
- logging.info(f'publish: {error}')
- if report:
- self.report_error(error, status=status)
- flask_util.error(error, status=status)
+ """Renders a preview HTML snippet of how a webmention would be handled."""
+
+ PREVIEW = True
+
+ def dispatch_request(self):
+ try:
+ result = self._run()
+ return result.content if result and result.content else r"¯\_(ツ)_/¯"
+ except HTTPException as e:
+ return e.description, e.code
+
+ def authorize(self):
+ from_source = util.load_source()
+ if from_source.key != self.source.key:
+            msg = 'Try publishing that page from <a href="%s">%s</a> instead.' % (
+ self.source.bridgy_path(),
+ self.source.label(),
+ )
+ self.error(msg, html=msg)
+ return False
+
+ return True
+
+ def include_link(self, item):
+ # always use query param because there's a checkbox in the UI
+ val = request.values.get("bridgy_omit_link", None)
+ return (
+ gr_source.INCLUDE_LINK
+ if val is None or val.lower() == "false"
+ else gr_source.INCLUDE_IF_TRUNCATED
+ if val.lower() == "maybe"
+ else gr_source.OMIT_LINK
+ )
+
+ def error(self, error, html=None, status=400, data=None, report=False, **kwargs):
+ error = html if html else util.linkify(error)
+ logging.info(f"publish: {error}")
+ if report:
+ self.report_error(error, status=status)
+ flask_util.error(error, status=status)
class Send(PublishBase):
- """Interactive publish handler. Redirected to after each silo's OAuth dance.
-
- Note that this is GET, not POST, since HTTP redirects always GET.
- """
- PREVIEW = False
-
- def finish(self, auth_entity, state=None):
- self.state = util.decode_oauth_state(state)
- if not state:
- self.error('If you want to publish or preview, please approve the prompt.')
- return redirect('/')
-
- source = ndb.Key(urlsafe=self.state['source_key']).get()
- if auth_entity is None:
- self.error('If you want to publish or preview, please approve the prompt.')
- elif not auth_entity.is_authority_for(source.auth_entity):
- self.error('Please log into %s as %s to publish that page.' %
- (source.GR_CLASS.NAME, source.name))
- else:
- result = self._run()
- if result and result.content:
-      flash('Done! <a href="%s">Click here to view.</a>' %
- self.entity.published.get('url'))
- granary_message = self.entity.published.get('granary_message')
- if granary_message:
- flash(granary_message)
- # otherwise error() added an error message
-
- return redirect(source.bridgy_url())
-
- def source_url(self):
- return self.state['source_url']
-
- def target_url(self):
- return self.state['target_url']
-
- def include_link(self, item):
- return self.state['include_link']
-
- def error(self, error, html=None, status=400, data=None, report=False, **kwargs):
- logging.info(f'publish: {error}')
- error = html if html else util.linkify(error)
- flash('%s' % error)
- if report:
- self.report_error(error, status=status)
+ """Interactive publish handler. Redirected to after each silo's OAuth dance.
+
+ Note that this is GET, not POST, since HTTP redirects always GET.
+ """
+
+ PREVIEW = False
+
+ def finish(self, auth_entity, state=None):
+ self.state = util.decode_oauth_state(state)
+ if not state:
+ self.error("If you want to publish or preview, please approve the prompt.")
+ return redirect("/")
+
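+        # the decoded OAuth state carries the source key and the original
+        # publish params (source_url, target_url, include_link).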
+ source = ndb.Key(urlsafe=self.state["source_key"]).get()
+ if auth_entity is None:
+ self.error("If you want to publish or preview, please approve the prompt.")
+ elif not auth_entity.is_authority_for(source.auth_entity):
+ self.error(
+ "Please log into %s as %s to publish that page."
+ % (source.GR_CLASS.NAME, source.name)
+ )
+ else:
+ result = self._run()
+ if result and result.content:
+ flash(
+                    'Done! <a href="%s">Click here to view.</a>'
+ % self.entity.published.get("url")
+ )
+ granary_message = self.entity.published.get("granary_message")
+ if granary_message:
+ flash(granary_message)
+ # otherwise error() added an error message
+
+ return redirect(source.bridgy_url())
+
+ def source_url(self):
+ return self.state["source_url"]
+
+ def target_url(self):
+ return self.state["target_url"]
+
+ def include_link(self, item):
+ return self.state["include_link"]
+
+ def error(self, error, html=None, status=400, data=None, report=False, **kwargs):
+ logging.info(f"publish: {error}")
+ error = html if html else util.linkify(error)
+ flash("%s" % error)
+ if report:
+ self.report_error(error, status=status)
# We want Callback.get() and Send.finish(), so put
# Callback first and override finish.
class FlickrSend(oauth_flickr.Callback, Send):
- finish = Send.finish
+ finish = Send.finish
class GitHubSend(oauth_github.Callback, Send):
- finish = Send.finish
+ finish = Send.finish
class MastodonSend(oauth_mastodon.Callback, Send):
- finish = Send.finish
+ finish = Send.finish
class MeetupSend(oauth_meetup.Callback, Send):
- finish = Send.finish
+ finish = Send.finish
class TwitterSend(oauth_twitter.Callback, Send):
- finish = Send.finish
+ finish = Send.finish
class Webmention(PublishBase):
- """Accepts webmentions and translates them to publish requests."""
- PREVIEW = False
-
- def dispatch_request(self):
- result = self._run()
- if result:
- return result.content, 201, {
- 'Content-Type': 'application/json',
- 'Location': self.entity.published['url'],
- }
-
- return ''
-
- def authorize(self):
- """Check for a backlink to brid.gy/publish/SILO."""
- bases = set()
- if request.host == 'brid.gy':
- bases.add('brid.gy')
- bases.add('www.brid.gy') # also accept www
- else:
- bases.add(request.host)
-
- expected = ['%s/publish/%s' % (base, self.source.SHORT_NAME) for base in bases]
-
- if self.entity.html:
- for url in expected:
- if url in self.entity.html or urllib.parse.quote(url, safe='') in self.entity.html:
- return True
-
- self.error(f"Couldn't find link to {expected[0]}")
- return False
-
- def error(self, error, **kwargs):
- logging.info(f'publish: {error}')
- return super().error(error, **kwargs)
-
-
-app.add_url_rule('/publish/preview', view_func=Preview.as_view('publish_preview'), methods=['POST'])
-app.add_url_rule('/publish/webmention', view_func=Webmention.as_view('publish_webmention'), methods=['POST'])
-app.add_url_rule('/publish/flickr/finish', view_func=FlickrSend.as_view('publish_flickr_finish', 'unused'))
-app.add_url_rule('/publish/github/finish', view_func=GitHubSend.as_view('publish_github_finish', 'unused'))
-app.add_url_rule('/publish/mastodon/finish', view_func=MastodonSend.as_view('publish_mastodon_finish', 'unused'))
+ """Accepts webmentions and translates them to publish requests."""
+
+ PREVIEW = False
+
+ def dispatch_request(self):
+ result = self._run()
+ if result:
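+            # success: 201 Created, with Location pointing at the new silo post.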
+ return (
+ result.content,
+ 201,
+ {
+ "Content-Type": "application/json",
+ "Location": self.entity.published["url"],
+ },
+ )
+
+ return ""
+
+ def authorize(self):
+ """Check for a backlink to brid.gy/publish/SILO."""
+ bases = set()
+ if request.host == "brid.gy":
+ bases.add("brid.gy")
+ bases.add("www.brid.gy") # also accept www
+ else:
+ bases.add(request.host)
+
+ expected = ["%s/publish/%s" % (base, self.source.SHORT_NAME) for base in bases]
+
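+        # the backlink may appear raw or percent-encoded in the fetched HTML.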
+ if self.entity.html:
+ for url in expected:
+ if (
+ url in self.entity.html
+ or urllib.parse.quote(url, safe="") in self.entity.html
+ ):
+ return True
+
+ self.error(f"Couldn't find link to {expected[0]}")
+ return False
+
+ def error(self, error, **kwargs):
+ logging.info(f"publish: {error}")
+ return super().error(error, **kwargs)
+
+
+app.add_url_rule(
+ "/publish/preview", view_func=Preview.as_view("publish_preview"), methods=["POST"]
+)
+app.add_url_rule(
+ "/publish/webmention",
+ view_func=Webmention.as_view("publish_webmention"),
+ methods=["POST"],
+)
+app.add_url_rule(
+ "/publish/flickr/finish",
+ view_func=FlickrSend.as_view("publish_flickr_finish", "unused"),
+)
+app.add_url_rule(
+ "/publish/github/finish",
+ view_func=GitHubSend.as_view("publish_github_finish", "unused"),
+)
+app.add_url_rule(
+ "/publish/mastodon/finish",
+ view_func=MastodonSend.as_view("publish_mastodon_finish", "unused"),
+)
# because Meetup's `redirect_uri` handling is a little more restrictive
-app.add_url_rule('/meetup/publish/finish', view_func=MeetupSend.as_view('publish_meetup_finish', 'unused'))
-app.add_url_rule('/publish/twitter/finish', view_func=TwitterSend.as_view('publish_twitter_finish', 'unused'))
+app.add_url_rule(
+ "/meetup/publish/finish",
+ view_func=MeetupSend.as_view("publish_meetup_finish", "unused"),
+)
+app.add_url_rule(
+ "/publish/twitter/finish",
+ view_func=TwitterSend.as_view("publish_twitter_finish", "unused"),
+)
diff --git a/reddit.py b/reddit.py
index bfe31e82..ece14deb 100644
--- a/reddit.py
+++ b/reddit.py
@@ -10,69 +10,86 @@
class Reddit(models.Source):
- """A Reddit account.
-
- The key name is the username.
- """
- GR_CLASS = gr_reddit.Reddit
- OAUTH_START = oauth_reddit.Start
- SHORT_NAME = 'reddit'
- TYPE_LABELS = {
- 'post': 'submission',
- 'comment': 'comment',
- }
- CAN_PUBLISH = False
-
- @staticmethod
- def new(auth_entity=None, **kwargs):
- """Creates and returns a :class:`Reddit` entity.
-
- Args:
- auth_entity: :class:`oauth_dropins.reddit.RedditAuth`
- kwargs: property values
- """
- user = json_loads(auth_entity.user_json)
- gr_source = gr_reddit.Reddit(auth_entity.refresh_token)
- return Reddit(id=user.get('name'),
- auth_entity=auth_entity.key,
- url=gr_source.user_url(user.get('name')),
- name=user.get('name'),
- picture=user.get('icon_img'),
- **kwargs)
-
- def silo_url(self):
- """Returns the Reddit account URL, e.g. https://reddit.com/user/foo."""
- return self.gr_source.user_url(self.key_id())
-
- def label_name(self):
- """Returns the username."""
- return self.key_id()
-
- def search_for_links(self):
- """Searches for activities with links to any of this source's web sites.
-
- Returns:
- sequence of ActivityStreams activity dicts
+ """A Reddit account.
+
+ The key name is the username.
"""
- urls = set(util.schemeless(util.fragmentless(url), slashes=False)
- for url in self.domain_urls
- if not util.in_webmention_blocklist(util.domain_from_link(url)))
- if not urls:
- return []
- # Search syntax: https://www.reddit.com/wiki/search
- url_query = ' OR '.join([f'site:"{u}" OR selftext:"{u}"' for u in urls])
- return self.get_activities(
- search_query=url_query, group_id=gr_source.SEARCH, etag=self.last_activities_etag,
- fetch_replies=False, fetch_likes=False, fetch_shares=False, count=50)
+ GR_CLASS = gr_reddit.Reddit
+ OAUTH_START = oauth_reddit.Start
+ SHORT_NAME = "reddit"
+ TYPE_LABELS = {
+ "post": "submission",
+ "comment": "comment",
+ }
+ CAN_PUBLISH = False
+
+ @staticmethod
+ def new(auth_entity=None, **kwargs):
+ """Creates and returns a :class:`Reddit` entity.
+
+ Args:
+ auth_entity: :class:`oauth_dropins.reddit.RedditAuth`
+ kwargs: property values
+ """
+ user = json_loads(auth_entity.user_json)
+ gr_source = gr_reddit.Reddit(auth_entity.refresh_token)
+ return Reddit(
+ id=user.get("name"),
+ auth_entity=auth_entity.key,
+ url=gr_source.user_url(user.get("name")),
+ name=user.get("name"),
+ picture=user.get("icon_img"),
+ **kwargs,
+ )
+
+ def silo_url(self):
+ """Returns the Reddit account URL, e.g. https://reddit.com/user/foo."""
+ return self.gr_source.user_url(self.key_id())
+
+ def label_name(self):
+ """Returns the username."""
+ return self.key_id()
+
+ def search_for_links(self):
+ """Searches for activities with links to any of this source's web sites.
+
+ Returns:
+ sequence of ActivityStreams activity dicts
+ """
+ urls = set(
+ util.schemeless(util.fragmentless(url), slashes=False)
+ for url in self.domain_urls
+ if not util.in_webmention_blocklist(util.domain_from_link(url))
+ )
+ if not urls:
+ return []
+
+ # Search syntax: https://www.reddit.com/wiki/search
+ url_query = " OR ".join([f'site:"{u}" OR selftext:"{u}"' for u in urls])
+ return self.get_activities(
+ search_query=url_query,
+ group_id=gr_source.SEARCH,
+ etag=self.last_activities_etag,
+ fetch_replies=False,
+ fetch_likes=False,
+ fetch_shares=False,
+ count=50,
+ )
class Callback(oauth_reddit.Callback):
- def finish(self, auth_entity, state=None):
- util.maybe_add_or_delete_source(Reddit, auth_entity, state)
+ def finish(self, auth_entity, state=None):
+ util.maybe_add_or_delete_source(Reddit, auth_entity, state)
-app.add_url_rule('/reddit/start',
- view_func=util.oauth_starter(oauth_reddit.Start).as_view('reddit_start', '/reddit/callback'), methods=['POST'])
-app.add_url_rule('/reddit/callback',
- view_func=Callback.as_view('reddit_callback', 'unused to_path'))
+app.add_url_rule(
+ "/reddit/start",
+ view_func=util.oauth_starter(oauth_reddit.Start).as_view(
+ "reddit_start", "/reddit/callback"
+ ),
+ methods=["POST"],
+)
+app.add_url_rule(
+ "/reddit/callback", view_func=Callback.as_view("reddit_callback", "unused to_path")
+)
diff --git a/superfeedr.py b/superfeedr.py
index 8b1c757c..99d7c1e3 100644
--- a/superfeedr.py
+++ b/superfeedr.py
@@ -19,122 +19,135 @@
import models
import util
-SUPERFEEDR_TOKEN = util.read('superfeedr_token')
-SUPERFEEDR_USERNAME = util.read('superfeedr_username')
-PUSH_API_URL = 'https://push.superfeedr.com'
+SUPERFEEDR_TOKEN = util.read("superfeedr_token")
+SUPERFEEDR_USERNAME = util.read("superfeedr_username")
+PUSH_API_URL = "https://push.superfeedr.com"
MAX_BLOGPOST_LINKS = 10
def subscribe(source):
- """Subscribes to a source.
-
- Also receives some past posts and adds propagate tasks for them.
-
- http://documentation.superfeedr.com/subscribers.html#addingfeedswithpubsubhubbub
-
- Args:
- source: Blogger, Tumblr, or WordPress
- """
- if appengine_info.LOCAL:
- logging.info('Running in dev_appserver, not subscribing to Superfeedr')
- return
-
- data = {
- 'hub.mode': 'subscribe',
- 'hub.topic': source.feed_url(),
- 'hub.callback': util.host_url(f'/{source.SHORT_NAME}/notify/{source.key_id()}'),
- # TODO
- # 'hub.secret': 'xxx',
- 'format': 'json',
- 'retrieve': 'true',
- }
-
- logging.info('Adding Superfeedr subscription: %s', data)
- resp = util.requests_post(
- PUSH_API_URL, data=data,
- auth=HTTPBasicAuth(SUPERFEEDR_USERNAME, SUPERFEEDR_TOKEN),
- headers=util.REQUEST_HEADERS)
- handle_feed(resp.json(), source)
+ """Subscribes to a source.
+
+ Also receives some past posts and adds propagate tasks for them.
+
+ http://documentation.superfeedr.com/subscribers.html#addingfeedswithpubsubhubbub
+
+ Args:
+ source: Blogger, Tumblr, or WordPress
+ """
+ if appengine_info.LOCAL:
+ logging.info("Running in dev_appserver, not subscribing to Superfeedr")
+ return
+
+ data = {
+ "hub.mode": "subscribe",
+ "hub.topic": source.feed_url(),
+ "hub.callback": util.host_url(f"/{source.SHORT_NAME}/notify/{source.key_id()}"),
+ # TODO
+ # 'hub.secret': 'xxx',
+ "format": "json",
+ "retrieve": "true",
+ }
+
+ logging.info("Adding Superfeedr subscription: %s", data)
+ resp = util.requests_post(
+ PUSH_API_URL,
+ data=data,
+ auth=HTTPBasicAuth(SUPERFEEDR_USERNAME, SUPERFEEDR_TOKEN),
+ headers=util.REQUEST_HEADERS,
+ )
+ handle_feed(resp.json(), source)
def handle_feed(feed, source):
- """Handles a Superfeedr JSON feed.
-
- Creates :class:`models.BlogPost` entities and adds propagate-blogpost tasks
- for new items.
-
- http://documentation.superfeedr.com/schema.html#json
- http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications
-
- Args:
- feed: unicode string, Superfeedr JSON feed
- source: Blogger, Tumblr, or WordPress
- """
- logging.info('Source: %s %s', source.label(), source.key_id())
- logging.info('Raw feed: %s', feed)
-
- if not feed:
- return
-
- if source.status != 'enabled':
- logging.info('Dropping because source is %s', source.status)
- return
- elif 'webmention' not in source.features:
- logging.info("Dropping because source doesn't have webmention feature")
- return
-
- for item in feed.get('items', []):
- url = item.get('permalinkUrl') or item.get('id')
- if not url:
- logging.error('Dropping feed item without permalinkUrl or id!')
- continue
-
- # extract links from content, discarding self links.
- #
- # i don't use get_webmention_target[s]() here because they follows redirects
- # and fetch link contents, and this handler should be small and fast and try
- # to return a response to superfeedr successfully.
- #
- # TODO: extract_links currently has a bug that makes it drop trailing
- # slashes. ugh. fix that.
- content = item.get('content') or item.get('summary', '')
- links = [util.clean_url(util.unwrap_t_umblr_com(url))
- for url in util.extract_links(content)
- if util.domain_from_link(url) not in source.domains]
-
- unique = []
- for link in util.dedupe_urls(links):
- if len(link) <= _MAX_STRING_LENGTH:
- unique.append(link)
- else:
- logging.info('Giving up on link over %s chars! %s', _MAX_STRING_LENGTH, link)
- if len(unique) >= MAX_BLOGPOST_LINKS:
- logging.info('Stopping at 10 links! Skipping the rest.')
- break
-
- logging.info('Found links: %s', unique)
- if len(url) > _MAX_KEYPART_BYTES:
- logging.warning('Blog post URL is too long (over 500 chars)! Giving up.')
- bp = models.BlogPost(id=url[:_MAX_KEYPART_BYTES], source=source.key,
- feed_item=item, failed=unique)
- else:
- bp = models.BlogPost(id=url, source=source.key, feed_item=item, unsent=unique)
-
- bp.get_or_save()
+ """Handles a Superfeedr JSON feed.
+
+ Creates :class:`models.BlogPost` entities and adds propagate-blogpost tasks
+ for new items.
+
+ http://documentation.superfeedr.com/schema.html#json
+ http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications
+
+ Args:
+ feed: unicode string, Superfeedr JSON feed
+ source: Blogger, Tumblr, or WordPress
+ """
+ logging.info("Source: %s %s", source.label(), source.key_id())
+ logging.info("Raw feed: %s", feed)
+
+ if not feed:
+ return
+
+ if source.status != "enabled":
+ logging.info("Dropping because source is %s", source.status)
+ return
+ elif "webmention" not in source.features:
+ logging.info("Dropping because source doesn't have webmention feature")
+ return
+
+ for item in feed.get("items", []):
+ url = item.get("permalinkUrl") or item.get("id")
+ if not url:
+ logging.error("Dropping feed item without permalinkUrl or id!")
+ continue
+
+ # extract links from content, discarding self links.
+ #
+        # i don't use get_webmention_target[s]() here because they follow redirects
+ # and fetch link contents, and this handler should be small and fast and try
+ # to return a response to superfeedr successfully.
+ #
+ # TODO: extract_links currently has a bug that makes it drop trailing
+ # slashes. ugh. fix that.
+ content = item.get("content") or item.get("summary", "")
+ links = [
+ util.clean_url(util.unwrap_t_umblr_com(url))
+ for url in util.extract_links(content)
+ if util.domain_from_link(url) not in source.domains
+ ]
+
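+        # dedupe and cap at MAX_BLOGPOST_LINKS, skipping links too long to
+        # store in a datastore string property.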
+ unique = []
+ for link in util.dedupe_urls(links):
+ if len(link) <= _MAX_STRING_LENGTH:
+ unique.append(link)
+ else:
+ logging.info(
+ "Giving up on link over %s chars! %s", _MAX_STRING_LENGTH, link
+ )
+ if len(unique) >= MAX_BLOGPOST_LINKS:
+ logging.info("Stopping at 10 links! Skipping the rest.")
+ break
+
+ logging.info("Found links: %s", unique)
+ if len(url) > _MAX_KEYPART_BYTES:
+ logging.warning("Blog post URL is too long (over 500 chars)! Giving up.")
+ bp = models.BlogPost(
+ id=url[:_MAX_KEYPART_BYTES],
+ source=source.key,
+ feed_item=item,
+ failed=unique,
+ )
+ else:
+ bp = models.BlogPost(
+ id=url, source=source.key, feed_item=item, unsent=unique
+ )
+
+ bp.get_or_save()
class Notify(View):
- """Handles a Superfeedr notification.
+ """Handles a Superfeedr notification.
+
+ Abstract; subclasses must set the SOURCE_CLS attr.
- Abstract; subclasses must set the SOURCE_CLS attr.
+ http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications
+ """
- http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications
- """
- SOURCE_CLS = None
+ SOURCE_CLS = None
- def dispatch_request(self, id):
- source = self.SOURCE_CLS.get_by_id(id)
- if source:
- handle_feed(request.json, source)
+ def dispatch_request(self, id):
+ source = self.SOURCE_CLS.get_by_id(id)
+ if source:
+ handle_feed(request.json, source)
- return ''
+ return ""
diff --git a/tasks.py b/tasks.py
index d386e33b..46277844 100644
--- a/tasks.py
+++ b/tasks.py
@@ -18,817 +18,960 @@
from flask_background import app
from models import Response
from util import ERROR_HTTP_RETURN_CODE
+
# need to import model class definitions since poll creates and saves entities.
import blogger, facebook, flickr, github, instagram, mastodon, medium, reddit, tumblr, twitter, wordpress_rest
# Used as a sentinel value in the webmention endpoint cache
-NO_ENDPOINT = 'NONE'
+NO_ENDPOINT = "NONE"
class Poll(View):
- """Task handler that fetches and processes new responses from a single source.
-
- Request parameters:
-
- * source_key: string key of source entity
- * last_polled: timestamp, YYYY-MM-DD-HH-MM-SS
-
- Inserts a propagate task for each response that hasn't been seen before.
-
- Steps:
- 1: Fetch activities: posts by the user, links to the user's domain(s).
- 2: Extract responses, store their activities.
- 3: Filter out responses we've already seen, using Responses in the datastore.
- 4: Store new responses and enqueue propagate tasks.
- 5: Possibly refetch updated syndication urls.
-
- 1-4 are in backfeed(); 5 is in poll().
- """
- RESTART_EXISTING_TASKS = False # overridden in Discover
-
- def _last_poll_url(self, source):
- return util.host_url(logs.url(source.last_poll_attempt, source.key))
-
- def dispatch_request(self):
- logging.debug('Params: %s', list(request.values.items()))
-
- key = request.values['source_key']
- source = g.source = ndb.Key(urlsafe=key).get()
- if not source or source.status == 'disabled' or 'listen' not in source.features:
- logging.error('Source not found or disabled. Dropping task.')
- return ''
- logging.info('Source: %s %s, %s', source.label(), source.key_id(),
- source.bridgy_url())
-
- if source.AUTO_POLL:
- last_polled = request.values['last_polled']
- if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
- logging.warning('duplicate poll task! deferring to the other task.')
- return ''
-
- logging.info('Last poll: %s', self._last_poll_url(source))
-
- # mark this source as polling
- source.updates = {
- 'poll_status': 'polling',
- 'last_poll_attempt': util.now_fn(),
- 'rate_limited': False,
- }
- source = models.Source.put_updates(source)
-
- source.updates = {}
- try:
- self.poll(source)
- except Exception as e:
- source.updates['poll_status'] = 'error'
- code, _ = util.interpret_http_exception(e)
- if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
- # the user deauthorized the bridgy app, so disable this source.
- # let the task complete successfully so that it's not retried.
- logging.warning(f'Disabling source due to: {e}', exc_info=True)
- source.updates.update({
- 'status': 'disabled',
- 'poll_status': 'ok',
- })
- elif code in source.RATE_LIMIT_HTTP_CODES:
- logging.info('Rate limited. Marking as error and finishing. %s', e)
- source.updates['rate_limited'] = True
- else:
- raise
- finally:
- source = models.Source.put_updates(source)
-
- if source.AUTO_POLL:
- util.add_poll_task(source)
-
- # feeble attempt to avoid hitting the instance memory limit
- source = None
- gc.collect()
-
- return 'OK'
-
- def poll(self, source):
- """Actually runs the poll.
-
- Stores property names and values to update in source.updates.
- """
- if source.last_activities_etag or source.last_activity_id:
- logging.debug('Using ETag %s, last activity id %s',
- source.last_activities_etag, source.last_activity_id)
-
- #
- # Step 1: fetch activities:
- # * posts by the user
- # * search all posts for the user's domain URLs to find links
- #
- cache = util.CacheDict()
- if source.last_activities_cache_json:
- cache.update(json_loads(source.last_activities_cache_json))
-
- # search for links first so that the user's activities and responses
- # override them if they overlap
- links = source.search_for_links()
-
- # this user's own activities (and user mentions)
- resp = source.get_activities_response(
- fetch_replies=True, fetch_likes=True, fetch_shares=True,
- fetch_mentions=True, count=50, etag=source.last_activities_etag,
- min_id=source.last_activity_id, cache=cache)
- etag = resp.get('etag') # used later
- user_activities = resp.get('items', [])
-
- # these map ids to AS objects.
- # backfeed all links as responses, but only include the user's own links as
- # activities, since their responses also get backfeed.
- responses = {a['id']: a for a in links}
-
- user_id = source.user_tag_id()
- links_by_user = [a for a in links
- if a.get('object', {}).get('author', {}).get('id') == user_id]
- activities = {a['id']: a for a in links_by_user + user_activities}
-
- # extract silo activity ids, update last_activity_id
- silo_activity_ids = set()
- last_activity_id = source.last_activity_id
- for id, activity in activities.items():
- # maybe replace stored last activity id
- parsed = util.parse_tag_uri(id)
- if parsed:
- id = parsed[1]
- silo_activity_ids.add(id)
- try:
- # try numeric comparison first
- greater = int(id) > int(last_activity_id)
- except (TypeError, ValueError):
- greater = str(id) > str(last_activity_id)
- if greater:
- last_activity_id = id
-
- if last_activity_id and last_activity_id != source.last_activity_id:
- source.updates['last_activity_id'] = last_activity_id
-
- # trim cache to just the returned activity ids, so that it doesn't grow
- # without bound. (WARNING: depends on get_activities_response()'s cache key
- # format, e.g. 'PREFIX ACTIVITY_ID'!)
- source.updates['last_activities_cache_json'] = json_dumps(
- {k: v for k, v in cache.items() if k.split()[-1] in silo_activity_ids})
-
- self.backfeed(source, responses, activities=activities)
-
- source.updates.update({'last_polled': source.last_poll_attempt,
- 'poll_status': 'ok'})
- if etag and etag != source.last_activities_etag:
- source.updates['last_activities_etag'] = etag
-
- #
- # Possibly refetch updated syndication urls.
- #
- # if the author has added syndication urls since the first time
- # original_post_discovery ran, we'll miss them. this cleanup task will
- # periodically check for updated urls. only kicks in if the author has
- # *ever* published a rel=syndication url
- if source.should_refetch():
- logging.info('refetching h-feed for source %s', source.label())
- relationships = original_post_discovery.refetch(source)
-
- now = util.now_fn()
- source.updates['last_hfeed_refetch'] = now
-
- if relationships:
- logging.info('refetch h-feed found new rel=syndication relationships: %s',
- relationships)
- try:
- self.repropagate_old_responses(source, relationships)
- except BaseException as e:
- if ('BadRequestError' in str(e.__class__) or
- 'Timeout' in str(e.__class__) or
- util.is_connection_failure(e)):
- logging.info('Timeout while repropagating responses.', exc_info=True)
- else:
- raise
- else:
- logging.info(
- 'skipping refetch h-feed. last-syndication-url %s, last-refetch %s',
- source.last_syndication_url, source.last_hfeed_refetch)
+ """Task handler that fetches and processes new responses from a single source.
- def backfeed(self, source, responses=None, activities=None):
- """Processes responses and activities and generates propagate tasks.
+ Request parameters:
- Stores property names and values to update in source.updates.
+ * source_key: string key of source entity
+ * last_polled: timestamp, YYYY-MM-DD-HH-MM-SS
- Args:
- source: Source
- responses: dict mapping AS response id to AS object
- activities: dict mapping AS activity id to AS object
- """
- if responses is None:
- responses = {}
- if activities is None:
- activities = {}
-
- # Cache to make sure we only fetch the author's h-feed(s) the
- # first time we see it
- fetched_hfeeds = set()
-
- # narrow down to just public activities
- public = {}
- private = {}
- for id, activity in activities.items():
- (public if source.is_activity_public(activity) else private)[id] = activity
- logging.info('Found %d public activities: %s', len(public), public.keys())
- logging.info('Found %d private activities: %s', len(private), private.keys())
-
- last_public_post = (source.last_public_post or util.EPOCH).isoformat()
- public_published = util.trim_nulls(
- [a.get('object', {}).get('published') for a in public.values()])
- if public_published:
- max_published = max(public_published)
- if max_published > last_public_post:
- last_public_post = max_published
- source.updates['last_public_post'] = \
- util.as_utc(util.parse_iso8601(max_published))
-
- source.updates['recent_private_posts'] = \
- len([a for a in private.values()
- if a.get('object', {}).get('published', util.EPOCH_ISO) > last_public_post])
-
- #
- # Step 2: extract responses, store their activities in response['activities']
- #
- # WARNING: this creates circular references in link posts found by search
- # queries in step 1, since they are their own activity. We use
- # prune_activity() and prune_response() in step 4 to remove these before
- # serializing to JSON.
- #
- for id, activity in public.items():
- obj = activity.get('object') or activity
-
- # handle user mentions
- user_id = source.user_tag_id()
- if obj.get('author', {}).get('id') != user_id and activity.get('verb') != 'share':
- for tag in obj.get('tags', []):
- urls = tag.get('urls')
- if tag.get('objectType') == 'person' and tag.get('id') == user_id and urls:
- activity['originals'], activity['mentions'] = \
- original_post_discovery.discover(
- source, activity, fetch_hfeed=True,
- include_redirect_sources=False,
- already_fetched_hfeeds=fetched_hfeeds)
- activity['mentions'].update(u.get('value') for u in urls)
- responses[id] = activity
- break
-
- # handle quote mentions
- for att in obj.get('attachments', []):
- if (att.get('objectType') in ('note', 'article')
- and att.get('author', {}).get('id') == source.user_tag_id()):
- # now that we've confirmed that one exists, OPD will dig
- # into the actual attachments
- if 'originals' not in activity or 'mentions' not in activity:
- activity['originals'], activity['mentions'] = \
- original_post_discovery.discover(
- source, activity, fetch_hfeed=True,
- include_redirect_sources=False,
- already_fetched_hfeeds=fetched_hfeeds)
- responses[id] = activity
- break
-
- # extract replies, likes, reactions, reposts, and rsvps
- replies = obj.get('replies', {}).get('items', [])
- tags = obj.get('tags', [])
- likes = [t for t in tags if Response.get_type(t) == 'like']
- reactions = [t for t in tags if Response.get_type(t) == 'react']
- reposts = [t for t in tags if Response.get_type(t) == 'repost']
- rsvps = Source.get_rsvps_from_event(obj)
-
- # coalesce responses. drop any without ids
- for resp in replies + likes + reactions + reposts + rsvps:
- id = resp.get('id')
- if not id:
- logging.error('Skipping response without id: %s', json_dumps(resp, indent=2))
- continue
-
- if source.is_blocked(resp):
- logging.info('Skipping response by blocked user: %s',
- json_dumps(resp.get('author') or resp.get('actor'), indent=2))
- continue
-
- resp.setdefault('activities', []).append(activity)
-
- # when we find two responses with the same id, the earlier one may have
- # come from a link post or user mention, and this one is probably better
- # since it probably came from the user's activity, so prefer this one.
- # background: https://github.com/snarfed/bridgy/issues/533
- existing = responses.get(id)
- if existing:
- if source.gr_source.activity_changed(resp, existing, log=True):
- logging.warning('Got two different versions of same response!\n%s\n%s',
- existing, resp)
- resp['activities'].extend(existing.get('activities', []))
-
- responses[id] = resp
-
- #
- # Step 3: filter out responses we've already seen
- #
- # seen responses (JSON objects) for each source are stored in its entity.
- unchanged_responses = []
- if source.seen_responses_cache_json:
- for seen in json_loads(source.seen_responses_cache_json):
- id = seen['id']
- resp = responses.get(id)
- if resp and not source.gr_source.activity_changed(seen, resp, log=True):
- unchanged_responses.append(seen)
- del responses[id]
-
- #
- # Step 4: store new responses and enqueue propagate tasks
- #
- pruned_responses = []
- source.blocked_ids = None
-
- for id, resp in responses.items():
- resp_type = Response.get_type(resp)
- activities = resp.pop('activities', [])
- if not activities and resp_type == 'post':
- activities = [resp]
- too_long = set()
- urls_to_activity = {}
- for i, activity in enumerate(activities):
- # we'll usually have multiple responses for the same activity, and the
- # objects in resp['activities'] are shared, so cache each activity's
- # discovered webmention targets inside its object.
- if 'originals' not in activity or 'mentions' not in activity:
- activity['originals'], activity['mentions'] = \
- original_post_discovery.discover(
- source, activity, fetch_hfeed=True,
- include_redirect_sources=False,
- already_fetched_hfeeds=fetched_hfeeds)
-
- targets = original_post_discovery.targets_for_response(
- resp, originals=activity['originals'], mentions=activity['mentions'])
- if targets:
- logging.info('%s has %d webmention target(s): %s', activity.get('url'),
- len(targets), ' '.join(targets))
- # new response to propagate! load block list if we haven't already
- if source.blocked_ids is None:
- source.load_blocklist()
-
- for t in targets:
- if len(t) <= _MAX_STRING_LENGTH:
- urls_to_activity[t] = i
- else:
- logging.info('Giving up on target URL over %s chars! %s',
- _MAX_STRING_LENGTH, t)
- too_long.add(t[:_MAX_STRING_LENGTH - 4] + '...')
-
- # store/update response entity. the prune_*() calls are important to
- # remove circular references in link responses, which are their own
- # activities. details in the step 2 comment above.
- pruned_response = util.prune_response(resp)
- pruned_responses.append(pruned_response)
- resp_entity = Response(
- id=id,
- source=source.key,
- activities_json=[json_dumps(util.prune_activity(a, source))
- for a in activities],
- response_json=json_dumps(pruned_response),
- type=resp_type,
- unsent=list(urls_to_activity.keys()),
- failed=list(too_long),
- original_posts=resp.get('originals', []))
- if urls_to_activity and len(activities) > 1:
- resp_entity.urls_to_activity=json_dumps(urls_to_activity)
- resp_entity.get_or_save(source, restart=self.RESTART_EXISTING_TASKS)
-
- # update cache
- if pruned_responses:
- source.updates['seen_responses_cache_json'] = json_dumps(
- pruned_responses + unchanged_responses)
-
- def repropagate_old_responses(self, source, relationships):
- """Find old Responses that match a new SyndicatedPost and repropagate them.
-
- We look through as many responses as we can until the datastore query expires.
-
- Args:
- source: :class:`models.Source`
- relationships: refetch result
-    """
- for response in (Response.query(Response.source == source.key)
- .order(-Response.updated)):
- new_orig_urls = set()
- for activity_json in response.activities_json:
- activity = json_loads(activity_json)
- activity_url = activity.get('url') or activity.get('object', {}).get('url')
- if not activity_url:
- logging.warning('activity has no url %s', activity_json)
- continue
-
- activity_url = source.canonicalize_url(activity_url, activity=activity)
- if not activity_url:
- continue
-
- # look for activity url in the newly discovered list of relationships
- for relationship in relationships.get(activity_url, []):
- # won't re-propagate if the discovered link is already among
- # these well-known upstream duplicates
- if (relationship.original in response.sent or
- relationship.original in response.original_posts):
- logging.info(
- '%s found a new rel=syndication link %s -> %s, but the '
- 'relationship had already been discovered by another method',
- response.label(), relationship.original, relationship.syndication)
- else:
- logging.info(
- '%s found a new rel=syndication link %s -> %s, and '
- 'will be repropagated with a new target!',
- response.label(), relationship.original, relationship.syndication)
- new_orig_urls.add(relationship.original)
- if new_orig_urls:
- # re-open a previously 'complete' propagate task
- response.status = 'new'
- response.unsent.extend(list(new_orig_urls))
- response.put()
- response.add_task()
+    """Task handler that fetches and processes new responses from a single source.
+
+    Request parameters:
+
+    * source_key: string key of source entity
+    * last_polled: timestamp, YYYY-MM-DD-HH-MM-SS
+
+    Inserts a propagate task for each response that hasn't been seen before.
+
+    Steps:
+    1: Fetch activities: posts by the user, links to the user's domain(s).
+    2: Extract responses, store their activities.
+    3: Filter out responses we've already seen, using Responses in the datastore.
+    4: Store new responses and enqueue propagate tasks.
+    5: Possibly refetch updated syndication urls.
+
+    1-4 are in backfeed(); 5 is in poll().
+    """
+
+    RESTART_EXISTING_TASKS = False  # overridden in Discover
+
+ def _last_poll_url(self, source):
+ return util.host_url(logs.url(source.last_poll_attempt, source.key))
+
+ def dispatch_request(self):
+ logging.debug("Params: %s", list(request.values.items()))
+
+ key = request.values["source_key"]
+ source = g.source = ndb.Key(urlsafe=key).get()
+ if not source or source.status == "disabled" or "listen" not in source.features:
+ logging.error("Source not found or disabled. Dropping task.")
+ return ""
+ logging.info(
+ "Source: %s %s, %s", source.label(), source.key_id(), source.bridgy_url()
+ )
+
+ if source.AUTO_POLL:
+ last_polled = request.values["last_polled"]
+ if last_polled != source.last_polled.strftime(
+ util.POLL_TASK_DATETIME_FORMAT
+ ):
+ logging.warning("duplicate poll task! deferring to the other task.")
+ return ""
+
+ logging.info("Last poll: %s", self._last_poll_url(source))
+
+ # mark this source as polling
+ source.updates = {
+ "poll_status": "polling",
+ "last_poll_attempt": util.now_fn(),
+ "rate_limited": False,
+ }
+ source = models.Source.put_updates(source)
+
+ source.updates = {}
+ try:
+ self.poll(source)
+ except Exception as e:
+ source.updates["poll_status"] = "error"
+ code, _ = util.interpret_http_exception(e)
+ if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
+ # the user deauthorized the bridgy app, so disable this source.
+ # let the task complete successfully so that it's not retried.
+ logging.warning(f"Disabling source due to: {e}", exc_info=True)
+ source.updates.update(
+ {
+ "status": "disabled",
+ "poll_status": "ok",
+ }
+ )
+ elif code in source.RATE_LIMIT_HTTP_CODES:
+ logging.info("Rate limited. Marking as error and finishing. %s", e)
+ source.updates["rate_limited"] = True
+ else:
+ raise
+ finally:
+ source = models.Source.put_updates(source)
+
+ if source.AUTO_POLL:
+ util.add_poll_task(source)
+
+ # feeble attempt to avoid hitting the instance memory limit
+ source = None
+ gc.collect()
+
+ return "OK"
+
+ def poll(self, source):
+ """Actually runs the poll.
+
+ Stores property names and values to update in source.updates.
+ """
+ if source.last_activities_etag or source.last_activity_id:
+ logging.debug(
+ "Using ETag %s, last activity id %s",
+ source.last_activities_etag,
+ source.last_activity_id,
+ )
+
+ #
+ # Step 1: fetch activities:
+ # * posts by the user
+ # * search all posts for the user's domain URLs to find links
+ #
+ cache = util.CacheDict()
+ if source.last_activities_cache_json:
+ cache.update(json_loads(source.last_activities_cache_json))
+
+ # search for links first so that the user's activities and responses
+ # override them if they overlap
+ links = source.search_for_links()
+
+ # this user's own activities (and user mentions)
+ resp = source.get_activities_response(
+ fetch_replies=True,
+ fetch_likes=True,
+ fetch_shares=True,
+ fetch_mentions=True,
+ count=50,
+ etag=source.last_activities_etag,
+ min_id=source.last_activity_id,
+ cache=cache,
+ )
+ etag = resp.get("etag") # used later
+ user_activities = resp.get("items", [])
+
+ # these map ids to AS objects.
+ # backfeed all links as responses, but only include the user's own links as
+    # activities, since their responses also get backfed.
+ responses = {a["id"]: a for a in links}
+
+ user_id = source.user_tag_id()
+ links_by_user = [
+ a
+ for a in links
+ if a.get("object", {}).get("author", {}).get("id") == user_id
+ ]
+ activities = {a["id"]: a for a in links_by_user + user_activities}
+
+ # extract silo activity ids, update last_activity_id
+ silo_activity_ids = set()
+ last_activity_id = source.last_activity_id
+ for id, activity in activities.items():
+ # maybe replace stored last activity id
+ parsed = util.parse_tag_uri(id)
+ if parsed:
+ id = parsed[1]
+ silo_activity_ids.add(id)
+ try:
+ # try numeric comparison first
+ greater = int(id) > int(last_activity_id)
+ except (TypeError, ValueError):
+ greater = str(id) > str(last_activity_id)
+ if greater:
+ last_activity_id = id
+
+ if last_activity_id and last_activity_id != source.last_activity_id:
+ source.updates["last_activity_id"] = last_activity_id
+
+ # trim cache to just the returned activity ids, so that it doesn't grow
+ # without bound. (WARNING: depends on get_activities_response()'s cache key
+ # format, e.g. 'PREFIX ACTIVITY_ID'!)
+ source.updates["last_activities_cache_json"] = json_dumps(
+ {k: v for k, v in cache.items() if k.split()[-1] in silo_activity_ids}
+ )
+
+ self.backfeed(source, responses, activities=activities)
+
+ source.updates.update(
+ {"last_polled": source.last_poll_attempt, "poll_status": "ok"}
+ )
+ if etag and etag != source.last_activities_etag:
+ source.updates["last_activities_etag"] = etag
+
+ #
+ # Possibly refetch updated syndication urls.
+ #
+ # if the author has added syndication urls since the first time
+ # original_post_discovery ran, we'll miss them. this cleanup task will
+ # periodically check for updated urls. only kicks in if the author has
+ # *ever* published a rel=syndication url
+ if source.should_refetch():
+ logging.info("refetching h-feed for source %s", source.label())
+ relationships = original_post_discovery.refetch(source)
+
+ now = util.now_fn()
+ source.updates["last_hfeed_refetch"] = now
+
+ if relationships:
+ logging.info(
+ "refetch h-feed found new rel=syndication relationships: %s",
+ relationships,
+ )
+ try:
+ self.repropagate_old_responses(source, relationships)
+ except BaseException as e:
+ if (
+ "BadRequestError" in str(e.__class__)
+ or "Timeout" in str(e.__class__)
+ or util.is_connection_failure(e)
+ ):
+ logging.info(
+ "Timeout while repropagating responses.", exc_info=True
+ )
+ else:
+ raise
+ else:
+ logging.info(
+ "skipping refetch h-feed. last-syndication-url %s, last-refetch %s",
+ source.last_syndication_url,
+ source.last_hfeed_refetch,
+ )
+
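Aside: the last_activity_id bookkeeping above compares ids numerically when possible and lexicographically otherwise. A standalone sketch of that rule, with made-up ids:

    def newer(candidate, current):
        try:
            return int(candidate) > int(current)  # numeric silo ids
        except (TypeError, ValueError):
            return str(candidate) > str(current)  # fall back to string order

    assert newer("10", "9")      # numeric comparison: 10 > 9
    assert not newer("9", "10")  # plain string comparison would get this wrong
    assert newer("b", "a")       # non-numeric ids compare as strings
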
+ def backfeed(self, source, responses=None, activities=None):
+ """Processes responses and activities and generates propagate tasks.
+
+ Stores property names and values to update in source.updates.
+
+ Args:
+ source: Source
+ responses: dict mapping AS response id to AS object
+ activities: dict mapping AS activity id to AS object
+ """
+ if responses is None:
+ responses = {}
+ if activities is None:
+ activities = {}
+
+ # Cache to make sure we only fetch the author's h-feed(s) the
+ # first time we see it
+ fetched_hfeeds = set()
+
+ # narrow down to just public activities
+ public = {}
+ private = {}
+ for id, activity in activities.items():
+ (public if source.is_activity_public(activity) else private)[id] = activity
+ logging.info("Found %d public activities: %s", len(public), public.keys())
+ logging.info("Found %d private activities: %s", len(private), private.keys())
+
+ last_public_post = (source.last_public_post or util.EPOCH).isoformat()
+ public_published = util.trim_nulls(
+ [a.get("object", {}).get("published") for a in public.values()]
+ )
+ if public_published:
+ max_published = max(public_published)
+ if max_published > last_public_post:
+ last_public_post = max_published
+ source.updates["last_public_post"] = util.as_utc(
+ util.parse_iso8601(max_published)
+ )
+
+ source.updates["recent_private_posts"] = len(
+ [
+ a
+ for a in private.values()
+ if a.get("object", {}).get("published", util.EPOCH_ISO)
+ > last_public_post
+ ]
+ )
+
+ #
+ # Step 2: extract responses, store their activities in response['activities']
+ #
+ # WARNING: this creates circular references in link posts found by search
+ # queries in step 1, since they are their own activity. We use
+ # prune_activity() and prune_response() in step 4 to remove these before
+ # serializing to JSON.
+ #
+ for id, activity in public.items():
+ obj = activity.get("object") or activity
+
+ # handle user mentions
+ user_id = source.user_tag_id()
+ if (
+ obj.get("author", {}).get("id") != user_id
+ and activity.get("verb") != "share"
+ ):
+ for tag in obj.get("tags", []):
+ urls = tag.get("urls")
+ if (
+ tag.get("objectType") == "person"
+ and tag.get("id") == user_id
+ and urls
+ ):
+ (
+ activity["originals"],
+ activity["mentions"],
+ ) = original_post_discovery.discover(
+ source,
+ activity,
+ fetch_hfeed=True,
+ include_redirect_sources=False,
+ already_fetched_hfeeds=fetched_hfeeds,
+ )
+ activity["mentions"].update(u.get("value") for u in urls)
+ responses[id] = activity
+ break
+
+ # handle quote mentions
+ for att in obj.get("attachments", []):
+ if (
+ att.get("objectType") in ("note", "article")
+ and att.get("author", {}).get("id") == source.user_tag_id()
+ ):
+ # now that we've confirmed that one exists, OPD will dig
+ # into the actual attachments
+ if "originals" not in activity or "mentions" not in activity:
+ (
+ activity["originals"],
+ activity["mentions"],
+ ) = original_post_discovery.discover(
+ source,
+ activity,
+ fetch_hfeed=True,
+ include_redirect_sources=False,
+ already_fetched_hfeeds=fetched_hfeeds,
+ )
+ responses[id] = activity
+ break
+
+ # extract replies, likes, reactions, reposts, and rsvps
+ replies = obj.get("replies", {}).get("items", [])
+ tags = obj.get("tags", [])
+ likes = [t for t in tags if Response.get_type(t) == "like"]
+ reactions = [t for t in tags if Response.get_type(t) == "react"]
+ reposts = [t for t in tags if Response.get_type(t) == "repost"]
+ rsvps = Source.get_rsvps_from_event(obj)
+
+ # coalesce responses. drop any without ids
+ for resp in replies + likes + reactions + reposts + rsvps:
+ id = resp.get("id")
+ if not id:
+ logging.error(
+ "Skipping response without id: %s", json_dumps(resp, indent=2)
+ )
+ continue
+
+ if source.is_blocked(resp):
+ logging.info(
+ "Skipping response by blocked user: %s",
+ json_dumps(resp.get("author") or resp.get("actor"), indent=2),
+ )
+ continue
+
+ resp.setdefault("activities", []).append(activity)
+
+ # when we find two responses with the same id, the earlier one may have
+ # come from a link post or user mention, and this one is probably better
+ # since it probably came from the user's activity, so prefer this one.
+ # background: https://github.com/snarfed/bridgy/issues/533
+ existing = responses.get(id)
+ if existing:
+ if source.gr_source.activity_changed(resp, existing, log=True):
+ logging.warning(
+ "Got two different versions of same response!\n%s\n%s",
+ existing,
+ resp,
+ )
+ resp["activities"].extend(existing.get("activities", []))
+
+ responses[id] = resp
+
+ #
+ # Step 3: filter out responses we've already seen
+ #
+ # seen responses (JSON objects) for each source are stored in its entity.
+ unchanged_responses = []
+ if source.seen_responses_cache_json:
+ for seen in json_loads(source.seen_responses_cache_json):
+ id = seen["id"]
+ resp = responses.get(id)
+ if resp and not source.gr_source.activity_changed(seen, resp, log=True):
+ unchanged_responses.append(seen)
+ del responses[id]
+
+ #
+ # Step 4: store new responses and enqueue propagate tasks
+ #
+ pruned_responses = []
+ source.blocked_ids = None
+
+ for id, resp in responses.items():
+ resp_type = Response.get_type(resp)
+ activities = resp.pop("activities", [])
+ if not activities and resp_type == "post":
+ activities = [resp]
+ too_long = set()
+ urls_to_activity = {}
+ for i, activity in enumerate(activities):
+ # we'll usually have multiple responses for the same activity, and the
+ # objects in resp['activities'] are shared, so cache each activity's
+ # discovered webmention targets inside its object.
+ if "originals" not in activity or "mentions" not in activity:
+ (
+ activity["originals"],
+ activity["mentions"],
+ ) = original_post_discovery.discover(
+ source,
+ activity,
+ fetch_hfeed=True,
+ include_redirect_sources=False,
+ already_fetched_hfeeds=fetched_hfeeds,
+ )
+
+ targets = original_post_discovery.targets_for_response(
+ resp, originals=activity["originals"], mentions=activity["mentions"]
+ )
+ if targets:
+ logging.info(
+ "%s has %d webmention target(s): %s",
+ activity.get("url"),
+ len(targets),
+ " ".join(targets),
+ )
+ # new response to propagate! load block list if we haven't already
+ if source.blocked_ids is None:
+ source.load_blocklist()
+
+ for t in targets:
+ if len(t) <= _MAX_STRING_LENGTH:
+ urls_to_activity[t] = i
+ else:
+ logging.info(
+ "Giving up on target URL over %s chars! %s",
+ _MAX_STRING_LENGTH,
+ t,
+ )
+ too_long.add(t[: _MAX_STRING_LENGTH - 4] + "...")
+
+ # store/update response entity. the prune_*() calls are important to
+ # remove circular references in link responses, which are their own
+ # activities. details in the step 2 comment above.
+ pruned_response = util.prune_response(resp)
+ pruned_responses.append(pruned_response)
+ resp_entity = Response(
+ id=id,
+ source=source.key,
+ activities_json=[
+ json_dumps(util.prune_activity(a, source)) for a in activities
+ ],
+ response_json=json_dumps(pruned_response),
+ type=resp_type,
+ unsent=list(urls_to_activity.keys()),
+ failed=list(too_long),
+ original_posts=resp.get("originals", []),
+ )
+ if urls_to_activity and len(activities) > 1:
+ resp_entity.urls_to_activity = json_dumps(urls_to_activity)
+ resp_entity.get_or_save(source, restart=self.RESTART_EXISTING_TASKS)
+
+ # update cache
+ if pruned_responses:
+ source.updates["seen_responses_cache_json"] = json_dumps(
+ pruned_responses + unchanged_responses
+ )
+
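Aside: the urls_to_activity map built in step 4 records, for each webmention target, the index of the activity it was discovered on, so propagate can later pick the right source URL. A toy illustration with fabricated data:

    activities = [
        {"id": "tag:fa.ke,2013:1", "url": "https://fa.ke/post/1"},
        {"id": "tag:fa.ke,2013:2", "url": "https://fa.ke/post/2"},
    ]
    targets_by_activity = [
        ["http://author/post/a"],                   # found on activities[0]
        ["http://author/post/b", "http://other/"],  # found on activities[1]
    ]

    urls_to_activity = {}
    for i, targets in enumerate(targets_by_activity):
        for target in targets:
            urls_to_activity[target] = i

    assert urls_to_activity["http://other/"] == 1  # serialized into the Response
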
+ def repropagate_old_responses(self, source, relationships):
+ """Find old Responses that match a new SyndicatedPost and repropagate them.
+
+ We look through as many responses as we can until the datastore query expires.
+
+ Args:
+ source: :class:`models.Source`
+ relationships: refetch result
+ """
+ for response in Response.query(Response.source == source.key).order(
+ -Response.updated
+ ):
+ new_orig_urls = set()
+ for activity_json in response.activities_json:
+ activity = json_loads(activity_json)
+ activity_url = activity.get("url") or activity.get("object", {}).get(
+ "url"
+ )
+ if not activity_url:
+ logging.warning("activity has no url %s", activity_json)
+ continue
+
+ activity_url = source.canonicalize_url(activity_url, activity=activity)
+ if not activity_url:
+ continue
+
+ # look for activity url in the newly discovered list of relationships
+ for relationship in relationships.get(activity_url, []):
+ # won't re-propagate if the discovered link is already among
+ # these well-known upstream duplicates
+ if (
+ relationship.original in response.sent
+ or relationship.original in response.original_posts
+ ):
+ logging.info(
+ "%s found a new rel=syndication link %s -> %s, but the "
+ "relationship had already been discovered by another method",
+ response.label(),
+ relationship.original,
+ relationship.syndication,
+ )
+ else:
+ logging.info(
+ "%s found a new rel=syndication link %s -> %s, and "
+ "will be repropagated with a new target!",
+ response.label(),
+ relationship.original,
+ relationship.syndication,
+ )
+ new_orig_urls.add(relationship.original)
+
+ if new_orig_urls:
+ # re-open a previously 'complete' propagate task
+ response.status = "new"
+ response.unsent.extend(list(new_orig_urls))
+ response.put()
+ response.add_task()
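Aside: repropagate_old_responses consumes the refetch() result as a mapping from canonicalized syndication URL to relationship objects carrying .original and .syndication. A toy stand-in under that assumption (the real class is models.SyndicatedPost):

    from collections import namedtuple

    SyndicatedPost = namedtuple("SyndicatedPost", ["original", "syndication"])

    relationships = {
        "https://fa.ke/post/url": [
            SyndicatedPost(original="http://author/post/permalink",
                           syndication="https://fa.ke/post/url"),
        ],
    }

    for rel in relationships.get("https://fa.ke/post/url", []):
        # rel.original joins response.unsent unless it was already sent/known
        print(rel.original)
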
class Discover(Poll):
- """Task handler that fetches and processes new responses to a single post.
+ """Task handler that fetches and processes new responses to a single post.
- Request parameters:
+ Request parameters:
- * source_key: string key of source entity
- * post_id: string, silo post id(s)
+ * source_key: string key of source entity
+ * post_id: string, silo post id(s)
- Inserts a propagate task for each response that hasn't been seen before.
+ Inserts a propagate task for each response that hasn't been seen before.
- Original feature request: https://github.com/snarfed/bridgy/issues/579
- """
- RESTART_EXISTING_TASKS = True
+ Original feature request: https://github.com/snarfed/bridgy/issues/579
+ """
- def dispatch_request(self):
- logging.debug('Params: %s', list(request.values.items()))
- g.TRANSIENT_ERROR_HTTP_CODES = ('400', '404')
+ RESTART_EXISTING_TASKS = True
- type = request.values.get('type')
- if type:
- assert type in ('event',)
+ def dispatch_request(self):
+ logging.debug("Params: %s", list(request.values.items()))
+ g.TRANSIENT_ERROR_HTTP_CODES = ("400", "404")
- source = g.source = util.load_source()
- if not source or source.status == 'disabled' or 'listen' not in source.features:
- logging.error('Source not found or disabled. Dropping task.')
- return ''
- logging.info('Source: %s %s, %s', source.label(), source.key_id(),
- source.bridgy_url())
+ type = request.values.get("type")
+ if type:
+ assert type in ("event",)
- post_id = request.values['post_id']
- source.updates = {}
+ source = g.source = util.load_source()
+ if not source or source.status == "disabled" or "listen" not in source.features:
+ logging.error("Source not found or disabled. Dropping task.")
+ return ""
+ logging.info(
+ "Source: %s %s, %s", source.label(), source.key_id(), source.bridgy_url()
+ )
- if type == 'event':
- activities = [source.gr_source.get_event(post_id)]
- else:
- activities = source.get_activities(
- fetch_replies=True, fetch_likes=True, fetch_shares=True,
- activity_id=post_id, user_id=source.key_id())
+ post_id = request.values["post_id"]
+ source.updates = {}
- if not activities or not activities[0]:
- logging.info('Post %s not found.', post_id)
- return ''
- assert len(activities) == 1, activities
- activity = activities[0]
- activities = {activity['id']: activity}
- self.backfeed(source, responses=activities, activities=activities)
+ if type == "event":
+ activities = [source.gr_source.get_event(post_id)]
+ else:
+ activities = source.get_activities(
+ fetch_replies=True,
+ fetch_likes=True,
+ fetch_shares=True,
+ activity_id=post_id,
+ user_id=source.key_id(),
+ )
+
+ if not activities or not activities[0]:
+ logging.info("Post %s not found.", post_id)
+ return ""
+ assert len(activities) == 1, activities
+ activity = activities[0]
+ activities = {activity["id"]: activity}
+ self.backfeed(source, responses=activities, activities=activities)
+
+ obj = activity.get("object") or activity
+ in_reply_to = util.get_first(obj, "inReplyTo")
+ if in_reply_to:
+ parsed = util.parse_tag_uri(
+ in_reply_to.get("id", "")
+ ) # TODO: fall back to url
+ if parsed:
+ util.add_discover_task(source, parsed[1])
+
+ return "OK"
- obj = activity.get('object') or activity
- in_reply_to = util.get_first(obj, 'inReplyTo')
- if in_reply_to:
- parsed = util.parse_tag_uri(in_reply_to.get('id', '')) # TODO: fall back to url
- if parsed:
- util.add_discover_task(source, parsed[1])
- return 'OK'
-
-
-class SendWebmentions(View):
-  """Abstract base task handler that can send webmentions.
-
-  Attributes:
-
-  * entity: :class:`models.Webmentions` subclass instance (set in :meth:`lease_entity`)
-  * source: :class:`models.Source` entity (set in :meth:`send_webmentions`)
-  """
-  # request deadline (10m) plus some padding
-  LEASE_LENGTH = datetime.timedelta(minutes=12)
-
-  def source_url(self, target_url):
-    """Return the source URL to use for a given target URL.
-
-    Subclasses must implement.
-
-    Args:
-      target_url: string
-
-    Returns:
-      string
-    """
-    raise NotImplementedError()
-
-  def send_webmentions(self):
-    """Tries to send each unsent webmention in self.entity.
-
-    Uses :meth:`source_url()` to determine the source parameter for each
-    webmention.
-
-    :meth:`lease()` *must* be called before this!
-    """
-    logging.info('Starting %s', self.entity.label())
-
- try:
- self.do_send_webmentions()
- except:
- logging.info('Propagate task failed', exc_info=True)
- self.release('error')
- raise
-
- def do_send_webmentions(self):
- urls = self.entity.unsent + self.entity.error + self.entity.failed
- unsent = set()
- self.entity.error = []
- self.entity.failed = []
-
- for orig_url in urls:
- # recheck the url here since the checks may have failed during the poll
- # or streaming add.
- url, domain, ok = util.get_webmention_target(orig_url)
- if ok:
- if len(url) <= _MAX_STRING_LENGTH:
- unsent.add(url)
- else:
- logging.info('Giving up on target URL over %s chars! %s',
- _MAX_STRING_LENGTH, url)
- self.entity.failed.append(orig_url)
- self.entity.unsent = sorted(unsent)
-
- while self.entity.unsent:
- target = self.entity.unsent.pop(0)
- source_url = self.source_url(target)
- logging.info('Webmention from %s to %s', source_url, target)
-
- # see if we've cached webmention discovery for this domain. the cache
- # value is a string URL endpoint if discovery succeeded, NO_ENDPOINT if
- # no endpoint was ofund.
- cache_key = util.webmention_endpoint_cache_key(target)
- endpoint = util.webmention_endpoint_cache.get(cache_key)
- if endpoint:
- logging.info(f'Webmention discovery: using cached endpoint {cache_key}: {endpoint}')
-
- # send! and handle response or error
- try:
- resp = None
- headers = util.request_headers(source=g.source)
- if not endpoint:
- endpoint, resp = webmention.discover(target, headers=headers)
- with util.webmention_endpoint_cache_lock:
- util.webmention_endpoint_cache[cache_key] = endpoint or NO_ENDPOINT
-
- if endpoint and endpoint != NO_ENDPOINT:
- logging.info('Sending...')
- resp = webmention.send(endpoint, source_url, target, timeout=999,
- headers=headers)
- logging.info('Sent! %s', resp)
- self.record_source_webmention(endpoint, target)
- self.entity.sent.append(target)
+
+
+class SendWebmentions(View):
+    """Abstract base task handler that can send webmentions.
+
+    Attributes:
+
+    * entity: :class:`models.Webmentions` subclass instance (set in :meth:`lease_entity`)
+    * source: :class:`models.Source` entity (set in :meth:`send_webmentions`)
+    """
+
+    # request deadline (10m) plus some padding
+    LEASE_LENGTH = datetime.timedelta(minutes=12)
+
+    def source_url(self, target_url):
+        """Return the source URL to use for a given target URL.
+
+        Subclasses must implement.
+
+        Args:
+          target_url: string
+
+        Returns:
+          string
+        """
+        raise NotImplementedError()
+
+    def send_webmentions(self):
+        """Tries to send each unsent webmention in self.entity.
+
+        Uses :meth:`source_url()` to determine the source parameter for each
+        webmention.
+
+        :meth:`lease()` *must* be called before this!
+        """
+        logging.info("Starting %s", self.entity.label())
+
+        try:
+ self.do_send_webmentions()
+ except:
+ logging.info("Propagate task failed", exc_info=True)
+ self.release("error")
+ raise
+
+ def do_send_webmentions(self):
+ urls = self.entity.unsent + self.entity.error + self.entity.failed
+ unsent = set()
+ self.entity.error = []
+ self.entity.failed = []
+
+ for orig_url in urls:
+ # recheck the url here since the checks may have failed during the poll
+ # or streaming add.
+ url, domain, ok = util.get_webmention_target(orig_url)
+ if ok:
+ if len(url) <= _MAX_STRING_LENGTH:
+ unsent.add(url)
+ else:
+ logging.info(
+ "Giving up on target URL over %s chars! %s",
+ _MAX_STRING_LENGTH,
+ url,
+ )
+ self.entity.failed.append(orig_url)
+ self.entity.unsent = sorted(unsent)
+
+ while self.entity.unsent:
+ target = self.entity.unsent.pop(0)
+ source_url = self.source_url(target)
+ logging.info("Webmention from %s to %s", source_url, target)
+
+ # see if we've cached webmention discovery for this domain. the cache
+ # value is a string URL endpoint if discovery succeeded, NO_ENDPOINT if
+            # no endpoint was found.
+ cache_key = util.webmention_endpoint_cache_key(target)
+ endpoint = util.webmention_endpoint_cache.get(cache_key)
+ if endpoint:
+ logging.info(
+ f"Webmention discovery: using cached endpoint {cache_key}: {endpoint}"
+ )
+
+ # send! and handle response or error
+ try:
+ resp = None
+ headers = util.request_headers(source=g.source)
+ if not endpoint:
+ endpoint, resp = webmention.discover(target, headers=headers)
+ with util.webmention_endpoint_cache_lock:
+ util.webmention_endpoint_cache[cache_key] = (
+ endpoint or NO_ENDPOINT
+ )
+
+ if endpoint and endpoint != NO_ENDPOINT:
+ logging.info("Sending...")
+ resp = webmention.send(
+ endpoint, source_url, target, timeout=999, headers=headers
+ )
+ logging.info("Sent! %s", resp)
+ self.record_source_webmention(endpoint, target)
+ self.entity.sent.append(target)
+ else:
+ logging.info("Giving up this target.")
+ self.entity.skipped.append(target)
+
+ except ValueError:
+ logging.info("Bad URL; giving up this target.")
+ self.entity.skipped.append(target)
+
+ except BaseException as e:
+ logging.info("", exc_info=True)
+ # Give up on 4XX and DNS errors; we don't expect retries to succeed.
+ code, _ = util.interpret_http_exception(e)
+ if (code and code.startswith("4")) or "DNS lookup failed" in str(e):
+ logging.info("Giving up this target.")
+ self.entity.failed.append(target)
+ else:
+ self.fail(f"Error sending to endpoint: {resp}")
+ self.entity.error.append(target)
+
+ if target in self.entity.unsent:
+ self.entity.unsent.remove(target)
+
-        else:
-          logging.info('Giving up this target.')
-          self.entity.skipped.append(target)
-
-      except ValueError:
-        logging.info('Bad URL; giving up this target.')
-        self.entity.skipped.append(target)
-
-      except BaseException as e:
-        logging.info('', exc_info=True)
-        # Give up on 4XX and DNS errors; we don't expect retries to succeed.
-        code, _ = util.interpret_http_exception(e)
-        if (code and code.startswith('4')) or 'DNS lookup failed' in str(e):
-          logging.info('Giving up this target.')
-          self.entity.failed.append(target)
+        if self.entity.error:
+            logging.info("Some targets failed")
+            self.release("error")
+        else:
+            self.complete()
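
Aside: the endpoint cache used in do_send_webmentions stores either a discovered endpoint URL or the NO_ENDPOINT sentinel, so domains without an endpoint don't get re-discovered on every send. Reduced to a sketch; TTLCache and the lock mirror util's cache, and the size and TTL here are assumptions:

    import threading
    from cachetools import TTLCache

    NO_ENDPOINT = "NONE"
    endpoint_cache = TTLCache(maxsize=5000, ttl=60 * 60 * 2)  # assumed limits
    endpoint_cache_lock = threading.RLock()

    def cached_discover(key, discover_fn):
        endpoint = endpoint_cache.get(key)
        if endpoint is None:  # nothing cached yet, run real discovery once
            endpoint = discover_fn(key) or NO_ENDPOINT
            with endpoint_cache_lock:
                endpoint_cache[key] = endpoint
        return None if endpoint == NO_ENDPOINT else endpoint
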
+
+ @ndb.transactional()
+ def lease(self, key):
+ """Attempts to acquire and lease the :class:`models.Webmentions` entity.
+
+ Also loads and sets `g.source`, and returns False if the source doesn't
+ exist or is disabled.
+
+ TODO: unify with :meth:`complete()`
+
+ Args:
+ key: :class:`ndb.Key`
+
+ Returns: True on success, False or None otherwise
+ """
+ self.entity = key.get()
+
+ if self.entity is None:
+ return self.fail("no entity!")
+ elif self.entity.status == "complete":
+ # let this task return 200 and finish
+ logging.warning("duplicate task already propagated this")
+ return
+ elif (
+ self.entity.status == "processing"
+ and util.now_fn() < self.entity.leased_until
+ ):
+ return self.fail("duplicate task is currently processing!")
+
+ g.source = self.entity.source.get()
+ if not g.source or g.source.status == "disabled":
+ logging.error("Source not found or disabled. Dropping task.")
+ return False
+ logging.info(
+ "Source: %s %s, %s",
+ g.source.label(),
+ g.source.key_id(),
+ g.source.bridgy_url(),
+ )
+
+ assert self.entity.status in ("new", "processing", "error"), self.entity.status
+ self.entity.status = "processing"
+ self.entity.leased_until = util.now_fn() + self.LEASE_LENGTH
+ self.entity.put()
+ return True
+
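Aside: lease(), complete(), and release() implement a simple lease protocol over the entity's status and leased_until fields: new -> processing -> complete, with expired leases reclaimable by another task. A pseudo-datastore sketch of just the lease rule, without the ndb transaction:

    import datetime

    LEASE_LENGTH = datetime.timedelta(minutes=12)

    def try_lease(entity, now):
        if entity["status"] == "complete":
            return False  # already done; a duplicate task should no-op
        if (entity["status"] == "processing" and entity["leased_until"]
                and now < entity["leased_until"]):
            return False  # another task holds a live lease
        entity["status"] = "processing"
        entity["leased_until"] = now + LEASE_LENGTH
        return True

    entity = {"status": "new", "leased_until": None}
    now = datetime.datetime(2022, 1, 1)
    assert try_lease(entity, now)
    assert not try_lease(entity, now)             # live lease blocks duplicates
    assert try_lease(entity, now + LEASE_LENGTH)  # expired lease is reclaimable
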
+ @ndb.transactional()
+ def complete(self):
+ """Attempts to mark the :class:`models.Webmentions` entity completed.
+
+ Returns True on success, False otherwise.
+ """
+ existing = self.entity.key.get()
+ if existing is None:
+ self.fail("entity disappeared!")
+ elif existing.status == "complete":
+ # let this task return 200 and finish
+ logging.warning(
+ "another task stole and finished this. did my lease expire?"
+ )
+ elif self.entity.status == "complete":
+ # let this task return 200 and finish
+ logging.error(
+ "i already completed this task myself somehow?! "
+ "https://github.com/snarfed/bridgy/issues/610"
+ )
+ elif existing.status == "new":
+ self.fail("went backward from processing to new!")
-        else:
-          self.fail(f'Error sending to endpoint: {resp}')
-          self.entity.error.append(target)
+        else:
+ assert existing.status == "processing", existing.status
+ assert self.entity.status == "processing", self.entity.status
+ self.entity.status = "complete"
+ self.entity.put()
+ return True
+
+ return False
+
+ @ndb.transactional()
+ def release(self, new_status):
+ """Attempts to unlease the :class:`models.Webmentions` entity.
+
+ Args:
+ new_status: string
+ """
+ existing = self.entity.key.get()
+ if existing and existing.status == "processing":
+ self.entity.status = new_status
+ self.entity.leased_until = None
+ self.entity.put()
+
+ def fail(self, message):
+ """Marks the request failed and logs an error message."""
+ logging.warning(message)
+ g.failed = True
+
+ @ndb.transactional()
+ def record_source_webmention(self, endpoint, target):
+ """Sets this source's last_webmention_sent and maybe webmention_endpoint.
+
+ Args:
+ endpoint: str, URL
+ target: str, URL
+ """
+ g.source = g.source.key.get()
+ logging.info("Setting last_webmention_sent")
+ g.source.last_webmention_sent = util.now_fn()
+
+ if (
+ endpoint != g.source.webmention_endpoint
+ and util.domain_from_link(target) in g.source.domains
+ ):
+ logging.info(
+ "Also setting webmention_endpoint to %s (discovered in %s; was %s)",
+ endpoint,
+ target,
+ g.source.webmention_endpoint,
+ )
+ g.source.webmention_endpoint = endpoint
- if target in self.entity.unsent:
- self.entity.unsent.remove(target)
+ g.source.put()
- if self.entity.error:
- logging.info('Some targets failed')
- self.release('error')
- else:
- self.complete()
- @ndb.transactional()
- def lease(self, key):
- """Attempts to acquire and lease the :class:`models.Webmentions` entity.
-    Also loads and sets `g.source`, and returns False if the source doesn't
-    exist or is disabled.
-
-    TODO: unify with :meth:`complete()`
-
-    Args:
-      key: :class:`ndb.Key`
-
-    Returns: True on success, False or None otherwise
-    """
- self.entity = key.get()
-
- if self.entity is None:
- return self.fail('no entity!')
- elif self.entity.status == 'complete':
- # let this task return 200 and finish
- logging.warning('duplicate task already propagated this')
- return
- elif (self.entity.status == 'processing' and
- util.now_fn() < self.entity.leased_until):
- return self.fail('duplicate task is currently processing!')
-
- g.source = self.entity.source.get()
- if not g.source or g.source.status == 'disabled':
- logging.error('Source not found or disabled. Dropping task.')
- return False
- logging.info('Source: %s %s, %s', g.source.label(), g.source.key_id(),
- g.source.bridgy_url())
-
- assert self.entity.status in ('new', 'processing', 'error'), self.entity.status
- self.entity.status = 'processing'
- self.entity.leased_until = util.now_fn() + self.LEASE_LENGTH
- self.entity.put()
- return True
-
- @ndb.transactional()
- def complete(self):
- """Attempts to mark the :class:`models.Webmentions` entity completed.
-
- Returns True on success, False otherwise.
- """
- existing = self.entity.key.get()
- if existing is None:
- self.fail('entity disappeared!')
- elif existing.status == 'complete':
- # let this task return 200 and finish
- logging.warning('another task stole and finished this. did my lease expire?')
- elif self.entity.status == 'complete':
- # let this task return 200 and finish
- logging.error('i already completed this task myself somehow?! '
- 'https://github.com/snarfed/bridgy/issues/610')
- elif existing.status == 'new':
- self.fail('went backward from processing to new!')
- else:
- assert existing.status == 'processing', existing.status
- assert self.entity.status == 'processing', self.entity.status
- self.entity.status = 'complete'
- self.entity.put()
- return True
-
- return False
-
- @ndb.transactional()
- def release(self, new_status):
- """Attempts to unlease the :class:`models.Webmentions` entity.
-
- Args:
- new_status: string
- """
- existing = self.entity.key.get()
- if existing and existing.status == 'processing':
- self.entity.status = new_status
- self.entity.leased_until = None
- self.entity.put()
-
- def fail(self, message):
- """Marks the request failed and logs an error message."""
- logging.warning(message)
- g.failed = True
-
- @ndb.transactional()
- def record_source_webmention(self, endpoint, target):
- """Sets this source's last_webmention_sent and maybe webmention_endpoint.
-
- Args:
- endpoint: str, URL
- target: str, URL
-    """
- g.source = g.source.key.get()
- logging.info('Setting last_webmention_sent')
- g.source.last_webmention_sent = util.now_fn()
- if (endpoint != g.source.webmention_endpoint and
- util.domain_from_link(target) in g.source.domains):
- logging.info('Also setting webmention_endpoint to %s (discovered in %s; was %s)',
- endpoint, target, g.source.webmention_endpoint)
- g.source.webmention_endpoint = endpoint
-
- g.source.put()
-
-
-class PropagateResponse(SendWebmentions):
- """Task handler that sends webmentions for a :class:`models.Response`.
-
- Attributes:
-
- * activities: parsed :attr:`models.Response.activities_json` list
-
- Request parameters:
-
- * response_key: string key of :class:`models.Response` entity
- """
-
- def dispatch_request(self):
- logging.debug('Params: %s', list(request.values.items()))
- if not self.lease(ndb.Key(urlsafe=request.values['response_key'])):
- return ('', ERROR_HTTP_RETURN_CODE) if getattr(g, 'failed', None) else 'OK'
-
- source = g.source
- poll_estimate = self.entity.created - datetime.timedelta(seconds=61)
- poll_url = util.host_url(logs.url(poll_estimate, source.key))
- logging.info(f'Created by this poll: {poll_url}')
-
- self.activities = [json_loads(a) for a in self.entity.activities_json]
- response_obj = json_loads(self.entity.response_json)
- if (not source.is_activity_public(response_obj) or
- not all(source.is_activity_public(a) for a in self.activities)):
- logging.info('Response or activity is non-public. Dropping.')
- self.complete()
- return ''
-
- self.send_webmentions()
- return ('', ERROR_HTTP_RETURN_CODE) if getattr(g, 'failed', None) else 'OK'
-
- def source_url(self, target_url):
- # determine which activity to use
- try:
- activity = self.activities[0]
- if self.entity.urls_to_activity:
- urls_to_activity = json_loads(self.entity.urls_to_activity)
- if urls_to_activity:
- activity = self.activities[urls_to_activity[target_url]]
- except (KeyError, IndexError):
- error("""Hit https://github.com/snarfed/bridgy/issues/237 KeyError!
+
+
+class PropagateResponse(SendWebmentions):
+    """Task handler that sends webmentions for a :class:`models.Response`.
+
+    Attributes:
+
+    * activities: parsed :attr:`models.Response.activities_json` list
+
+    Request parameters:
+
+    * response_key: string key of :class:`models.Response` entity
+    """
+
+    def dispatch_request(self):
+ logging.debug("Params: %s", list(request.values.items()))
+ if not self.lease(ndb.Key(urlsafe=request.values["response_key"])):
+ return ("", ERROR_HTTP_RETURN_CODE) if getattr(g, "failed", None) else "OK"
+
+ source = g.source
+ poll_estimate = self.entity.created - datetime.timedelta(seconds=61)
+ poll_url = util.host_url(logs.url(poll_estimate, source.key))
+ logging.info(f"Created by this poll: {poll_url}")
+
+ self.activities = [json_loads(a) for a in self.entity.activities_json]
+ response_obj = json_loads(self.entity.response_json)
+ if not source.is_activity_public(response_obj) or not all(
+ source.is_activity_public(a) for a in self.activities
+ ):
+ logging.info("Response or activity is non-public. Dropping.")
+ self.complete()
+ return ""
+
+ self.send_webmentions()
+ return ("", ERROR_HTTP_RETURN_CODE) if getattr(g, "failed", None) else "OK"
+
+ def source_url(self, target_url):
+ # determine which activity to use
+ try:
+ activity = self.activities[0]
+ if self.entity.urls_to_activity:
+ urls_to_activity = json_loads(self.entity.urls_to_activity)
+ if urls_to_activity:
+ activity = self.activities[urls_to_activity[target_url]]
+ except (KeyError, IndexError):
+            error(
+                f"""Hit https://github.com/snarfed/bridgy/issues/237 KeyError!
target url {target_url} not in urls_to_activity: {self.entity.urls_to_activity}
-activities: {self.activities}""", status=ERROR_HTTP_RETURN_CODE)
-
- # generate source URL
- id = activity['id']
- parsed = util.parse_tag_uri(id)
- post_id = parsed[1] if parsed else id
- parts = [self.entity.type, g.source.SHORT_NAME, g.source.key.string_id(), post_id]
-
- if self.entity.type != 'post':
- # parse and add response id. (we know Response key ids are always tag URIs)
- _, response_id = util.parse_tag_uri(self.entity.key.string_id())
- reaction_id = response_id
- if self.entity.type in ('like', 'react', 'repost', 'rsvp'):
- response_id = response_id.split('_')[-1] # extract responder user id
- parts.append(response_id)
- if self.entity.type == 'react':
- parts.append(reaction_id)
-
- return util.host_url('/'.join(parts))
+activities: {self.activities}""",
+ status=ERROR_HTTP_RETURN_CODE,
+ )
+
+ # generate source URL
+ id = activity["id"]
+ parsed = util.parse_tag_uri(id)
+ post_id = parsed[1] if parsed else id
+ parts = [
+ self.entity.type,
+ g.source.SHORT_NAME,
+ g.source.key.string_id(),
+ post_id,
+ ]
+
+ if self.entity.type != "post":
+ # parse and add response id. (we know Response key ids are always tag URIs)
+ _, response_id = util.parse_tag_uri(self.entity.key.string_id())
+ reaction_id = response_id
+ if self.entity.type in ("like", "react", "repost", "rsvp"):
+ response_id = response_id.split("_")[-1] # extract responder user id
+ parts.append(response_id)
+ if self.entity.type == "react":
+ parts.append(reaction_id)
+
+ return util.host_url("/".join(parts))
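
Aside: a worked example of the URL this builds for a like, assuming a source with SHORT_NAME "fake", key id "foo.com", a Response key id of "tag:fa.ke,2013:123_456" (post 123, liking user 456), and Bridgy's production host:

    parts = ["like", "fake", "foo.com", "123", "456"]
    print("https://brid.gy/" + "/".join(parts))
    # https://brid.gy/like/fake/foo.com/123/456
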
class PropagateBlogPost(SendWebmentions):
- """Task handler that sends webmentions for a :class:`models.BlogPost`.
-
- Request parameters:
-
- * key: string key of :class:`models.BlogPost` entity
- """
+ """Task handler that sends webmentions for a :class:`models.BlogPost`.
- def dispatch_request(self):
- logging.debug('Params: %s', list(request.values.items()))
-
- if not self.lease(ndb.Key(urlsafe=request.values['key'])):
- return ('', ERROR_HTTP_RETURN_CODE) if getattr(g, 'failed', None) else 'OK'
-
- to_send = set()
- for url in self.entity.unsent:
- url, domain, ok = util.get_webmention_target(url)
- # skip "self" links to this blog's domain
- if ok and domain not in g.source.domains:
- to_send.add(url)
-
- self.entity.unsent = list(to_send)
- self.send_webmentions()
- return ('', ERROR_HTTP_RETURN_CODE) if getattr(g, 'failed', None) else 'OK'
-
- def source_url(self, target_url):
- return self.entity.key.id()
-app.add_url_rule('/_ah/queue/poll', view_func=Poll.as_view('poll'), methods=['POST'])
-app.add_url_rule('/_ah/queue/poll-now', view_func=Poll.as_view('poll-now'), methods=['POST'])
-app.add_url_rule('/_ah/queue/discover', view_func=Discover.as_view('discover'), methods=['POST'])
-app.add_url_rule('/_ah/queue/propagate', view_func=PropagateResponse.as_view('propagate'), methods=['POST'])
-app.add_url_rule('/_ah/queue/propagate-blogpost', view_func=PropagateBlogPost.as_view('propagate_blogpost'), methods=['POST'])
+    """Task handler that sends webmentions for a :class:`models.BlogPost`.
+
+    Request parameters:
+
+    * key: string key of :class:`models.BlogPost` entity
+    """
+
+    def dispatch_request(self):
+ logging.debug("Params: %s", list(request.values.items()))
+
+ if not self.lease(ndb.Key(urlsafe=request.values["key"])):
+ return ("", ERROR_HTTP_RETURN_CODE) if getattr(g, "failed", None) else "OK"
+
+ to_send = set()
+ for url in self.entity.unsent:
+ url, domain, ok = util.get_webmention_target(url)
+ # skip "self" links to this blog's domain
+ if ok and domain not in g.source.domains:
+ to_send.add(url)
+
+ self.entity.unsent = list(to_send)
+ self.send_webmentions()
+ return ("", ERROR_HTTP_RETURN_CODE) if getattr(g, "failed", None) else "OK"
+
+ def source_url(self, target_url):
+ return self.entity.key.id()
+
+
+app.add_url_rule("/_ah/queue/poll", view_func=Poll.as_view("poll"), methods=["POST"])
+app.add_url_rule(
+ "/_ah/queue/poll-now", view_func=Poll.as_view("poll-now"), methods=["POST"]
+)
+app.add_url_rule(
+ "/_ah/queue/discover", view_func=Discover.as_view("discover"), methods=["POST"]
+)
+app.add_url_rule(
+ "/_ah/queue/propagate",
+ view_func=PropagateResponse.as_view("propagate"),
+ methods=["POST"],
+)
+app.add_url_rule(
+ "/_ah/queue/propagate-blogpost",
+ view_func=PropagateBlogPost.as_view("propagate_blogpost"),
+ methods=["POST"],
+)
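
Aside: these queue endpoints are plain POST handlers, so a task delivery can be simulated locally. The host, port, and key value below are placeholders; real enqueueing goes through util.add_poll_task and friends:

    import requests

    params = {
        "source_key": "<urlsafe ndb key>",     # placeholder
        "last_polled": "1970-01-01-00-00-00",  # util.POLL_TASK_DATETIME_FORMAT
    }
    # requests.post("http://localhost:8080/_ah/queue/poll", data=params)
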
diff --git a/tests/test_blog_webmention.py b/tests/test_blog_webmention.py
index 58f7a823..1e95ae48 100644
--- a/tests/test_blog_webmention.py
+++ b/tests/test_blog_webmention.py
@@ -14,57 +14,64 @@
class BlogWebmentionTest(testutil.AppTest):
-
- def setUp(self):
- super().setUp()
- self.source = testutil.FakeSource(id='foo.com',
- domains=['x.com', 'foo.com', 'y.com'],
- features=['webmention'])
- self.source.put()
-
- self.mox.StubOutWithMock(testutil.FakeSource, 'create_comment')
- self.mention_html = """\
+ def setUp(self):
+ super().setUp()
+ self.source = testutil.FakeSource(
+ id="foo.com", domains=["x.com", "foo.com", "y.com"], features=["webmention"]
+ )
+ self.source.put()
+
+ self.mox.StubOutWithMock(testutil.FakeSource, "create_comment")
+ self.mention_html = """\
- """)
+ """,
+ )
- # syndicated to two places
- self.expect_requests_get('http://author/post/permalink', """
+ # syndicated to two places
+ self.expect_requests_get(
+ "http://author/post/permalink",
+ """
-
""")
-
- self.mox.ReplayAll()
- self.assertIsNone(self.source.last_syndication_url)
- self.assert_discover(['http://author/post/permalink'])
- self.assert_syndicated_posts(('http://author/post/permalink',
- 'https://fa.ke/post/url'))
- self.assertEqual(testutil.NOW, self.source.updates['last_syndication_url'])
-
- def test_syndication_url_in_hfeed(self):
- """Like test_single_post, but because the syndication URL is given in
- the h-feed we skip fetching the permalink.
- """
- # silo domain is fa.ke
- self.expect_requests_get('http://author/', """
+
""",
+ )
+
+ self.mox.ReplayAll()
+ self.assertIsNone(self.source.last_syndication_url)
+ self.assert_discover(["http://author/post/permalink"])
+ self.assert_syndicated_posts(
+ ("http://author/post/permalink", "https://fa.ke/post/url")
+ )
+ self.assertEqual(testutil.NOW, self.source.updates["last_syndication_url"])
+
+ def test_syndication_url_in_hfeed(self):
+ """Like test_single_post, but because the syndication URL is given in
+ the h-feed we skip fetching the permalink.
+ """
+ # silo domain is fa.ke
+ self.expect_requests_get(
+ "http://author/",
+ """
- """)
-
- self.mox.ReplayAll()
- self.assert_discover(['http://author/post/permalink'])
- self.assert_syndicated_posts(('http://author/post/permalink',
- 'https://fa.ke/post/url'))
-
- self.assertEqual(testutil.NOW, self.source.updates['last_syndication_url'])
- self.assertEqual(testutil.NOW, self.source.updates['last_feed_syndication_url'])
-
- def test_syndication_url_in_hfeed_with_redirect(self):
- """Like test_syndication_url_in_hfeed but u-url redirects to the
- actual post URL. We should follow the redirect like we do everywhere
- else.
- """
- self.expect_requests_head('https://fa.ke/post/url')
- self.expect_requests_head('http://author/')
- self.expect_requests_get('http://author/', """
+