diff --git a/admin.py b/admin.py index e50b4bfd..2eae52e8 100644 --- a/admin.py +++ b/admin.py @@ -15,6 +15,7 @@ from flask_app import app import models import util + # Import source class files so their metaclasses are initialized. from models import BlogPost, Response, Source import blogger, flickr, github, instagram, mastodon, medium, tumblr, twitter, wordpress_rest @@ -22,96 +23,118 @@ NUM_ENTITIES = 10 -@app.route('/admin/responses') +@app.route("/admin/responses") def responses(): - """Find the most recently attempted responses and blog posts with error URLs.""" - entities = [] - - for cls in (Response,): # BlogPost - for e in cls.query().order(-cls.updated): - if (len(entities) >= NUM_ENTITIES or - e.updated < datetime.datetime.now() - datetime.timedelta(hours=1)): - break - elif (not e.error and not e.unsent) or e.status == 'complete': - continue - - e.links = [util.pretty_link(u, new_tab=True) for u in e.error + e.failed] - if e.key.kind() == 'Response': - e.response = json_loads(e.response_json) - e.activities = [json_loads(a) for a in e.activities_json] - else: - e.response = {'content': '[BlogPost]'} - e.activities = [{'url': e.key.id()}] - - entities.append(e) - - return render_template('admin_responses.html', responses=entities, logs=logs) - - -@app.route('/admin/sources') + """Find the most recently attempted responses and blog posts with error URLs.""" + entities = [] + + for cls in (Response,): # BlogPost + for e in cls.query().order(-cls.updated): + if len( + entities + ) >= NUM_ENTITIES or e.updated < datetime.datetime.now() - datetime.timedelta( + hours=1 + ): + break + elif (not e.error and not e.unsent) or e.status == "complete": + continue + + e.links = [util.pretty_link(u, new_tab=True) for u in e.error + e.failed] + if e.key.kind() == "Response": + e.response = json_loads(e.response_json) + e.activities = [json_loads(a) for a in e.activities_json] + else: + e.response = {"content": "[BlogPost]"} + e.activities = [{"url": e.key.id()}] + + entities.append(e) + + return render_template("admin_responses.html", responses=entities, logs=logs) + + +@app.route("/admin/sources") def sources(): - """Find sources whose last poll errored out.""" - CLASSES = (flickr.Flickr, github.GitHub, twitter.Twitter, - instagram.Instagram, mastodon.Mastodon) - queries = [cls.query(Source.status == 'enabled', - Source.poll_status == 'error', - Source.rate_limited.IN((False, None)), - Source.features == 'listen', - ).fetch_async(NUM_ENTITIES) - for cls in CLASSES] - - return render_template( - 'admin_sources.html', - sources=itertools.chain(*[q.get_result() for q in queries]), - logs=logs, - ) - - -@app.route('/admin/mark_complete', methods=['POST']) + """Find sources whose last poll errored out.""" + CLASSES = ( + flickr.Flickr, + github.GitHub, + twitter.Twitter, + instagram.Instagram, + mastodon.Mastodon, + ) + queries = [ + cls.query( + Source.status == "enabled", + Source.poll_status == "error", + Source.rate_limited.IN((False, None)), + Source.features == "listen", + ).fetch_async(NUM_ENTITIES) + for cls in CLASSES + ] + + return render_template( + "admin_sources.html", + sources=itertools.chain(*[q.get_result() for q in queries]), + logs=logs, + ) + + +@app.route("/admin/mark_complete", methods=["POST"]) def mark_complete(): - entities = ndb.get_multi(ndb.Key(urlsafe=u) - for u in request.values.getlist('key')) - for e in entities: - e.status = 'complete' - ndb.put_multi(entities) - return util.redirect('/admin/responses') + entities = ndb.get_multi(ndb.Key(urlsafe=u) for u in 
request.values.getlist("key")) + for e in entities: + e.status = "complete" + ndb.put_multi(entities) + return util.redirect("/admin/responses") -@app.route('/admin/stats') +@app.route("/admin/stats") def stats(): - """Collect and report misc lifetime stats. - - https://developers.google.com/appengine/docs/python/ndb/admin#Statistics_queries - - Used to be on the front page, dropped them during the Flask port in August 2021. - """ - def count(query): - stat = query.get() # no datastore stats in dev_appserver - return stat.count if stat else 0 - - def kind_count(kind): - return count(KindStat.query(KindStat.kind_name == kind)) - - num_users = sum(kind_count(cls.__name__) for cls in models.sources.values()) - link_counts = { - property: sum(count(KindPropertyNamePropertyTypeStat.query( - KindPropertyNamePropertyTypeStat.kind_name == kind, - KindPropertyNamePropertyTypeStat.property_name == property, - # specify string because there are also >2M Response entities with null - # values for some of these properties, as opposed to missing altogether, - # which we don't want to include. - KindPropertyNamePropertyTypeStat.property_type == 'String')) - for kind in ('BlogPost', 'Response')) - for property in ('sent', 'unsent', 'error', 'failed', 'skipped')} - - return render_template('admin_stats.html', **{ - # add comma separator between thousands - k: '{:,}'.format(v) for k, v in { - 'users': num_users, - 'responses': kind_count('Response'), - 'links': sum(link_counts.values()), - 'webmentions': link_counts['sent'] + kind_count('BlogPost'), - 'publishes': kind_count('Publish'), - 'blogposts': kind_count('BlogPost'), - 'webmentions_received': kind_count('BlogWebmention'), - }.items()}) + """Collect and report misc lifetime stats. + + https://developers.google.com/appengine/docs/python/ndb/admin#Statistics_queries + + Used to be on the front page, dropped them during the Flask port in August 2021. + """ + + def count(query): + stat = query.get() # no datastore stats in dev_appserver + return stat.count if stat else 0 + + def kind_count(kind): + return count(KindStat.query(KindStat.kind_name == kind)) + + num_users = sum(kind_count(cls.__name__) for cls in models.sources.values()) + link_counts = { + property: sum( + count( + KindPropertyNamePropertyTypeStat.query( + KindPropertyNamePropertyTypeStat.kind_name == kind, + KindPropertyNamePropertyTypeStat.property_name == property, + # specify string because there are also >2M Response entities with null + # values for some of these properties, as opposed to missing altogether, + # which we don't want to include. + KindPropertyNamePropertyTypeStat.property_type == "String", + ) + ) + for kind in ("BlogPost", "Response") + ) + for property in ("sent", "unsent", "error", "failed", "skipped") + } + + return render_template( + "admin_stats.html", + **{ + # add comma separator between thousands + k: "{:,}".format(v) + for k, v in { + "users": num_users, + "responses": kind_count("Response"), + "links": sum(link_counts.values()), + "webmentions": link_counts["sent"] + kind_count("BlogPost"), + "publishes": kind_count("Publish"), + "blogposts": kind_count("BlogPost"), + "webmentions_received": kind_count("BlogWebmention"), + }.items() + } + ) diff --git a/appengine_config.py b/appengine_config.py index 993c0277..7674fbb3 100644 --- a/appengine_config.py +++ b/appengine_config.py @@ -3,6 +3,7 @@ # Needed because I originally generated tag URIs with the current year, which # resulted in different URIs for the same objects when the year changed. 
:/ from oauth_dropins.webutil import util -if not hasattr(util, '_orig_tag_uri'): + +if not hasattr(util, "_orig_tag_uri"): util._orig_tag_uri = util.tag_uri util.tag_uri = lambda domain, name: util._orig_tag_uri(domain, name, year=2013) diff --git a/blog_webmention.py b/blog_webmention.py index 0f95bc60..14bcefc1 100644 --- a/blog_webmention.py +++ b/blog_webmention.py @@ -15,211 +15,233 @@ class BlogWebmentionView(webmention.Webmention): - """View for incoming webmentions against blog providers.""" - - def dispatch_request(self, site): - logging.info('Params: %s', list(request.values.items())) - # strip fragments from source and target url - self.source_url = urllib.parse.urldefrag(request.form['source'])[0] - self.target_url = urllib.parse.urldefrag(request.form['target'])[0] - - # follow target url through any redirects, strip utm_* query params - resp = util.follow_redirects(self.target_url) - redirected_target_urls = [r.url for r in resp.history] - self.target_url = util.clean_url(resp.url) - - # parse and validate target URL - domain = util.domain_from_link(self.target_url) - if not domain: - self.error('Could not parse target URL %s' % self.target_url) - - # look up source by domain - source_cls = models.sources[site] - domain = domain.lower() - self.source = (source_cls.query() - .filter(source_cls.domains == domain) - .filter(source_cls.features == 'webmention') - .filter(source_cls.status == 'enabled') - .get()) - if not self.source: - # check for a rel-canonical link. Blogger uses these when it serves a post - # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs - # epeus.blogspot.com. - # https://github.com/snarfed/bridgy/issues/805 - mf2 = self.fetch_mf2(self.target_url, require_mf2=False) - if not mf2: - # fetch_mf2() already wrote the error response - return - domains = util.dedupe_urls( - util.domain_from_link(url) - for url in mf2[1]['rels'].get('canonical', [])) - if domains: - self.source = (source_cls.query() - .filter(source_cls.domains.IN(domains)) - .filter(source_cls.features == 'webmention') - .filter(source_cls.status == 'enabled') - .get()) - - if not self.source: - self.error( - 'Could not find %s account for %s. Is it registered with Bridgy?' % - (source_cls.GR_CLASS.NAME, domain)) - - # check that the target URL path is supported - target_path = urllib.parse.urlparse(self.target_url).path - if target_path in ('', '/'): - msg = 'Home page webmentions are not currently supported.' 
- logging.info(msg) - return {'error': msg}, 202 - for pattern in self.source.PATH_BLOCKLIST: - if pattern.match(target_path): - msg = f'{self.source.GR_CLASS.NAME} webmentions are not supported for URL path: {target_path}' - logging.info(msg) - return {'error': msg}, 202 - - # create BlogWebmention entity - id = '%s %s' % (self.source_url, self.target_url) - self.entity = BlogWebmention.get_or_insert( - id, source=self.source.key, redirected_target_urls=redirected_target_urls) - if self.entity.status == 'complete': - # TODO: response message saying update isn't supported - return self.entity.published - logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe().decode()) - - # fetch source page - fetched = self.fetch_mf2(self.source_url) - if not fetched: - return - resp, mf2 = fetched - - item = self.find_mention_item(mf2.get('items', [])) - if not item: - self.error('Could not find target URL %s in source page %s' % - (self.target_url, resp.url), data=mf2, log_exception=False) - - # default author to target domain - author_name = domain - author_url = 'http://%s/' % domain - - # extract author name and URL from h-card, if any - props = item['properties'] - author = get_first(props, 'author') - if author: - if isinstance(author, str): - author_name = author - else: - author_props = author.get('properties', {}) - author_name = get_first(author_props, 'name') - author_url = get_first(author_props, 'url') - - # if present, u-url overrides source url - u_url = get_first(props, 'url') - if u_url: - self.entity.u_url = u_url - - # generate content - content = props['content'][0] # find_mention_item() guaranteed this is here - text = (content.get('html') or content.get('value')).strip() - source_url = self.entity.source_url() - text += '
via %s' % ( - source_url, util.domain_from_link(source_url)) - - # write comment - try: - self.entity.published = self.source.create_comment( - self.target_url, author_name, author_url, text) - except Exception as e: - code, body = util.interpret_http_exception(e) - msg = 'Error: %s: %s; %s' % (code, e, body) - if code == '401': - logging.warning(f'Disabling source due to: {e}', exc_info=True) - self.source.status = 'disabled' - self.source.put() - self.error(msg, status=code, report=self.source.is_beta_user()) - elif code == '404': - # post is gone - self.error(msg, status=code, report=False) - elif util.is_connection_failure(e) or (code and int(code) // 100 == 5): - self.error(msg, status=502, report=False) - elif code or body: - self.error(msg, status=code, report=True) - else: - raise - - # write results to datastore - self.entity.status = 'complete' - self.entity.put() - - return self.entity.published - - def find_mention_item(self, items): - """Returns the mf2 item that mentions (or replies to, likes, etc) the target. - - May modify the items arg, e.g. may set or replace content.html or - content.value. - - Args: - items: sequence of mf2 item dicts - - Returns: - mf2 item dict or None - """ - # find target URL in source - for item in items: - props = item.setdefault('properties', {}) - - # find first non-empty content element - content = props.setdefault('content', [{}])[0] - text = content.get('html') or content.get('value') - - for type in 'in-reply-to', 'like', 'like-of', 'repost', 'repost-of': - urls = [urllib.parse.urldefrag(u)[0] for u in - microformats2.get_string_urls(props.get(type, []))] - if self.any_target_in(urls): - break - else: - if text and self.any_target_in(text): - type = 'post' - url = get_first(props, 'url') or self.source_url - name = get_first(props, 'name') or get_first(props, 'summary') - text = content['html'] = ('mentioned this in %s.' % - util.pretty_link(url, text=name, max_length=280)) - else: - type = None - - if type: - # found the target! - rsvp = get_first(props, 'rsvp') - if rsvp: - self.entity.type = 'rsvp' - if not text: - content['value'] = 'RSVPed %s.' 
% rsvp - else: - self.entity.type = {'in-reply-to': 'comment', - 'like-of': 'like', - 'repost-of': 'repost', - }.get(type, type) - if not text: - content['value'] = {'comment': 'replied to this.', - 'like': 'liked this.', - 'repost': 'reposted this.', - }[self.entity.type] - return item - - # check children in case this is eg an h-feed - found = self.find_mention_item(item.get('children', [])) - if found: - return found - - return None - - def any_target_in(self, haystack): - """Returns true if any target URL (including redirects) is in haystack.""" - for target in self.entity.redirected_target_urls + [self.target_url]: - if target in haystack: - return True - return False - - -app.add_url_rule('/webmention/', - view_func=BlogWebmentionView.as_view('blog_wm'), methods=['POST']) + """View for incoming webmentions against blog providers.""" + + def dispatch_request(self, site): + logging.info("Params: %s", list(request.values.items())) + # strip fragments from source and target url + self.source_url = urllib.parse.urldefrag(request.form["source"])[0] + self.target_url = urllib.parse.urldefrag(request.form["target"])[0] + + # follow target url through any redirects, strip utm_* query params + resp = util.follow_redirects(self.target_url) + redirected_target_urls = [r.url for r in resp.history] + self.target_url = util.clean_url(resp.url) + + # parse and validate target URL + domain = util.domain_from_link(self.target_url) + if not domain: + self.error("Could not parse target URL %s" % self.target_url) + + # look up source by domain + source_cls = models.sources[site] + domain = domain.lower() + self.source = ( + source_cls.query() + .filter(source_cls.domains == domain) + .filter(source_cls.features == "webmention") + .filter(source_cls.status == "enabled") + .get() + ) + if not self.source: + # check for a rel-canonical link. Blogger uses these when it serves a post + # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs + # epeus.blogspot.com. + # https://github.com/snarfed/bridgy/issues/805 + mf2 = self.fetch_mf2(self.target_url, require_mf2=False) + if not mf2: + # fetch_mf2() already wrote the error response + return + domains = util.dedupe_urls( + util.domain_from_link(url) + for url in mf2[1]["rels"].get("canonical", []) + ) + if domains: + self.source = ( + source_cls.query() + .filter(source_cls.domains.IN(domains)) + .filter(source_cls.features == "webmention") + .filter(source_cls.status == "enabled") + .get() + ) + + if not self.source: + self.error( + "Could not find %s account for %s. Is it registered with Bridgy?" + % (source_cls.GR_CLASS.NAME, domain) + ) + + # check that the target URL path is supported + target_path = urllib.parse.urlparse(self.target_url).path + if target_path in ("", "/"): + msg = "Home page webmentions are not currently supported." 
+ logging.info(msg) + return {"error": msg}, 202 + for pattern in self.source.PATH_BLOCKLIST: + if pattern.match(target_path): + msg = f"{self.source.GR_CLASS.NAME} webmentions are not supported for URL path: {target_path}" + logging.info(msg) + return {"error": msg}, 202 + + # create BlogWebmention entity + id = "%s %s" % (self.source_url, self.target_url) + self.entity = BlogWebmention.get_or_insert( + id, source=self.source.key, redirected_target_urls=redirected_target_urls + ) + if self.entity.status == "complete": + # TODO: response message saying update isn't supported + return self.entity.published + logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe().decode()) + + # fetch source page + fetched = self.fetch_mf2(self.source_url) + if not fetched: + return + resp, mf2 = fetched + + item = self.find_mention_item(mf2.get("items", [])) + if not item: + self.error( + "Could not find target URL %s in source page %s" + % (self.target_url, resp.url), + data=mf2, + log_exception=False, + ) + + # default author to target domain + author_name = domain + author_url = "http://%s/" % domain + + # extract author name and URL from h-card, if any + props = item["properties"] + author = get_first(props, "author") + if author: + if isinstance(author, str): + author_name = author + else: + author_props = author.get("properties", {}) + author_name = get_first(author_props, "name") + author_url = get_first(author_props, "url") + + # if present, u-url overrides source url + u_url = get_first(props, "url") + if u_url: + self.entity.u_url = u_url + + # generate content + content = props["content"][0] # find_mention_item() guaranteed this is here + text = (content.get("html") or content.get("value")).strip() + source_url = self.entity.source_url() + text += '
via %s' % ( + source_url, + util.domain_from_link(source_url), + ) + + # write comment + try: + self.entity.published = self.source.create_comment( + self.target_url, author_name, author_url, text + ) + except Exception as e: + code, body = util.interpret_http_exception(e) + msg = "Error: %s: %s; %s" % (code, e, body) + if code == "401": + logging.warning(f"Disabling source due to: {e}", exc_info=True) + self.source.status = "disabled" + self.source.put() + self.error(msg, status=code, report=self.source.is_beta_user()) + elif code == "404": + # post is gone + self.error(msg, status=code, report=False) + elif util.is_connection_failure(e) or (code and int(code) // 100 == 5): + self.error(msg, status=502, report=False) + elif code or body: + self.error(msg, status=code, report=True) + else: + raise + + # write results to datastore + self.entity.status = "complete" + self.entity.put() + + return self.entity.published + + def find_mention_item(self, items): + """Returns the mf2 item that mentions (or replies to, likes, etc) the target. + + May modify the items arg, e.g. may set or replace content.html or + content.value. + + Args: + items: sequence of mf2 item dicts + + Returns: + mf2 item dict or None + """ + # find target URL in source + for item in items: + props = item.setdefault("properties", {}) + + # find first non-empty content element + content = props.setdefault("content", [{}])[0] + text = content.get("html") or content.get("value") + + for type in "in-reply-to", "like", "like-of", "repost", "repost-of": + urls = [ + urllib.parse.urldefrag(u)[0] + for u in microformats2.get_string_urls(props.get(type, [])) + ] + if self.any_target_in(urls): + break + else: + if text and self.any_target_in(text): + type = "post" + url = get_first(props, "url") or self.source_url + name = get_first(props, "name") or get_first(props, "summary") + text = content["html"] = "mentioned this in %s." % util.pretty_link( + url, text=name, max_length=280 + ) + else: + type = None + + if type: + # found the target! + rsvp = get_first(props, "rsvp") + if rsvp: + self.entity.type = "rsvp" + if not text: + content["value"] = "RSVPed %s." % rsvp + else: + self.entity.type = { + "in-reply-to": "comment", + "like-of": "like", + "repost-of": "repost", + }.get(type, type) + if not text: + content["value"] = { + "comment": "replied to this.", + "like": "liked this.", + "repost": "reposted this.", + }[self.entity.type] + return item + + # check children in case this is eg an h-feed + found = self.find_mention_item(item.get("children", [])) + if found: + return found + + return None + + def any_target_in(self, haystack): + """Returns true if any target URL (including redirects) is in haystack.""" + for target in self.entity.redirected_target_urls + [self.target_url]: + if target in haystack: + return True + return False + + +app.add_url_rule( + "/webmention/", + view_func=BlogWebmentionView.as_view("blog_wm"), + methods=["POST"], +) diff --git a/blogger.py b/blogger.py index b62f4dfc..018a27bd 100644 --- a/blogger.py +++ b/blogger.py @@ -43,186 +43,213 @@ class Blogger(models.Source): - """A Blogger blog. + """A Blogger blog. - The key name is the blog id. 
- """ - GR_CLASS = collections.namedtuple('FakeGrClass', ('NAME',))(NAME='Blogger') - OAUTH_START = oauth_blogger.Start - SHORT_NAME = 'blogger' - PATH_BLOCKLIST = (re.compile('^/search/.*'),) - - def feed_url(self): - # https://support.google.com/blogger/answer/97933?hl=en - return urllib.parse.urljoin(self.url, '/feeds/posts/default') # Atom - - def silo_url(self): - return self.url - - def edit_template_url(self): - return 'https://www.blogger.com/blogger.g?blogID=%s#template' % self.key_id() - - @staticmethod - def new(auth_entity=None, blog_id=None, **kwargs): - """Creates and returns a Blogger for the logged in user. - - Args: - auth_entity: :class:`oauth_dropins.blogger.BloggerV2Auth` - blog_id: which blog. optional. if not provided, uses the first available. - """ - urls, domains = Blogger._urls_and_domains(auth_entity, blog_id=blog_id) - if not urls or not domains: - flash('Blogger blog not found. Please create one first!') - return None - - if blog_id is None: - for blog_id, hostname in zip(auth_entity.blog_ids, auth_entity.blog_hostnames): - if domains[0] == hostname: - break - else: - assert False, "Internal error, shouldn't happen" - - return Blogger(id=blog_id, - auth_entity=auth_entity.key, - url=urls[0], - name=auth_entity.user_display_name(), - domains=domains, - domain_urls=urls, - picture=auth_entity.picture_url, - superfeedr_secret=util.generate_secret(), - **kwargs) - - @staticmethod - def _urls_and_domains(auth_entity, blog_id=None): - """Returns an auth entity's URL and domain. - - Args: - auth_entity: oauth_dropins.blogger.BloggerV2Auth - blog_id: which blog. optional. if not provided, uses the first available. - - Returns: - ([string url], [string domain]) + The key name is the blog id. """ - for id, host in zip(auth_entity.blog_ids, auth_entity.blog_hostnames): - if blog_id == id or (not blog_id and host): - return ['http://%s/' % host], [host] - return [], [] - - def create_comment(self, post_url, author_name, author_url, content, client=None): - """Creates a new comment in the source silo. - - Must be implemented by subclasses. - - Args: - post_url: string - author_name: string - author_url: string - content: string - client: :class:`gdata.blogger.client.BloggerClient`. If None, one will be - created from auth_entity. Used for dependency injection in the unit - test. + GR_CLASS = collections.namedtuple("FakeGrClass", ("NAME",))(NAME="Blogger") + OAUTH_START = oauth_blogger.Start + SHORT_NAME = "blogger" + PATH_BLOCKLIST = (re.compile("^/search/.*"),) + + def feed_url(self): + # https://support.google.com/blogger/answer/97933?hl=en + return urllib.parse.urljoin(self.url, "/feeds/posts/default") # Atom + + def silo_url(self): + return self.url + + def edit_template_url(self): + return "https://www.blogger.com/blogger.g?blogID=%s#template" % self.key_id() + + @staticmethod + def new(auth_entity=None, blog_id=None, **kwargs): + """Creates and returns a Blogger for the logged in user. + + Args: + auth_entity: :class:`oauth_dropins.blogger.BloggerV2Auth` + blog_id: which blog. optional. if not provided, uses the first available. + """ + urls, domains = Blogger._urls_and_domains(auth_entity, blog_id=blog_id) + if not urls or not domains: + flash("Blogger blog not found. 
Please create one first!") + return None + + if blog_id is None: + for blog_id, hostname in zip( + auth_entity.blog_ids, auth_entity.blog_hostnames + ): + if domains[0] == hostname: + break + else: + assert False, "Internal error, shouldn't happen" + + return Blogger( + id=blog_id, + auth_entity=auth_entity.key, + url=urls[0], + name=auth_entity.user_display_name(), + domains=domains, + domain_urls=urls, + picture=auth_entity.picture_url, + superfeedr_secret=util.generate_secret(), + **kwargs, + ) + + @staticmethod + def _urls_and_domains(auth_entity, blog_id=None): + """Returns an auth entity's URL and domain. + + Args: + auth_entity: oauth_dropins.blogger.BloggerV2Auth + blog_id: which blog. optional. if not provided, uses the first available. + + Returns: + ([string url], [string domain]) + """ + for id, host in zip(auth_entity.blog_ids, auth_entity.blog_hostnames): + if blog_id == id or (not blog_id and host): + return ["http://%s/" % host], [host] + + return [], [] + + def create_comment(self, post_url, author_name, author_url, content, client=None): + """Creates a new comment in the source silo. + + Must be implemented by subclasses. + + Args: + post_url: string + author_name: string + author_url: string + content: string + client: :class:`gdata.blogger.client.BloggerClient`. If None, one will be + created from auth_entity. Used for dependency injection in the unit + test. + + Returns: + JSON response dict with 'id' and other fields + """ + if client is None: + client = self.auth_entity.get().api() + + # extract the post's path and look up its post id + path = urllib.parse.urlparse(post_url).path + logging.info("Looking up post id for %s", path) + feed = client.get_posts(self.key_id(), query=Query(path=path)) + + if not feed.entry: + return self.error("Could not find Blogger post %s" % post_url) + elif len(feed.entry) > 1: + logging.warning( + "Found %d Blogger posts for path %s , expected 1", len(feed.entry), path + ) + post_id = feed.entry[0].get_post_id() + + # create the comment + content = '%s: %s' % (author_url, author_name, content) + if len(content) > MAX_COMMENT_LENGTH: + content = content[: MAX_COMMENT_LENGTH - 3] + "..." + logging.info( + "Creating comment on blog %s, post %s: %s", + self.key.id(), + post_id, + content.encode("utf-8"), + ) + try: + comment = client.add_comment(self.key.id(), post_id, content) + except Error as e: + msg = str(e) + if "Internal error:" in msg: + # known errors. e.g. https://github.com/snarfed/bridgy/issues/175 + # https://groups.google.com/d/topic/bloggerdev/szGkT5xA9CE/discussion + return {"error": msg} + else: + raise + + resp = {"id": comment.get_comment_id(), "response": comment.to_string()} + logging.info(f"Response: {resp}") + return resp + + +@app.route("/blogger/oauth_handler") +def oauth_callback(): + """OAuth callback handler. - Returns: - JSON response dict with 'id' and other fields + Both the add and delete flows have to share this because Blogger's + oauth-dropin doesn't yet allow multiple callback handlers. 
:/ """ - if client is None: - client = self.auth_entity.get().api() - - # extract the post's path and look up its post id - path = urllib.parse.urlparse(post_url).path - logging.info('Looking up post id for %s', path) - feed = client.get_posts(self.key_id(), query=Query(path=path)) - - if not feed.entry: - return self.error('Could not find Blogger post %s' % post_url) - elif len(feed.entry) > 1: - logging.warning('Found %d Blogger posts for path %s , expected 1', - len(feed.entry), path) - post_id = feed.entry[0].get_post_id() - - # create the comment - content = '%s: %s' % (author_url, author_name, content) - if len(content) > MAX_COMMENT_LENGTH: - content = content[:MAX_COMMENT_LENGTH - 3] + '...' - logging.info('Creating comment on blog %s, post %s: %s', self.key.id(), - post_id, content.encode('utf-8')) - try: - comment = client.add_comment(self.key.id(), post_id, content) - except Error as e: - msg = str(e) - if ('Internal error:' in msg): - # known errors. e.g. https://github.com/snarfed/bridgy/issues/175 - # https://groups.google.com/d/topic/bloggerdev/szGkT5xA9CE/discussion - return {'error': msg} - else: - raise - - resp = {'id': comment.get_comment_id(), 'response': comment.to_string()} - logging.info(f'Response: {resp}') - return resp - - -@app.route('/blogger/oauth_handler') -def oauth_callback(): - """OAuth callback handler. - - Both the add and delete flows have to share this because Blogger's - oauth-dropin doesn't yet allow multiple callback handlers. :/ - """ - auth_entity = None - auth_entity_str_key = request.values.get('auth_entity') - if auth_entity_str_key: - auth_entity = ndb.Key(urlsafe=auth_entity_str_key).get() - if not auth_entity.blog_ids or not auth_entity.blog_hostnames: - auth_entity = None - - if not auth_entity: - flash("Couldn't fetch your blogs. Maybe you're not a Blogger user?") - - state = request.values.get('state') - if not state: - state = util.construct_state_param_for_add(feature='webmention') - - if not auth_entity: - util.maybe_add_or_delete_source(Blogger, auth_entity, state) - return - - vars = { - 'action': '/blogger/add', - 'state': state, - 'operation': util.decode_oauth_state(state).get('operation'), - 'auth_entity_key': auth_entity.key.urlsafe().decode(), - 'blogs': [{'id': id, 'title': title, 'domain': host} - for id, title, host in zip(auth_entity.blog_ids, - auth_entity.blog_titles, - auth_entity.blog_hostnames)], + auth_entity = None + auth_entity_str_key = request.values.get("auth_entity") + if auth_entity_str_key: + auth_entity = ndb.Key(urlsafe=auth_entity_str_key).get() + if not auth_entity.blog_ids or not auth_entity.blog_hostnames: + auth_entity = None + + if not auth_entity: + flash("Couldn't fetch your blogs. 
Maybe you're not a Blogger user?") + + state = request.values.get("state") + if not state: + state = util.construct_state_param_for_add(feature="webmention") + + if not auth_entity: + util.maybe_add_or_delete_source(Blogger, auth_entity, state) + return + + vars = { + "action": "/blogger/add", + "state": state, + "operation": util.decode_oauth_state(state).get("operation"), + "auth_entity_key": auth_entity.key.urlsafe().decode(), + "blogs": [ + {"id": id, "title": title, "domain": host} + for id, title, host in zip( + auth_entity.blog_ids, + auth_entity.blog_titles, + auth_entity.blog_hostnames, + ) + ], } - logging.info(f'Rendering choose_blog.html with {vars}') - return render_template('choose_blog.html', **vars) + logging.info(f"Rendering choose_blog.html with {vars}") + return render_template("choose_blog.html", **vars) -@app.route('/blogger/add', methods=['POST']) +@app.route("/blogger/add", methods=["POST"]) def blogger_add(): - util.maybe_add_or_delete_source( - Blogger, - ndb.Key(urlsafe=request.form['auth_entity_key']).get(), - request.form['state'], - blog_id=request.form['blog'], - ) + util.maybe_add_or_delete_source( + Blogger, + ndb.Key(urlsafe=request.form["auth_entity_key"]).get(), + request.form["state"], + blog_id=request.form["blog"], + ) class SuperfeedrNotify(superfeedr.Notify): - SOURCE_CLS = Blogger + SOURCE_CLS = Blogger # Blogger only has one OAuth scope. oauth-dropins fills it in. # https://developers.google.com/blogger/docs/2.0/developers_guide_protocol#OAuth2Authorizing start = util.oauth_starter(oauth_blogger.Start).as_view( - 'blogger_start', '/blogger/oauth2callback') -app.add_url_rule('/blogger/start', view_func=start, methods=['POST']) -app.add_url_rule('/blogger/oauth2callback', view_func=oauth_blogger.Callback.as_view( - 'blogger_oauth2callback', '/blogger/oauth_handler')) -app.add_url_rule('/blogger/delete/start', view_func=oauth_blogger.Start.as_view( - 'blogger_delete_start', '/blogger/oauth2callback')) -app.add_url_rule('/blogger/notify/', view_func=SuperfeedrNotify.as_view('blogger_notify'), methods=['POST']) + "blogger_start", "/blogger/oauth2callback" +) +app.add_url_rule("/blogger/start", view_func=start, methods=["POST"]) +app.add_url_rule( + "/blogger/oauth2callback", + view_func=oauth_blogger.Callback.as_view( + "blogger_oauth2callback", "/blogger/oauth_handler" + ), +) +app.add_url_rule( + "/blogger/delete/start", + view_func=oauth_blogger.Start.as_view( + "blogger_delete_start", "/blogger/oauth2callback" + ), +) +app.add_url_rule( + "/blogger/notify/", + view_func=SuperfeedrNotify.as_view("blogger_notify"), + methods=["POST"], +) diff --git a/browser.py b/browser.py index aeed99e0..3c3572ab 100644 --- a/browser.py +++ b/browser.py @@ -17,377 +17,413 @@ from models import Activity, Domain, Source import util -JSON_CONTENT_TYPE = 'application/json' +JSON_CONTENT_TYPE = "application/json" # See https://www.cloudimage.io/ -IMAGE_PROXY_URL_BASE = 'https://aujtzahimq.cloudimg.io/v7/' +IMAGE_PROXY_URL_BASE = "https://aujtzahimq.cloudimg.io/v7/" def merge_by_id(existing, updates): - """Merges two lists of AS1 objects by id. + """Merges two lists of AS1 objects by id. - Overwrites the objects in the existing list with objects in the updates list - with the same id. Requires all objects to have ids. + Overwrites the objects in the existing list with objects in the updates list + with the same id. Requires all objects to have ids. 
- Args: - existing: sequence of AS1 dicts - updates: sequence of AS1 dicts + Args: + existing: sequence of AS1 dicts + updates: sequence of AS1 dicts - Returns: merged list of AS1 dicts - """ - objs = {o['id']: o for o in existing} - objs.update({o['id']: o for o in updates}) - return sorted(objs.values(), key=itemgetter('id')) + Returns: merged list of AS1 dicts + """ + objs = {o["id"]: o for o in existing} + objs.update({o["id"]: o for o in updates}) + return sorted(objs.values(), key=itemgetter("id")) class BrowserSource(Source): - """A source whose data is provided by the browser extension. - - Current subclasses are Instagram and Facebook. - """ - CAN_LISTEN = True - CAN_PUBLISH = False - AUTO_POLL = False - - # set by subclasses - GR_CLASS = None - OAUTH_START = None - gr_source = None - - @classmethod - def key_id_from_actor(cls, actor): - """Returns the key id for this entity from a given AS1 actor. + """A source whose data is provided by the browser extension. - To be implemented by subclasses. - - Args: - actor: dict AS1 actor - - Returns: str, key id to use for the corresponding datastore entity + Current subclasses are Instagram and Facebook. """ - raise NotImplementedError() - - @classmethod - def new(cls, auth_entity=None, actor=None, **kwargs): - """Creates and returns an entity based on an AS1 actor. - Args: - auth_entity: unused - actor: dict AS1 actor - """ - assert not auth_entity - assert actor - - if not kwargs.get('features'): - kwargs['features'] = ['listen'] - - src = cls(id=cls.key_id_from_actor(actor), - name=actor.get('displayName'), - picture=actor.get('image', {}).get('url'), - **kwargs) - src.domain_urls, src.domains = src._urls_and_domains(None, None, actor=actor) - return src - - @classmethod - def button_html(cls, feature, **kwargs): - return cls.OAUTH_START.button_html( - '/about#browser-extension', - form_method='get', - image_prefix='/oauth_dropins_static/') - - def get_activities_response(self, *args, **kwargs): - """Uses Activity entities stored in the datastore.""" - activities = [] - - activity_id = kwargs.get('activity_id') - if activity_id: - activity = Activity.get_by_id(self.gr_source.tag_uri(activity_id)) - if activity: - activities = [activity] - else: - activities = Activity.query(Activity.source == self.key)\ - .order(-Activity.updated).fetch(50) - - activities = [json_loads(a.activity_json) for a in activities] - for a in activities: - microformats2.prefix_image_urls(a, IMAGE_PROXY_URL_BASE) - - return self.gr_source.make_activities_base_response(activities) - - def get_comment(self, comment_id, activity=None, **kwargs): - """Uses the activity passed in the activity kwarg.""" - if activity: - for reply in activity.get('object', {}).get('replies', {}).get('items', []): - parsed = util.parse_tag_uri(reply.get('id', '')) - if parsed and parsed[1] == comment_id: - return reply - - def get_like(self, activity_user_id, activity_id, like_user_id, activity=None, - **kwargs): - """Uses the activity passed in the activity kwarg.""" - if activity: - for tag in activity.get('object', {}).get('tags', []): - if tag.get('verb') == 'like': - parsed = util.parse_tag_uri(tag.get('author', {}).get('id', '')) - if parsed and parsed[1] == like_user_id: - return tag + CAN_LISTEN = True + CAN_PUBLISH = False + AUTO_POLL = False + + # set by subclasses + GR_CLASS = None + OAUTH_START = None + gr_source = None + + @classmethod + def key_id_from_actor(cls, actor): + """Returns the key id for this entity from a given AS1 actor. + + To be implemented by subclasses. 
+ + Args: + actor: dict AS1 actor + + Returns: str, key id to use for the corresponding datastore entity + """ + raise NotImplementedError() + + @classmethod + def new(cls, auth_entity=None, actor=None, **kwargs): + """Creates and returns an entity based on an AS1 actor. + + Args: + auth_entity: unused + actor: dict AS1 actor + """ + assert not auth_entity + assert actor + + if not kwargs.get("features"): + kwargs["features"] = ["listen"] + + src = cls( + id=cls.key_id_from_actor(actor), + name=actor.get("displayName"), + picture=actor.get("image", {}).get("url"), + **kwargs, + ) + src.domain_urls, src.domains = src._urls_and_domains(None, None, actor=actor) + return src + + @classmethod + def button_html(cls, feature, **kwargs): + return cls.OAUTH_START.button_html( + "/about#browser-extension", + form_method="get", + image_prefix="/oauth_dropins_static/", + ) + + def get_activities_response(self, *args, **kwargs): + """Uses Activity entities stored in the datastore.""" + activities = [] + + activity_id = kwargs.get("activity_id") + if activity_id: + activity = Activity.get_by_id(self.gr_source.tag_uri(activity_id)) + if activity: + activities = [activity] + else: + activities = ( + Activity.query(Activity.source == self.key) + .order(-Activity.updated) + .fetch(50) + ) + + activities = [json_loads(a.activity_json) for a in activities] + for a in activities: + microformats2.prefix_image_urls(a, IMAGE_PROXY_URL_BASE) + + return self.gr_source.make_activities_base_response(activities) + + def get_comment(self, comment_id, activity=None, **kwargs): + """Uses the activity passed in the activity kwarg.""" + if activity: + for reply in activity.get("object", {}).get("replies", {}).get("items", []): + parsed = util.parse_tag_uri(reply.get("id", "")) + if parsed and parsed[1] == comment_id: + return reply + + def get_like( + self, activity_user_id, activity_id, like_user_id, activity=None, **kwargs + ): + """Uses the activity passed in the activity kwarg.""" + if activity: + for tag in activity.get("object", {}).get("tags", []): + if tag.get("verb") == "like": + parsed = util.parse_tag_uri(tag.get("author", {}).get("id", "")) + if parsed and parsed[1] == like_user_id: + return tag class BrowserView(View): - """Base class for requests from the browser extension.""" - def source_class(self): - return models.sources.get(request.path.strip('/').split('/')[0]) + """Base class for requests from the browser extension.""" - def gr_source(self): - return self.source_class().gr_source + def source_class(self): + return models.sources.get(request.path.strip("/").split("/")[0]) - def check_token_for_actor(self, actor): - """Checks that the given actor is public and matches the request's token. + def gr_source(self): + return self.source_class().gr_source - Raises: :class:`HTTPException` with HTTP 403 - """ - if not actor: - error('Missing actor!') + def check_token_for_actor(self, actor): + """Checks that the given actor is public and matches the request's token. - if not gr_source.Source.is_public(actor): - error(f'Your {self.gr_source().NAME} account is private. 
Bridgy only supports public accounts.') + Raises: :class:`HTTPException` with HTTP 403 + """ + if not actor: + error("Missing actor!") - token = request.values['token'] - domains = set(util.domain_from_link(util.replace_test_domains_with_localhost(u)) - for u in microformats2.object_urls(actor)) - domains.discard(self.source_class().GR_CLASS.DOMAIN) + if not gr_source.Source.is_public(actor): + error( + f"Your {self.gr_source().NAME} account is private. Bridgy only supports public accounts." + ) - logging.info(f'Checking token against domains {domains}') - for domain in ndb.get_multi(ndb.Key(Domain, d) for d in domains): - if domain and token in domain.tokens: - return + token = request.values["token"] + domains = set( + util.domain_from_link(util.replace_test_domains_with_localhost(u)) + for u in microformats2.object_urls(actor) + ) + domains.discard(self.source_class().GR_CLASS.DOMAIN) - error(f'Token {token} is not authorized for any of: {domains}', 403) + logging.info(f"Checking token against domains {domains}") + for domain in ndb.get_multi(ndb.Key(Domain, d) for d in domains): + if domain and token in domain.tokens: + return - def auth(self): - """Loads the source and token and checks that they're valid. + error(f"Token {token} is not authorized for any of: {domains}", 403) - Expects token in the `token` query param, source in `key` or `username`. + def auth(self): + """Loads the source and token and checks that they're valid. - Raises: :class:`HTTPException` with HTTP 400 if the token or source are - missing or invalid + Expects token in the `token` query param, source in `key` or `username`. - Returns: BrowserSource or None - """ - # Load source - source = util.load_source() + Raises: :class:`HTTPException` with HTTP 400 if the token or source are + missing or invalid + + Returns: BrowserSource or None + """ + # Load source + source = util.load_source() - # Load and check token - token = request.values['token'] - for domain in Domain.query(Domain.tokens == token): - if domain.key.id() in source.domains: - return source + # Load and check token + token = request.values["token"] + for domain in Domain.query(Domain.tokens == token): + if domain.key.id() in source.domains: + return source - error(f'Token {token} is not authorized for any of: {source.domains}', 403) + error(f"Token {token} is not authorized for any of: {source.domains}", 403) class Status(BrowserView): - """Runs preflight checks for a source and returns status and config info. + """Runs preflight checks for a source and returns status and config info. + + Response body is a JSON map with these fields: + status: string, 'enabled' or 'disabled' + poll-seconds: integer, current poll frequency for this source in seconds + """ - Response body is a JSON map with these fields: - status: string, 'enabled' or 'disabled' - poll-seconds: integer, current poll frequency for this source in seconds - """ - def dispatch_request(self): - source = self.auth() - logging.info(f'Got source: {source}') + def dispatch_request(self): + source = self.auth() + logging.info(f"Got source: {source}") - out = { - 'status': source.status, - 'poll-seconds': source.poll_period().total_seconds(), - } - logging.info(f'Returning {out}') - return out + out = { + "status": source.status, + "poll-seconds": source.poll_period().total_seconds(), + } + logging.info(f"Returning {out}") + return out class Homepage(BrowserView): - """Parses a silo home page and returns the logged in user's username. 
+ """Parses a silo home page and returns the logged in user's username. - Request body is https://www.instagram.com/ HTML for a logged in user. - """ - def dispatch_request(self): - gr_src = self.gr_source() - _, actor = gr_src.scraped_to_activities(request.get_data(as_text=True)) - logging.info(f'Got actor: {actor}') + Request body is https://www.instagram.com/ HTML for a logged in user. + """ + + def dispatch_request(self): + gr_src = self.gr_source() + _, actor = gr_src.scraped_to_activities(request.get_data(as_text=True)) + logging.info(f"Got actor: {actor}") - if actor: - username = actor.get('username') - if username: - logging.info(f'Returning {username}') - return jsonify(username) + if actor: + username = actor.get("username") + if username: + logging.info(f"Returning {username}") + return jsonify(username) - error(f"Couldn't determine logged in {gr_src.NAME} user or username") + error(f"Couldn't determine logged in {gr_src.NAME} user or username") class Feed(BrowserView): - """Parses a silo feed page and returns the posts. + """Parses a silo feed page and returns the posts. - Request body is HTML from a silo profile with posts, eg - https://www.instagram.com/name/ , for a logged in user. + Request body is HTML from a silo profile with posts, eg + https://www.instagram.com/name/ , for a logged in user. + + Response body is the JSON list of translated ActivityStreams activities. + """ - Response body is the JSON list of translated ActivityStreams activities. - """ - def dispatch_request(self): - self.auth() - activities, _ = self.scrape() - return jsonify(activities) + def dispatch_request(self): + self.auth() + activities, _ = self.scrape() + return jsonify(activities) - def scrape(self): - activities, actor = self.gr_source().scraped_to_activities( - request.get_data(as_text=True)) - ids = ' '.join(a['id'] for a in activities) - logging.info(f"Returning activities: {ids}") - return activities, actor + def scrape(self): + activities, actor = self.gr_source().scraped_to_activities( + request.get_data(as_text=True) + ) + ids = " ".join(a["id"] for a in activities) + logging.info(f"Returning activities: {ids}") + return activities, actor class Profile(Feed): - """Parses a silo profile page and creates or updates its Bridgy user. + """Parses a silo profile page and creates or updates its Bridgy user. - Request body is HTML from an IG profile, eg https://www.instagram.com/name/ , - for a logged in user. + Request body is HTML from an IG profile, eg https://www.instagram.com/name/ , + for a logged in user. - Response body is the JSON string URL-safe key of the Bridgy source entity. - """ - def dispatch_request(self): - _, actor = self.scrape() - if not actor: - actor = self.gr_source().scraped_to_actor(request.get_data(as_text=True)) - self.check_token_for_actor(actor) + Response body is the JSON string URL-safe key of the Bridgy source entity. + """ + + def dispatch_request(self): + _, actor = self.scrape() + if not actor: + actor = self.gr_source().scraped_to_actor(request.get_data(as_text=True)) + self.check_token_for_actor(actor) - # create/update the Bridgy account - source = self.source_class().create_new(self, actor=actor) - return jsonify(source.key.urlsafe().decode()) + # create/update the Bridgy account + source = self.source_class().create_new(self, actor=actor) + return jsonify(source.key.urlsafe().decode()) class Post(BrowserView): - """Parses a silo post's HTML and creates or updates an Activity. 
- - Request body is HTML from a silo post, eg https://www.instagram.com/p/ABC123/ - - Response body is the translated ActivityStreams activity JSON. - """ - def dispatch_request(self): - source = self.auth() - - gr_src = self.gr_source() - new_activity, actor = gr_src.scraped_to_activity(request.get_data(as_text=True)) - if not new_activity: - error(f'No {gr_src.NAME} post found in HTML') - - @ndb.transactional() - def update_activity(): - id = new_activity.get('id') - if not id: - error('Scraped post missing id') - activity = Activity.get_by_id(id) - - if activity: - # we already have this activity! merge in any new comments. - merged_activity = copy.deepcopy(new_activity) - existing_activity = json_loads(activity.activity_json) - # TODO: extract out merging replies - replies = merged_activity.setdefault('object', {}).setdefault('replies', {}) - gr_source.merge_by_id(replies, 'items', - existing_activity.get('object', {}).get('replies', {}).get('items', [])) - replies['totalItems'] = len(replies.get('items', [])) - # TODO: merge tags too - activity.activity_json = json_dumps(merged_activity) - else: - activity = Activity(id=id, source=source.key, - html=request.get_data(as_text=True), - activity_json=json_dumps(new_activity)) - - # store and return the activity - activity.put() - logging.info(f"Stored activity {id}") - - update_activity() - return new_activity + """Parses a silo post's HTML and creates or updates an Activity. + + Request body is HTML from a silo post, eg https://www.instagram.com/p/ABC123/ + + Response body is the translated ActivityStreams activity JSON. + """ + + def dispatch_request(self): + source = self.auth() + + gr_src = self.gr_source() + new_activity, actor = gr_src.scraped_to_activity(request.get_data(as_text=True)) + if not new_activity: + error(f"No {gr_src.NAME} post found in HTML") + + @ndb.transactional() + def update_activity(): + id = new_activity.get("id") + if not id: + error("Scraped post missing id") + activity = Activity.get_by_id(id) + + if activity: + # we already have this activity! merge in any new comments. + merged_activity = copy.deepcopy(new_activity) + existing_activity = json_loads(activity.activity_json) + # TODO: extract out merging replies + replies = merged_activity.setdefault("object", {}).setdefault( + "replies", {} + ) + gr_source.merge_by_id( + replies, + "items", + existing_activity.get("object", {}) + .get("replies", {}) + .get("items", []), + ) + replies["totalItems"] = len(replies.get("items", [])) + # TODO: merge tags too + activity.activity_json = json_dumps(merged_activity) + else: + activity = Activity( + id=id, + source=source.key, + html=request.get_data(as_text=True), + activity_json=json_dumps(new_activity), + ) + + # store and return the activity + activity.put() + logging.info(f"Stored activity {id}") + + update_activity() + return new_activity class Reactions(BrowserView): - """Parses reactions/likes from silo HTML and adds them to an existing Activity. + """Parses reactions/likes from silo HTML and adds them to an existing Activity. - Requires the request parameter `id` with the silo post's id (not shortcode!). + Requires the request parameter `id` with the silo post's id (not shortcode!). + + Response body is the translated ActivityStreams JSON for the reactions. + """ - Response body is the translated ActivityStreams JSON for the reactions. 
- """ - def dispatch_request(self, *args): - source = self.auth() + def dispatch_request(self, *args): + source = self.auth() - gr_src = self.gr_source() - id = request.values['id'] + gr_src = self.gr_source() + id = request.values["id"] - # validate request - parsed_id = util.parse_tag_uri(id) - if not parsed_id: - error(f'Expected id to be tag URI; got {id}') + # validate request + parsed_id = util.parse_tag_uri(id) + if not parsed_id: + error(f"Expected id to be tag URI; got {id}") - activity = Activity.get_by_id(id) - if not activity: - error(f'No {gr_src.NAME} post found for id {id}', 404) - elif activity.source != source.key: - error(f'Activity {id} is owned by {activity.source}, not {source.key}', 403) + activity = Activity.get_by_id(id) + if not activity: + error(f"No {gr_src.NAME} post found for id {id}", 404) + elif activity.source != source.key: + error(f"Activity {id} is owned by {activity.source}, not {source.key}", 403) - activity_data = json_loads(activity.activity_json) + activity_data = json_loads(activity.activity_json) - # convert new reactions to AS, merge into existing activity - try: - new_reactions = gr_src.merge_scraped_reactions( - request.get_data(as_text=True), activity_data) - except ValueError as e: - msg = "Couldn't parse scraped reactions: %s" % e - logging.error(msg, exc_info=True) - error(msg) + # convert new reactions to AS, merge into existing activity + try: + new_reactions = gr_src.merge_scraped_reactions( + request.get_data(as_text=True), activity_data + ) + except ValueError as e: + msg = "Couldn't parse scraped reactions: %s" % e + logging.error(msg, exc_info=True) + error(msg) - activity.activity_json = json_dumps(activity_data) - activity.put() + activity.activity_json = json_dumps(activity_data) + activity.put() - reaction_ids = ' '.join(r['id'] for r in new_reactions) - logging.info(f"Stored reactions for activity {id}: {reaction_ids}") - return jsonify(new_reactions) + reaction_ids = " ".join(r["id"] for r in new_reactions) + logging.info(f"Stored reactions for activity {id}: {reaction_ids}") + return jsonify(new_reactions) class Poll(BrowserView): - """Triggers a poll for a browser-based account.""" - def dispatch_request(self): - source = self.auth() - util.add_poll_task(source) - return jsonify('OK') + """Triggers a poll for a browser-based account.""" + + def dispatch_request(self): + source = self.auth() + util.add_poll_task(source) + return jsonify("OK") class TokenDomains(BrowserView): - """Returns the domains that a token is registered for.""" - def dispatch_request(self): - token = request.values['token'] + """Returns the domains that a token is registered for.""" + + def dispatch_request(self): + token = request.values["token"] - domains = [d.key.id() for d in Domain.query(Domain.tokens == token)] - if not domains: - error(f'No registered domains for token {token}', 404) + domains = [d.key.id() for d in Domain.query(Domain.tokens == token)] + if not domains: + error(f"No registered domains for token {token}", 404) - return jsonify(domains) + return jsonify(domains) def route(source_cls): - """Registers browser extension URL routes for a given source class. - - ...specifically, with the source's short name as the routes' URL prefix. 
- """ - for route, cls in ( - (f'/{source_cls.SHORT_NAME}/browser/status', Status), - (f'/{source_cls.SHORT_NAME}/browser/homepage', Homepage), - (f'/{source_cls.SHORT_NAME}/browser/profile', Profile), - (f'/{source_cls.SHORT_NAME}/browser/feed', Feed), - (f'/{source_cls.SHORT_NAME}/browser/post', Post), - (f'/{source_cls.SHORT_NAME}/browser/likes', Reactions), - (f'/{source_cls.SHORT_NAME}/browser/reactions', Reactions), - (f'/{source_cls.SHORT_NAME}/browser/poll', Poll), - (f'/{source_cls.SHORT_NAME}/browser/token-domains', TokenDomains), + """Registers browser extension URL routes for a given source class. + + ...specifically, with the source's short name as the routes' URL prefix. + """ + for route, cls in ( + (f"/{source_cls.SHORT_NAME}/browser/status", Status), + (f"/{source_cls.SHORT_NAME}/browser/homepage", Homepage), + (f"/{source_cls.SHORT_NAME}/browser/profile", Profile), + (f"/{source_cls.SHORT_NAME}/browser/feed", Feed), + (f"/{source_cls.SHORT_NAME}/browser/post", Post), + (f"/{source_cls.SHORT_NAME}/browser/likes", Reactions), + (f"/{source_cls.SHORT_NAME}/browser/reactions", Reactions), + (f"/{source_cls.SHORT_NAME}/browser/poll", Poll), + (f"/{source_cls.SHORT_NAME}/browser/token-domains", TokenDomains), ): - app.add_url_rule(route, view_func=cls.as_view(route), - methods=['GET', 'POST'] if cls == Status else ['POST']) + app.add_url_rule( + route, + view_func=cls.as_view(route), + methods=["GET", "POST"] if cls == Status else ["POST"], + ) diff --git a/config.py b/config.py index fd344555..13815dd7 100644 --- a/config.py +++ b/config.py @@ -7,10 +7,10 @@ JSONIFY_PRETTYPRINT_REGULAR = True if appengine_info.DEBUG: - ENV = 'development' - CACHE_TYPE = 'NullCache' - SECRET_KEY = 'sooper seekret' + ENV = "development" + CACHE_TYPE = "NullCache" + SECRET_KEY = "sooper seekret" else: - ENV = 'production' - CACHE_TYPE = 'SimpleCache' - SECRET_KEY = util.read('flask_secret_key') + ENV = "production" + CACHE_TYPE = "SimpleCache" + SECRET_KEY = util.read("flask_secret_key") diff --git a/cron.py b/cron.py index d90601e8..36d43ec0 100644 --- a/cron.py +++ b/cron.py @@ -20,112 +20,127 @@ from twitter import Twitter import util -CIRCLECI_TOKEN = util.read('circleci_token') +CIRCLECI_TOKEN = util.read("circleci_token") PAGE_SIZE = 20 class LastUpdatedPicture(StringIdModel): - """Stores the last user in a given silo that we updated profile picture for. + """Stores the last user in a given silo that we updated profile picture for. - Key id is the silo's SHORT_NAME. - """ - last = ndb.KeyProperty() - created = ndb.DateTimeProperty(auto_now_add=True, required=True) - updated = ndb.DateTimeProperty(auto_now=True) + Key id is the silo's SHORT_NAME. + """ + last = ndb.KeyProperty() + created = ndb.DateTimeProperty(auto_now_add=True, required=True) + updated = ndb.DateTimeProperty(auto_now=True) -@app.route('/cron/replace_poll_tasks') -def replace_poll_tasks(): - """Finds sources missing their poll tasks and adds new ones.""" - now = datetime.datetime.now() - queries = [cls.query(Source.features == 'listen', Source.status == 'enabled') - for cls in models.sources.values() if cls.AUTO_POLL] - for source in itertools.chain(*queries): - age = now - source.last_poll_attempt - if age > max(source.poll_period() * 2, datetime.timedelta(hours=2)): - logging.info('%s last polled %s ago. 
Adding new poll task.', - source.bridgy_url(), age) - util.add_poll_task(source) - return '' +@app.route("/cron/replace_poll_tasks") +def replace_poll_tasks(): + """Finds sources missing their poll tasks and adds new ones.""" + now = datetime.datetime.now() + queries = [ + cls.query(Source.features == "listen", Source.status == "enabled") + for cls in models.sources.values() + if cls.AUTO_POLL + ] + for source in itertools.chain(*queries): + age = now - source.last_poll_attempt + if age > max(source.poll_period() * 2, datetime.timedelta(hours=2)): + logging.info( + "%s last polled %s ago. Adding new poll task.", source.bridgy_url(), age + ) + util.add_poll_task(source) + + return "" class UpdatePictures(View): - """Finds sources with new profile pictures and updates them.""" - SOURCE_CLS = None - - @classmethod - def user_id(cls, source): - return source.key_id() - - def dispatch_request(self): - g.TRANSIENT_ERROR_HTTP_CODES = (self.SOURCE_CLS.TRANSIENT_ERROR_HTTP_CODES + - self.SOURCE_CLS.RATE_LIMIT_HTTP_CODES) - - query = self.SOURCE_CLS.query().order(self.SOURCE_CLS.key) - last = LastUpdatedPicture.get_by_id(self.SOURCE_CLS.SHORT_NAME) - if last and last.last: - query = query.filter(self.SOURCE_CLS.key > last.last) - - results, _, more = query.fetch_page(PAGE_SIZE) - for source in results: - if source.features and source.status != 'disabled': - logging.debug('checking for updated profile pictures for: %s', - source.bridgy_url()) - try: - actor = source.gr_source.get_actor(self.user_id(source)) - except BaseException as e: - # Mastodon API returns HTTP 404 for deleted (etc) users, and - # often one or more users' Mastodon instances are down. - code, _ = util.interpret_http_exception(e) - if code: - continue - raise - - if not actor: - logging.info(f"Couldn't fetch {source.bridgy_url()} 's user") - continue - - new_pic = actor.get('image', {}).get('url') - if not new_pic or source.picture == new_pic: - logging.info(f'No new picture found for {source.bridgy_url()}') - continue - - @ndb.transactional() - def update(): - src = source.key.get() - src.picture = new_pic - src.put() - - logging.info(f'Updating profile picture for {source.bridgy_url()} from {source.picture} to {new_pic}') - update() - - LastUpdatedPicture(id=self.SOURCE_CLS.SHORT_NAME, - last=source.key if more else None).put() - return 'OK' + """Finds sources with new profile pictures and updates them.""" + + SOURCE_CLS = None + + @classmethod + def user_id(cls, source): + return source.key_id() + + def dispatch_request(self): + g.TRANSIENT_ERROR_HTTP_CODES = ( + self.SOURCE_CLS.TRANSIENT_ERROR_HTTP_CODES + + self.SOURCE_CLS.RATE_LIMIT_HTTP_CODES + ) + + query = self.SOURCE_CLS.query().order(self.SOURCE_CLS.key) + last = LastUpdatedPicture.get_by_id(self.SOURCE_CLS.SHORT_NAME) + if last and last.last: + query = query.filter(self.SOURCE_CLS.key > last.last) + + results, _, more = query.fetch_page(PAGE_SIZE) + for source in results: + if source.features and source.status != "disabled": + logging.debug( + "checking for updated profile pictures for: %s", source.bridgy_url() + ) + try: + actor = source.gr_source.get_actor(self.user_id(source)) + except BaseException as e: + # Mastodon API returns HTTP 404 for deleted (etc) users, and + # often one or more users' Mastodon instances are down. 
+ code, _ = util.interpret_http_exception(e) + if code: + continue + raise + + if not actor: + logging.info(f"Couldn't fetch {source.bridgy_url()} 's user") + continue + + new_pic = actor.get("image", {}).get("url") + if not new_pic or source.picture == new_pic: + logging.info(f"No new picture found for {source.bridgy_url()}") + continue + + @ndb.transactional() + def update(): + src = source.key.get() + src.picture = new_pic + src.put() + + logging.info( + f"Updating profile picture for {source.bridgy_url()} from {source.picture} to {new_pic}" + ) + update() + + LastUpdatedPicture( + id=self.SOURCE_CLS.SHORT_NAME, last=source.key if more else None + ).put() + return "OK" class UpdateFlickrPictures(UpdatePictures): - """Finds :class:`Flickr` sources with new profile pictures and updates them.""" - SOURCE_CLS = Flickr + """Finds :class:`Flickr` sources with new profile pictures and updates them.""" + + SOURCE_CLS = Flickr class UpdateMastodonPictures(UpdatePictures): - """Finds :class:`Mastodon` sources with new profile pictures and updates them.""" - SOURCE_CLS = Mastodon + """Finds :class:`Mastodon` sources with new profile pictures and updates them.""" + + SOURCE_CLS = Mastodon - @classmethod - def user_id(cls, source): - return source.auth_entity.get().user_id() + @classmethod + def user_id(cls, source): + return source.auth_entity.get().user_id() class UpdateTwitterPictures(UpdatePictures): - """Finds :class:`Twitter` sources with new profile pictures and updates them. + """Finds :class:`Twitter` sources with new profile pictures and updates them. - https://github.com/snarfed/granary/commit/dfc3d406a20965a5ed14c9705e3d3c2223c8c3ff - http://indiewebcamp.com/Twitter#Profile_Image_URLs - """ - SOURCE_CLS = Twitter + https://github.com/snarfed/granary/commit/dfc3d406a20965a5ed14c9705e3d3c2223c8c3ff + http://indiewebcamp.com/Twitter#Profile_Image_URLs + """ + + SOURCE_CLS = Twitter # class UpdateBloggerPictures(UpdatePictures): @@ -135,20 +150,29 @@ class UpdateTwitterPictures(UpdatePictures): # # TODO: no granary.Blogger! -@app.route('/cron/build_circle') +@app.route("/cron/build_circle") def build_circle(): - """Trigger CircleCI to build and test the main branch. - - ...to run twitter_live_test.py, to check that scraping likes is still working. - """ - resp = requests.post('https://circleci.com/api/v1.1/project/github/snarfed/bridgy/tree/main?circle-token=%s' % CIRCLECI_TOKEN) - resp.raise_for_status() - return 'OK' - - -app.add_url_rule('/cron/update_flickr_pictures', - view_func=UpdateFlickrPictures.as_view('update_flickr_pictures')) -app.add_url_rule('/cron/update_mastodon_pictures', - view_func=UpdateMastodonPictures.as_view('update_mastodon_pictures')) -app.add_url_rule('/cron/update_twitter_pictures', - view_func=UpdateTwitterPictures.as_view('update_twitter_pictures')) + """Trigger CircleCI to build and test the main branch. + + ...to run twitter_live_test.py, to check that scraping likes is still working. 
+ """ + resp = requests.post( + "https://circleci.com/api/v1.1/project/github/snarfed/bridgy/tree/main?circle-token=%s" + % CIRCLECI_TOKEN + ) + resp.raise_for_status() + return "OK" + + +app.add_url_rule( + "/cron/update_flickr_pictures", + view_func=UpdateFlickrPictures.as_view("update_flickr_pictures"), +) +app.add_url_rule( + "/cron/update_mastodon_pictures", + view_func=UpdateMastodonPictures.as_view("update_mastodon_pictures"), +) +app.add_url_rule( + "/cron/update_twitter_pictures", + view_func=UpdateTwitterPictures.as_view("update_twitter_pictures"), +) diff --git a/facebook.py b/facebook.py index 03db4823..00307c9e 100644 --- a/facebook.py +++ b/facebook.py @@ -11,101 +11,105 @@ class Facebook(browser.BrowserSource): - """A Facebook account. - - The key name is the Facebook global user id. - """ - GR_CLASS = gr_facebook.Facebook - SHORT_NAME = 'facebook' - OAUTH_START = oauth_facebook.Start - URL_CANONICALIZER = util.UrlCanonicalizer( - # no reject regexp; non-private FB post URLs just 404 - domain=GR_CLASS.DOMAIN, - subdomain='www', - query=True, - approve=r'https://www\.facebook\.com/[^/?]+/posts/[^/?]+$', - headers=util.REQUEST_HEADERS) - - # blank granary Facebook object, shared across all instances - gr_source = gr_facebook.Facebook() - - # unique name used in FB URLs, e.g. facebook.com/[username] - username = ndb.StringProperty() - - @classmethod - def new(cls, auth_entity=None, actor=None, **kwargs): - """Creates and returns an entity based on an AS1 actor.""" - src = super().new(auth_entity=None, actor=actor, **kwargs) - src.username = actor.get('username') - return src - - @classmethod - def key_id_from_actor(cls, actor): - """Returns the actor's numeric_id field to use as this entity's key id. - - numeric_id is the Facebook global user id. - """ - return actor['numeric_id'] - - @classmethod - def lookup(cls, id): - """Returns the entity with the given id or username.""" - return ndb.Key(cls, id).get() or cls.query(cls.username == id).get() - - def silo_url(self): - """Returns the Facebook profile URL, e.g. https://facebook.com/foo. - - Facebook profile URLS with app-scoped user ids (eg www.facebook.com/ID) no - longer work as of April 2018, so if that's all we have, return None instead. - https://developers.facebook.com/blog/post/2018/04/19/facebook-login-changes-address-abuse/ - """ - if self.username: - return self.gr_source.user_url(self.username) - - user_id = self.key.id() - # STATE: define this, where is it? not here or granary or o-d - if util.is_int(id) and int(id) < MIN_APP_SCOPED_ID: - return self.gr_source.user_url(user_id) - - @classmethod - def button_html(cls, feature, **kwargs): - return super(cls, cls).button_html(feature, form_method='get', **kwargs) - return oauth_facebook.Start.button_html( - '/about#browser-extension', - form_method='get', - image_prefix='/oauth_dropins_static/') + """A Facebook account. - def canonicalize_url(self, url, **kwargs): - """Facebook-specific standardization of syndicated urls. - - Canonical form is https://www.facebook.com/USERID/posts/POSTID - - Args: - url: a string, the url of the syndicated content - kwargs: unused - - Return: - a string, the canonical form of the syndication url + The key name is the Facebook global user id. 
""" - if util.domain_from_link(url) != self.gr_source.DOMAIN: - return None - - def post_url(id): - return 'https://www.facebook.com/%s/posts/%s' % (self.key.id(), id) - - parsed = urllib.parse.urlparse(url) - params = urllib.parse.parse_qs(parsed.query) - url_id = self.gr_source.post_id(url) - ids = params.get('story_fbid') or params.get('fbid') - - post_id = ids[0] if ids else url_id - if post_id: - url = post_url(post_id) - - url = url.replace('facebook.com/%s/' % self.username, - 'facebook.com/%s/' % self.key.id()) - return super().canonicalize_url(url) + GR_CLASS = gr_facebook.Facebook + SHORT_NAME = "facebook" + OAUTH_START = oauth_facebook.Start + URL_CANONICALIZER = util.UrlCanonicalizer( + # no reject regexp; non-private FB post URLs just 404 + domain=GR_CLASS.DOMAIN, + subdomain="www", + query=True, + approve=r"https://www\.facebook\.com/[^/?]+/posts/[^/?]+$", + headers=util.REQUEST_HEADERS, + ) + + # blank granary Facebook object, shared across all instances + gr_source = gr_facebook.Facebook() + + # unique name used in FB URLs, e.g. facebook.com/[username] + username = ndb.StringProperty() + + @classmethod + def new(cls, auth_entity=None, actor=None, **kwargs): + """Creates and returns an entity based on an AS1 actor.""" + src = super().new(auth_entity=None, actor=actor, **kwargs) + src.username = actor.get("username") + return src + + @classmethod + def key_id_from_actor(cls, actor): + """Returns the actor's numeric_id field to use as this entity's key id. + + numeric_id is the Facebook global user id. + """ + return actor["numeric_id"] + + @classmethod + def lookup(cls, id): + """Returns the entity with the given id or username.""" + return ndb.Key(cls, id).get() or cls.query(cls.username == id).get() + + def silo_url(self): + """Returns the Facebook profile URL, e.g. https://facebook.com/foo. + + Facebook profile URLS with app-scoped user ids (eg www.facebook.com/ID) no + longer work as of April 2018, so if that's all we have, return None instead. + https://developers.facebook.com/blog/post/2018/04/19/facebook-login-changes-address-abuse/ + """ + if self.username: + return self.gr_source.user_url(self.username) + + user_id = self.key.id() + # STATE: define this, where is it? not here or granary or o-d + if util.is_int(id) and int(id) < MIN_APP_SCOPED_ID: + return self.gr_source.user_url(user_id) + + @classmethod + def button_html(cls, feature, **kwargs): + return super(cls, cls).button_html(feature, form_method="get", **kwargs) + return oauth_facebook.Start.button_html( + "/about#browser-extension", + form_method="get", + image_prefix="/oauth_dropins_static/", + ) + + def canonicalize_url(self, url, **kwargs): + """Facebook-specific standardization of syndicated urls. 
+ + Canonical form is https://www.facebook.com/USERID/posts/POSTID + + Args: + url: a string, the url of the syndicated content + kwargs: unused + + Return: + a string, the canonical form of the syndication url + """ + if util.domain_from_link(url) != self.gr_source.DOMAIN: + return None + + def post_url(id): + return "https://www.facebook.com/%s/posts/%s" % (self.key.id(), id) + + parsed = urllib.parse.urlparse(url) + params = urllib.parse.parse_qs(parsed.query) + url_id = self.gr_source.post_id(url) + ids = params.get("story_fbid") or params.get("fbid") + + post_id = ids[0] if ids else url_id + if post_id: + url = post_url(post_id) + + url = url.replace( + "facebook.com/%s/" % self.username, "facebook.com/%s/" % self.key.id() + ) + + return super().canonicalize_url(url) browser.route(Facebook) diff --git a/flask_app.py b/flask_app.py index 45888468..3934f69c 100644 --- a/flask_app.py +++ b/flask_app.py @@ -14,29 +14,32 @@ # Flask app -app = Flask('default') -app.template_folder = './templates' -app.config.from_pyfile('config.py') -app.url_map.converters['regex'] = flask_util.RegexConverter +app = Flask("default") +app.template_folder = "./templates" +app.config.from_pyfile("config.py") +app.url_map.converters["regex"] = flask_util.RegexConverter app.after_request(flask_util.default_modern_headers) app.register_error_handler(Exception, flask_util.handle_exception) -app.before_request(flask_util.canonicalize_domain( - util.OTHER_DOMAINS, util.PRIMARY_DOMAIN)) +app.before_request( + flask_util.canonicalize_domain(util.OTHER_DOMAINS, util.PRIMARY_DOMAIN) +) app.wsgi_app = flask_util.ndb_context_middleware(app.wsgi_app, client=ndb_client) -app.jinja_env.globals.update({ - 'naturaltime': humanize.naturaltime, - 'get_logins': util.get_logins, - 'sources': models.sources, - 'string': string, - 'util': util, - 'EPOCH': util.EPOCH, -}) +app.jinja_env.globals.update( + { + "naturaltime": humanize.naturaltime, + "get_logins": util.get_logins, + "sources": models.sources, + "string": string, + "util": util, + "EPOCH": util.EPOCH, + } +) cache = Cache(app) -@app.route('/_ah/') +@app.route("/_ah/") def noop(_): - return 'OK' + return "OK" diff --git a/flask_background.py b/flask_background.py index a2e7b35c..19f0163d 100644 --- a/flask_background.py +++ b/flask_background.py @@ -10,35 +10,38 @@ # Flask app -app = Flask('background') -app.config.from_pyfile('config.py') +app = Flask("background") +app.config.from_pyfile("config.py") app.wsgi_app = flask_util.ndb_context_middleware(app.wsgi_app, client=ndb_client) @app.errorhandler(Exception) def background_handle_exception(e): - """Common exception handler for background tasks. + """Common exception handler for background tasks. - Catches failed outbound HTTP requests and returns HTTP 304. - """ - if isinstance(e, HTTPException): - # raised by this app itself, pass it through - return str(e), e.code + Catches failed outbound HTTP requests and returns HTTP 304. 
+ """ + if isinstance(e, HTTPException): + # raised by this app itself, pass it through + return str(e), e.code - transients = getattr(g, 'TRANSIENT_ERROR_HTTP_CODES', ()) - source = getattr(g, 'source', None) - if source: - transients += source.RATE_LIMIT_HTTP_CODES + source.TRANSIENT_ERROR_HTTP_CODES + transients = getattr(g, "TRANSIENT_ERROR_HTTP_CODES", ()) + source = getattr(g, "source", None) + if source: + transients += source.RATE_LIMIT_HTTP_CODES + source.TRANSIENT_ERROR_HTTP_CODES - code, body = util.interpret_http_exception(e) - if ((code and int(code) // 100 == 5) or code in transients or - util.is_connection_failure(e)): - logging.error(f'Marking as error and finishing. {code}: {body}\n{e}') - return '', util.ERROR_HTTP_RETURN_CODE + code, body = util.interpret_http_exception(e) + if ( + (code and int(code) // 100 == 5) + or code in transients + or util.is_connection_failure(e) + ): + logging.error(f"Marking as error and finishing. {code}: {body}\n{e}") + return "", util.ERROR_HTTP_RETURN_CODE - raise e + raise e -@app.route('/_ah/') +@app.route("/_ah/") def noop(_): - return 'OK' + return "OK" diff --git a/flickr.py b/flickr.py index 8c333aa6..6d2b7db1 100644 --- a/flickr.py +++ b/flickr.py @@ -15,122 +15,148 @@ class Flickr(models.Source): - """A Flickr account. - - The key name is the nsid. - """ - # Fetching comments and likes is extremely request-intensive, so let's dial - # back the frequency for now. - FAST_POLL = datetime.timedelta(minutes=60) - GR_CLASS = gr_flickr.Flickr - OAUTH_START = oauth_flickr.Start - SHORT_NAME = 'flickr' - TRANSIENT_ERROR_HTTP_CODES = ('400',) - CAN_PUBLISH = True - URL_CANONICALIZER = util.UrlCanonicalizer( - domain=GR_CLASS.DOMAIN, - approve=r'https://www\.flickr\.com/(photos|people)/[^/?]+/([^/?]+/)?$', - reject=r'https://login\.yahoo\.com/.*', - subdomain='www', - trailing_slash=True, - headers=util.REQUEST_HEADERS) - - # unique name optionally used in URLs instead of nsid (e.g., - # flickr.com/photos/username) - username = ndb.StringProperty() - - @staticmethod - def new(auth_entity=None, **kwargs): - """Creates and returns a :class:`Flickr` for the logged in user. - - Args: - auth_entity: :class:`oauth_dropins.flickr.FlickrAuth` + """A Flickr account. + + The key name is the nsid. """ - person = json_loads(auth_entity.user_json).get('person', {}) - return Flickr( - id=person.get('nsid'), - auth_entity=auth_entity.key, - name=person.get('realname', {}).get('_content'), - # path_alias, if it exists, is the actual thing that shows up in the url. - # I think this is an artifact of the conversion to Yahoo. - username=(person.get('path_alias') - or person.get('username', {}).get('_content')), - picture='https://farm{}.staticflickr.com/{}/buddyicons/{}.jpg' .format( - person.get('iconfarm'), person.get('iconserver'), - person.get('nsid')), - url=person.get('profileurl', {}).get('_content'), - **kwargs) - - def silo_url(self): - """Returns the Flickr account URL, e.g. https://www.flickr.com/people/foo/.""" - return self.url - - def user_tag_id(self): - """Returns the tag URI for this source, e.g. 
'tag:flickr.com:123456'.""" - return self.gr_source.tag_uri(self.username) - - def label_name(self): - """Human-readable name, username, or id for this source.""" - return self.name or self.username or self.key_id() - - def get_activities_response(self, *args, **kwargs): - """Discard min_id because we still want new comments/likes on old photos.""" - kwargs.setdefault('group_id', SELF) - if 'min_id' in kwargs: - del kwargs['min_id'] - return self.gr_source.get_activities_response(*args, **kwargs) - - def canonicalize_url(self, url, activity=None, **kwargs): - if not url.endswith('/'): - url = url + '/' - if self.username: - url = url.replace('flickr.com/photos/%s/' % self.username, - 'flickr.com/photos/%s/' % self.key_id()) - url = url.replace('flickr.com/people/%s/' % self.username, - 'flickr.com/people/%s/' % self.key_id()) - return super().canonicalize_url(url, **kwargs) - - -class AuthHandler(): - """Base OAuth handler for Flickr.""" - def start_oauth_flow(self, feature): - starter = util.oauth_starter(oauth_flickr.Start, feature=feature)( - # TODO: delete instead of write. if we do that below, it works, and we get - # granted delete permissions. however, if we then attempt to actually - # delete something, it fails with code 99 "Insufficient permissions. - # Method requires delete privileges; write granted." and - # https://www.flickr.com/services/auth/list.gne shows that my user's - # permissions for the Bridgy app are back to write, not delete. wtf?! - '/flickr/add', scopes='write' if feature == 'publish' else 'read') - return starter.dispatch_request() + + # Fetching comments and likes is extremely request-intensive, so let's dial + # back the frequency for now. + FAST_POLL = datetime.timedelta(minutes=60) + GR_CLASS = gr_flickr.Flickr + OAUTH_START = oauth_flickr.Start + SHORT_NAME = "flickr" + TRANSIENT_ERROR_HTTP_CODES = ("400",) + CAN_PUBLISH = True + URL_CANONICALIZER = util.UrlCanonicalizer( + domain=GR_CLASS.DOMAIN, + approve=r"https://www\.flickr\.com/(photos|people)/[^/?]+/([^/?]+/)?$", + reject=r"https://login\.yahoo\.com/.*", + subdomain="www", + trailing_slash=True, + headers=util.REQUEST_HEADERS, + ) + + # unique name optionally used in URLs instead of nsid (e.g., + # flickr.com/photos/username) + username = ndb.StringProperty() + + @staticmethod + def new(auth_entity=None, **kwargs): + """Creates and returns a :class:`Flickr` for the logged in user. + + Args: + auth_entity: :class:`oauth_dropins.flickr.FlickrAuth` + """ + person = json_loads(auth_entity.user_json).get("person", {}) + return Flickr( + id=person.get("nsid"), + auth_entity=auth_entity.key, + name=person.get("realname", {}).get("_content"), + # path_alias, if it exists, is the actual thing that shows up in the url. + # I think this is an artifact of the conversion to Yahoo. + username=( + person.get("path_alias") or person.get("username", {}).get("_content") + ), + picture="https://farm{}.staticflickr.com/{}/buddyicons/{}.jpg".format( + person.get("iconfarm"), person.get("iconserver"), person.get("nsid") + ), + url=person.get("profileurl", {}).get("_content"), + **kwargs + ) + + def silo_url(self): + """Returns the Flickr account URL, e.g. https://www.flickr.com/people/foo/.""" + return self.url + + def user_tag_id(self): + """Returns the tag URI for this source, e.g. 
'tag:flickr.com:123456'.""" + return self.gr_source.tag_uri(self.username) + + def label_name(self): + """Human-readable name, username, or id for this source.""" + return self.name or self.username or self.key_id() + + def get_activities_response(self, *args, **kwargs): + """Discard min_id because we still want new comments/likes on old photos.""" + kwargs.setdefault("group_id", SELF) + if "min_id" in kwargs: + del kwargs["min_id"] + return self.gr_source.get_activities_response(*args, **kwargs) + + def canonicalize_url(self, url, activity=None, **kwargs): + if not url.endswith("/"): + url = url + "/" + if self.username: + url = url.replace( + "flickr.com/photos/%s/" % self.username, + "flickr.com/photos/%s/" % self.key_id(), + ) + url = url.replace( + "flickr.com/people/%s/" % self.username, + "flickr.com/people/%s/" % self.key_id(), + ) + return super().canonicalize_url(url, **kwargs) + + +class AuthHandler: + """Base OAuth handler for Flickr.""" + + def start_oauth_flow(self, feature): + starter = util.oauth_starter(oauth_flickr.Start, feature=feature)( + # TODO: delete instead of write. if we do that below, it works, and we get + # granted delete permissions. however, if we then attempt to actually + # delete something, it fails with code 99 "Insufficient permissions. + # Method requires delete privileges; write granted." and + # https://www.flickr.com/services/auth/list.gne shows that my user's + # permissions for the Bridgy app are back to write, not delete. wtf?! + "/flickr/add", + scopes="write" if feature == "publish" else "read", + ) + return starter.dispatch_request() class Start(oauth_flickr.Start, AuthHandler): - """Custom handler to start Flickr auth process.""" - def dispatch_request(self): - return self.start_oauth_flow(request.form.get('feature')) + """Custom handler to start Flickr auth process.""" + + def dispatch_request(self): + return self.start_oauth_flow(request.form.get("feature")) class AddFlickr(oauth_flickr.Callback, AuthHandler): - """Custom handler to add Flickr source when auth completes. - - If this account was previously authorized with greater permissions, this will - trigger another round of auth with elevated permissions. - """ - def finish(self, auth_entity, state=None): - logging.debug('finish with %s, %s', auth_entity, state) - source = util.maybe_add_or_delete_source(Flickr, auth_entity, state) - feature = util.decode_oauth_state(state).get('feature') - if source and feature == 'listen' and 'publish' in source.features: - # we had signed up previously with publish, so we'll reauth to - # avoid losing that permission - logging.info('Restarting OAuth flow to get publish permissions.') - source.features.remove('publish') - source.put() - return self.start_oauth_flow('publish') - - -app.add_url_rule('/flickr/start', view_func=Start.as_view('flickr_start', '/flickr/add'), methods=['POST']) -app.add_url_rule('/flickr/add', view_func=AddFlickr.as_view('flickr_add', 'unused')) -app.add_url_rule('/flickr/delete/finish', view_func=oauth_flickr.Callback.as_view('flickr_delete_finish', '/delete/finish')) -app.add_url_rule('/flickr/publish/start', view_func=oauth_flickr.Start.as_view('flickr_publish_start', '/publish/flickr/finish'), methods=['POST']) + """Custom handler to add Flickr source when auth completes. + + If this account was previously authorized with greater permissions, this will + trigger another round of auth with elevated permissions. 
+ """ + + def finish(self, auth_entity, state=None): + logging.debug("finish with %s, %s", auth_entity, state) + source = util.maybe_add_or_delete_source(Flickr, auth_entity, state) + feature = util.decode_oauth_state(state).get("feature") + if source and feature == "listen" and "publish" in source.features: + # we had signed up previously with publish, so we'll reauth to + # avoid losing that permission + logging.info("Restarting OAuth flow to get publish permissions.") + source.features.remove("publish") + source.put() + return self.start_oauth_flow("publish") + + +app.add_url_rule( + "/flickr/start", + view_func=Start.as_view("flickr_start", "/flickr/add"), + methods=["POST"], +) +app.add_url_rule("/flickr/add", view_func=AddFlickr.as_view("flickr_add", "unused")) +app.add_url_rule( + "/flickr/delete/finish", + view_func=oauth_flickr.Callback.as_view("flickr_delete_finish", "/delete/finish"), +) +app.add_url_rule( + "/flickr/publish/start", + view_func=oauth_flickr.Start.as_view( + "flickr_publish_start", "/publish/flickr/finish" + ), + methods=["POST"], +) diff --git a/github.py b/github.py index 015c8fad..c6d62e24 100644 --- a/github.py +++ b/github.py @@ -15,92 +15,109 @@ # https://developer.github.com/apps/building-oauth-apps/scopes-for-oauth-apps/ # https://github.com/dear-github/dear-github/issues/113#issuecomment-365121631 LISTEN_SCOPES = [ - 'notifications', - 'public_repo', + "notifications", + "public_repo", ] PUBLISH_SCOPES = [ - 'public_repo', + "public_repo", ] class GitHub(Source): - """A GitHub user. - - The key name is the GitHub username. - """ - GR_CLASS = gr_github.GitHub - OAUTH_START = oauth_github.Start - SHORT_NAME = 'github' - TYPE_LABELS = { - 'post': 'issue', - 'like': 'star', - } - BACKFEED_REQUIRES_SYNDICATION_LINK = True - DISABLE_HTTP_CODES = Source.DISABLE_HTTP_CODES + ('403',) - CAN_PUBLISH = True - URL_CANONICALIZER = util.UrlCanonicalizer(domain=GR_CLASS.DOMAIN, - headers=util.REQUEST_HEADERS, - fragment=True) - # This makes us backfeed issue/PR comments to previous comments on the same - # issue/PR. - IGNORE_SYNDICATION_LINK_FRAGMENTS = True - - @staticmethod - def new(auth_entity=None, **kwargs): - """Creates and returns a :class:`GitHub` for the logged in user. - - Args: - auth_entity: :class:`oauth_dropins.github.GitHubAuth` - kwargs: property values + """A GitHub user. + + The key name is the GitHub username. """ - user = json_loads(auth_entity.user_json) - gr_source = gr_github.GitHub(access_token=auth_entity.access_token()) - actor = gr_source.user_to_actor(user) - return GitHub(id=auth_entity.key_id(), - auth_entity=auth_entity.key, - name=actor.get('displayName'), - picture=actor.get('image', {}).get('url'), - url=actor.get('url'), - **kwargs) - - def silo_url(self): - """Returns the GitHub account URL, e.g. 
https://github.com/foo.""" - return self.gr_source.user_url(self.key_id()) - - def label_name(self): - """Returns the username.""" - return self.key_id() - - def user_tag_id(self): - """Returns this user's tag URI, eg 'tag:github.com:2013,MDQ6VXNlcjc3OD='.""" - id = json_loads(self.auth_entity.get().user_json)['id'] - return self.gr_source.tag_uri(id) - - def get_activities_response(self, *args, **kwargs): - """Drop kwargs that granary doesn't currently support for github.""" - kwargs.update({ - 'fetch_shares': None, - 'fetch_mentions': None, - }) - return self.gr_source.get_activities_response(*args, **kwargs) + + GR_CLASS = gr_github.GitHub + OAUTH_START = oauth_github.Start + SHORT_NAME = "github" + TYPE_LABELS = { + "post": "issue", + "like": "star", + } + BACKFEED_REQUIRES_SYNDICATION_LINK = True + DISABLE_HTTP_CODES = Source.DISABLE_HTTP_CODES + ("403",) + CAN_PUBLISH = True + URL_CANONICALIZER = util.UrlCanonicalizer( + domain=GR_CLASS.DOMAIN, headers=util.REQUEST_HEADERS, fragment=True + ) + # This makes us backfeed issue/PR comments to previous comments on the same + # issue/PR. + IGNORE_SYNDICATION_LINK_FRAGMENTS = True + + @staticmethod + def new(auth_entity=None, **kwargs): + """Creates and returns a :class:`GitHub` for the logged in user. + + Args: + auth_entity: :class:`oauth_dropins.github.GitHubAuth` + kwargs: property values + """ + user = json_loads(auth_entity.user_json) + gr_source = gr_github.GitHub(access_token=auth_entity.access_token()) + actor = gr_source.user_to_actor(user) + return GitHub( + id=auth_entity.key_id(), + auth_entity=auth_entity.key, + name=actor.get("displayName"), + picture=actor.get("image", {}).get("url"), + url=actor.get("url"), + **kwargs + ) + + def silo_url(self): + """Returns the GitHub account URL, e.g. 
https://github.com/foo.""" + return self.gr_source.user_url(self.key_id()) + + def label_name(self): + """Returns the username.""" + return self.key_id() + + def user_tag_id(self): + """Returns this user's tag URI, eg 'tag:github.com:2013,MDQ6VXNlcjc3OD='.""" + id = json_loads(self.auth_entity.get().user_json)["id"] + return self.gr_source.tag_uri(id) + + def get_activities_response(self, *args, **kwargs): + """Drop kwargs that granary doesn't currently support for github.""" + kwargs.update( + { + "fetch_shares": None, + "fetch_mentions": None, + } + ) + return self.gr_source.get_activities_response(*args, **kwargs) class Start(View): - def dispatch_request(self): - features = request.form['feature'] - scopes = PUBLISH_SCOPES if 'publish' in features else LISTEN_SCOPES - starter = util.oauth_starter(oauth_github.Start, feature=features - )('/github/add', scopes=scopes) - return starter.dispatch_request() + def dispatch_request(self): + features = request.form["feature"] + scopes = PUBLISH_SCOPES if "publish" in features else LISTEN_SCOPES + starter = util.oauth_starter(oauth_github.Start, feature=features)( + "/github/add", scopes=scopes + ) + return starter.dispatch_request() class AddGitHub(oauth_github.Callback): - def finish(self, auth_entity, state=None): - logging.debug('finish with %s, %s', auth_entity, state) - util.maybe_add_or_delete_source(GitHub, auth_entity, state) - - -app.add_url_rule('/github/start', view_func=Start.as_view('github_start'), methods=['POST']) -app.add_url_rule('/github/add', view_func=AddGitHub.as_view('github_add', 'unused')) -app.add_url_rule('/github/delete/finish', view_func=oauth_github.Callback.as_view('github_delete_finish', '/delete/finish')) -app.add_url_rule('/github/publish/start', view_func=oauth_github.Start.as_view('github_publish_start', '/publish/github/finish', scopes=PUBLISH_SCOPES), methods=['POST']) + def finish(self, auth_entity, state=None): + logging.debug("finish with %s, %s", auth_entity, state) + util.maybe_add_or_delete_source(GitHub, auth_entity, state) + + +app.add_url_rule( + "/github/start", view_func=Start.as_view("github_start"), methods=["POST"] +) +app.add_url_rule("/github/add", view_func=AddGitHub.as_view("github_add", "unused")) +app.add_url_rule( + "/github/delete/finish", + view_func=oauth_github.Callback.as_view("github_delete_finish", "/delete/finish"), +) +app.add_url_rule( + "/github/publish/start", + view_func=oauth_github.Start.as_view( + "github_publish_start", "/publish/github/finish", scopes=PUBLISH_SCOPES + ), + methods=["POST"], +) diff --git a/handlers.py b/handlers.py index 3326d246..44382505 100644 --- a/handlers.py +++ b/handlers.py @@ -38,7 +38,8 @@ CACHE_TIME = datetime.timedelta(minutes=15) -TEMPLATE = string.Template("""\ +TEMPLATE = string.Template( + """\ @@ -64,249 +65,286 @@ $body -""") +""" +) -@app.route('//', - methods=['HEAD']) +@app.route("//", methods=["HEAD"]) def mf2_handler_head(_, __): - return '' + return "" class Item(View): - """Fetches a post, repost, like, or comment and serves it as mf2 HTML or JSON. - """ - source = None - - VALID_ID = re.compile(r'^[\w.+:@=<>-]+$') - - def get_item(self, **kwargs): - """Fetches and returns an object from the given source. - - To be implemented by subclasses. - - Args: - source: :class:`models.Source` subclass - id: string - - Returns: - ActivityStreams object dict - """ - raise NotImplementedError() - - def get_post(self, id, **kwargs): - """Fetch a post. 
- - Args: - id: string, site-specific post id - is_event: bool - kwargs: passed through to :meth:`get_activities` - - Returns: - ActivityStreams object dict - """ - try: - posts = self.source.get_activities( - activity_id=id, user_id=self.source.key_id(), **kwargs) - if posts: - return posts[0] - logging.warning('Source post %s not found', id) - except AssertionError: - raise - except Exception as e: - util.interpret_http_exception(e) - - @flask_util.cached(cache, CACHE_TIME) - def dispatch_request(self, site, key_id, **kwargs): - """Handle HTTP request.""" - source_cls = models.sources.get(site) - if not source_cls: - error("Source type '%s' not found. Known sources: %s" % - (site, filter(None, models.sources.keys()))) - - self.source = source_cls.get_by_id(key_id) - if not self.source: - error(f'Source {site} {key_id} not found') - elif (self.source.status == 'disabled' or - 'listen' not in self.source.features): - error(f'Source {self.source.bridgy_path()} is disabled for backfeed') - - format = request.values.get('format', 'html') - if format not in ('html', 'json'): - error(f'Invalid format {format}, expected html or json') - - for id in kwargs.values(): - if not self.VALID_ID.match(id): - error(f'Invalid id {id}', 404) - - try: - obj = self.get_item(**kwargs) - except models.DisableSource: - error("Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!", 401) - except ValueError as e: - error(f'{self.source.GR_CLASS.NAME} error: {e}') - - if not obj: - error(f'Not found: {site}:{key_id} {kwargs}', 404) - - if self.source.is_blocked(obj): - error('That user is currently blocked', 410) - - # use https for profile pictures so we don't cause SSL mixed mode errors - # when serving over https. - author = obj.get('author', {}) - image = author.get('image', {}) - url = image.get('url') - if url: - image['url'] = util.update_scheme(url, request) - - mf2_json = microformats2.object_to_json(obj, synthesize_content=False) - - # try to include the author's silo profile url - author = first_props(mf2_json.get('properties', {})).get('author', {}) - author_uid = first_props(author.get('properties', {})).get('uid', '') - if author_uid: - parsed = util.parse_tag_uri(author_uid) - if parsed: - urls = author.get('properties', {}).setdefault('url', []) + """Fetches a post, repost, like, or comment and serves it as mf2 HTML or JSON.""" + + source = None + + VALID_ID = re.compile(r"^[\w.+:@=<>-]+$") + + def get_item(self, **kwargs): + """Fetches and returns an object from the given source. + + To be implemented by subclasses. + + Args: + source: :class:`models.Source` subclass + id: string + + Returns: + ActivityStreams object dict + """ + raise NotImplementedError() + + def get_post(self, id, **kwargs): + """Fetch a post. + + Args: + id: string, site-specific post id + is_event: bool + kwargs: passed through to :meth:`get_activities` + + Returns: + ActivityStreams object dict + """ + try: + posts = self.source.get_activities( + activity_id=id, user_id=self.source.key_id(), **kwargs + ) + if posts: + return posts[0] + logging.warning("Source post %s not found", id) + except AssertionError: + raise + except Exception as e: + util.interpret_http_exception(e) + + @flask_util.cached(cache, CACHE_TIME) + def dispatch_request(self, site, key_id, **kwargs): + """Handle HTTP request.""" + source_cls = models.sources.get(site) + if not source_cls: + error( + "Source type '%s' not found. 
Known sources: %s" + % (site, filter(None, models.sources.keys())) + ) + + self.source = source_cls.get_by_id(key_id) + if not self.source: + error(f"Source {site} {key_id} not found") + elif self.source.status == "disabled" or "listen" not in self.source.features: + error(f"Source {self.source.bridgy_path()} is disabled for backfeed") + + format = request.values.get("format", "html") + if format not in ("html", "json"): + error(f"Invalid format {format}, expected html or json") + + for id in kwargs.values(): + if not self.VALID_ID.match(id): + error(f"Invalid id {id}", 404) + try: - silo_url = self.source.gr_source.user_url(parsed[1]) - if silo_url not in microformats2.get_string_urls(urls): - urls.append(silo_url) - except NotImplementedError: # from gr_source.user_url() - pass - - # write the response! - if format == 'html': - url = obj.get('url', '') - return TEMPLATE.substitute({ - 'refresh': (f'' - if url else ''), - 'url': url, - 'body': microformats2.json_to_html(mf2_json), - 'title': obj.get('title') or obj.get('content') or 'Bridgy Response', - }) - elif format == 'json': - return mf2_json - - def merge_urls(self, obj, property, urls, object_type='article'): - """Updates an object's ActivityStreams URL objects in place. - - Adds all URLs in urls that don't already exist in obj[property]. - - ActivityStreams schema details: - http://activitystrea.ms/specs/json/1.0/#id-comparison - - Args: - obj: ActivityStreams object to merge URLs into - property: string property to merge URLs into - urls: sequence of string URLs to add - object_type: stored as the objectType alongside each URL - """ - if obj: - obj[property] = util.get_list(obj, property) - existing = set(filter(None, (u.get('url') for u in obj[property]))) - obj[property] += [{'url': url, 'objectType': object_type} for url in urls - if url not in existing] + obj = self.get_item(**kwargs) + except models.DisableSource: + error( + "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!", + 401, + ) + except ValueError as e: + error(f"{self.source.GR_CLASS.NAME} error: {e}") + + if not obj: + error(f"Not found: {site}:{key_id} {kwargs}", 404) + + if self.source.is_blocked(obj): + error("That user is currently blocked", 410) + + # use https for profile pictures so we don't cause SSL mixed mode errors + # when serving over https. + author = obj.get("author", {}) + image = author.get("image", {}) + url = image.get("url") + if url: + image["url"] = util.update_scheme(url, request) + + mf2_json = microformats2.object_to_json(obj, synthesize_content=False) + + # try to include the author's silo profile url + author = first_props(mf2_json.get("properties", {})).get("author", {}) + author_uid = first_props(author.get("properties", {})).get("uid", "") + if author_uid: + parsed = util.parse_tag_uri(author_uid) + if parsed: + urls = author.get("properties", {}).setdefault("url", []) + try: + silo_url = self.source.gr_source.user_url(parsed[1]) + if silo_url not in microformats2.get_string_urls(urls): + urls.append(silo_url) + except NotImplementedError: # from gr_source.user_url() + pass + + # write the response! 
+ if format == "html": + url = obj.get("url", "") + return TEMPLATE.substitute( + { + "refresh": ( + f'' + if url + else "" + ), + "url": url, + "body": microformats2.json_to_html(mf2_json), + "title": obj.get("title") + or obj.get("content") + or "Bridgy Response", + } + ) + elif format == "json": + return mf2_json + + def merge_urls(self, obj, property, urls, object_type="article"): + """Updates an object's ActivityStreams URL objects in place. + + Adds all URLs in urls that don't already exist in obj[property]. + + ActivityStreams schema details: + http://activitystrea.ms/specs/json/1.0/#id-comparison + + Args: + obj: ActivityStreams object to merge URLs into + property: string property to merge URLs into + urls: sequence of string URLs to add + object_type: stored as the objectType alongside each URL + """ + if obj: + obj[property] = util.get_list(obj, property) + existing = set(filter(None, (u.get("url") for u in obj[property]))) + obj[property] += [ + {"url": url, "objectType": object_type} + for url in urls + if url not in existing + ] # Note that mention links are included in posts and comments, but not # likes, reposts, or rsvps. Matches logic in poll() (step 4) in tasks.py! class Post(Item): - def get_item(self, post_id): - posts = self.source.get_activities(activity_id=post_id, - user_id=self.source.key_id()) - if not posts: - return None - - post = posts[0] - originals, mentions = original_post_discovery.discover( - self.source, post, fetch_hfeed=False) - obj = post['object'] - obj['upstreamDuplicates'] = list( - set(util.get_list(obj, 'upstreamDuplicates')) | originals) - self.merge_urls(obj, 'tags', mentions, object_type='mention') - return obj + def get_item(self, post_id): + posts = self.source.get_activities( + activity_id=post_id, user_id=self.source.key_id() + ) + if not posts: + return None + + post = posts[0] + originals, mentions = original_post_discovery.discover( + self.source, post, fetch_hfeed=False + ) + obj = post["object"] + obj["upstreamDuplicates"] = list( + set(util.get_list(obj, "upstreamDuplicates")) | originals + ) + self.merge_urls(obj, "tags", mentions, object_type="mention") + return obj class Comment(Item): - def get_item(self, post_id, comment_id): - fetch_replies = not self.source.gr_source.OPTIMIZED_COMMENTS - post = self.get_post(post_id, fetch_replies=fetch_replies) - has_replies = (post.get('object', {}).get('replies', {}).get('items') - if post else False) - cmt = self.source.get_comment( - comment_id, activity_id=post_id, activity_author_id=self.source.key_id(), - activity=post if fetch_replies or has_replies else None) - if post: - originals, mentions = original_post_discovery.discover( - self.source, post, fetch_hfeed=False) - self.merge_urls(cmt, 'inReplyTo', originals) - self.merge_urls(cmt, 'tags', mentions, object_type='mention') - return cmt + def get_item(self, post_id, comment_id): + fetch_replies = not self.source.gr_source.OPTIMIZED_COMMENTS + post = self.get_post(post_id, fetch_replies=fetch_replies) + has_replies = ( + post.get("object", {}).get("replies", {}).get("items") if post else False + ) + cmt = self.source.get_comment( + comment_id, + activity_id=post_id, + activity_author_id=self.source.key_id(), + activity=post if fetch_replies or has_replies else None, + ) + if post: + originals, mentions = original_post_discovery.discover( + self.source, post, fetch_hfeed=False + ) + self.merge_urls(cmt, "inReplyTo", originals) + self.merge_urls(cmt, "tags", mentions, object_type="mention") + return cmt class Like(Item): - def 
get_item(self, post_id, user_id): - post = self.get_post(post_id, fetch_likes=True) - like = self.source.get_like(self.source.key_id(), post_id, user_id, - activity=post) - if post: - originals, mentions = original_post_discovery.discover( - self.source, post, fetch_hfeed=False) - self.merge_urls(like, 'object', originals) - return like + def get_item(self, post_id, user_id): + post = self.get_post(post_id, fetch_likes=True) + like = self.source.get_like( + self.source.key_id(), post_id, user_id, activity=post + ) + if post: + originals, mentions = original_post_discovery.discover( + self.source, post, fetch_hfeed=False + ) + self.merge_urls(like, "object", originals) + return like class Reaction(Item): - def get_item(self, post_id, user_id, reaction_id): - post = self.get_post(post_id) - reaction = self.source.gr_source.get_reaction( - self.source.key_id(), post_id, user_id, reaction_id, activity=post) - if post: - originals, mentions = original_post_discovery.discover( - self.source, post, fetch_hfeed=False) - self.merge_urls(reaction, 'object', originals) - return reaction + def get_item(self, post_id, user_id, reaction_id): + post = self.get_post(post_id) + reaction = self.source.gr_source.get_reaction( + self.source.key_id(), post_id, user_id, reaction_id, activity=post + ) + if post: + originals, mentions = original_post_discovery.discover( + self.source, post, fetch_hfeed=False + ) + self.merge_urls(reaction, "object", originals) + return reaction class Repost(Item): - def get_item(self, post_id, share_id): - post = self.get_post(post_id, fetch_shares=True) - repost = self.source.gr_source.get_share( - self.source.key_id(), post_id, share_id, activity=post) - # webmention receivers don't want to see their own post in their - # comments, so remove attachments before rendering. - if repost and 'attachments' in repost: - del repost['attachments'] - if post: - originals, mentions = original_post_discovery.discover( - self.source, post, fetch_hfeed=False) - self.merge_urls(repost, 'object', originals) - return repost + def get_item(self, post_id, share_id): + post = self.get_post(post_id, fetch_shares=True) + repost = self.source.gr_source.get_share( + self.source.key_id(), post_id, share_id, activity=post + ) + # webmention receivers don't want to see their own post in their + # comments, so remove attachments before rendering. 
+ if repost and "attachments" in repost: + del repost["attachments"] + if post: + originals, mentions = original_post_discovery.discover( + self.source, post, fetch_hfeed=False + ) + self.merge_urls(repost, "object", originals) + return repost class Rsvp(Item): - def get_item(self, event_id, user_id): - event = self.source.gr_source.get_event(event_id) - rsvp = self.source.gr_source.get_rsvp( - self.source.key_id(), event_id, user_id, event=event) - if event: - originals, mentions = original_post_discovery.discover( - self.source, event, fetch_hfeed=False) - self.merge_urls(rsvp, 'inReplyTo', originals) - return rsvp - - -app.add_url_rule('/post///', - view_func=Post.as_view('post')) -app.add_url_rule('/comment////', - view_func=Comment.as_view('comment')) -app.add_url_rule('/like////', - view_func=Like.as_view('like')) -app.add_url_rule('/react/////', - view_func=Reaction.as_view('react')) -app.add_url_rule('/repost////', - view_func=Repost.as_view('repost')) -app.add_url_rule('/rsvp////', - view_func=Rsvp.as_view('rsvp')) + def get_item(self, event_id, user_id): + event = self.source.gr_source.get_event(event_id) + rsvp = self.source.gr_source.get_rsvp( + self.source.key_id(), event_id, user_id, event=event + ) + if event: + originals, mentions = original_post_discovery.discover( + self.source, event, fetch_hfeed=False + ) + self.merge_urls(rsvp, "inReplyTo", originals) + return rsvp + + +app.add_url_rule("/post///", view_func=Post.as_view("post")) +app.add_url_rule( + "/comment////", + view_func=Comment.as_view("comment"), +) +app.add_url_rule( + "/like////", view_func=Like.as_view("like") +) +app.add_url_rule( + "/react/////", + view_func=Reaction.as_view("react"), +) +app.add_url_rule( + "/repost////", view_func=Repost.as_view("repost") +) +app.add_url_rule( + "/rsvp////", view_func=Rsvp.as_view("rsvp") +) diff --git a/indieauth.py b/indieauth.py index 058c1231..1902abf0 100644 --- a/indieauth.py +++ b/indieauth.py @@ -9,52 +9,60 @@ from util import redirect -@app.route('/indieauth/start', methods=['GET']) +@app.route("/indieauth/start", methods=["GET"]) def indieauth_enter_web_site(): - """Serves the "Enter your web site" form page.""" - return render_template('indieauth.html', token=request.args['token']) + """Serves the "Enter your web site" form page.""" + return render_template("indieauth.html", token=request.args["token"]) class Start(indieauth.Start): - """Starts the IndieAuth flow.""" - def dispatch_request(self): - token = request.form['token'] + """Starts the IndieAuth flow.""" - try: - to_url = self.redirect_url(state=token) - except Exception as e: - if util.is_connection_failure(e) or util.interpret_http_exception(e)[0]: - flash("Couldn't fetch your web site: %s" % e) - return redirect('/') - raise + def dispatch_request(self): + token = request.form["token"] - return redirect(to_url) + try: + to_url = self.redirect_url(state=token) + except Exception as e: + if util.is_connection_failure(e) or util.interpret_http_exception(e)[0]: + flash("Couldn't fetch your web site: %s" % e) + return redirect("/") + raise + + return redirect(to_url) class Callback(indieauth.Callback): - """IndieAuth callback handler.""" - def finish(self, auth_entity, state=None): - if not auth_entity: - return - - assert state - - @ndb.transactional() - def add_or_update_domain(): - domain = Domain.get_or_insert(util.domain_from_link( - util.replace_test_domains_with_localhost(auth_entity.key.id()))) - domain.auth = auth_entity.key - if state not in domain.tokens: - domain.tokens.append(state) - 
domain.put() - flash(f'Authorized you for {domain.key.id()}.') - - add_or_update_domain() - return redirect('/') - - -app.add_url_rule('/indieauth/start', - view_func=Start.as_view('indieauth_start', '/indieauth/callback'), - methods=['POST']) -app.add_url_rule('/indieauth/callback', - view_func=Callback.as_view('indieauth_callback', 'unused')) + """IndieAuth callback handler.""" + + def finish(self, auth_entity, state=None): + if not auth_entity: + return + + assert state + + @ndb.transactional() + def add_or_update_domain(): + domain = Domain.get_or_insert( + util.domain_from_link( + util.replace_test_domains_with_localhost(auth_entity.key.id()) + ) + ) + domain.auth = auth_entity.key + if state not in domain.tokens: + domain.tokens.append(state) + domain.put() + flash(f"Authorized you for {domain.key.id()}.") + + add_or_update_domain() + return redirect("/") + + +app.add_url_rule( + "/indieauth/start", + view_func=Start.as_view("indieauth_start", "/indieauth/callback"), + methods=["POST"], +) +app.add_url_rule( + "/indieauth/callback", view_func=Callback.as_view("indieauth_callback", "unused") +) diff --git a/instagram.py b/instagram.py index bd2bd05b..359fc949 100644 --- a/instagram.py +++ b/instagram.py @@ -8,38 +8,40 @@ class Instagram(browser.BrowserSource): - """An Instagram account. - - The key name is the username. Instagram usernames may have ASCII letters (case - insensitive), numbers, periods, and underscores: - https://stackoverflow.com/questions/15470180 - """ - GR_CLASS = gr_instagram.Instagram - SHORT_NAME = 'instagram' - OAUTH_START = oauth_instagram.Start - URL_CANONICALIZER = util.UrlCanonicalizer( - # no reject regexp; non-private Instagram post URLs just 404 - domain=GR_CLASS.DOMAIN, - subdomain='www', - approve=r'https://www.instagram.com/p/[^/?]+/$', - trailing_slash=True, - headers=util.REQUEST_HEADERS) - - # blank granary Instagram object, shared across all instances - gr_source = gr_instagram.Instagram() - - @classmethod - def key_id_from_actor(cls, actor): - """Returns the actor's username field to be used as this entity's key id.""" - return actor['username'] - - def silo_url(self): - """Returns the Instagram account URL, e.g. https://instagram.com/foo.""" - return self.gr_source.user_url(self.key.id()) - - def label_name(self): - """Returns the username.""" - return self.key_id() + """An Instagram account. + + The key name is the username. Instagram usernames may have ASCII letters (case + insensitive), numbers, periods, and underscores: + https://stackoverflow.com/questions/15470180 + """ + + GR_CLASS = gr_instagram.Instagram + SHORT_NAME = "instagram" + OAUTH_START = oauth_instagram.Start + URL_CANONICALIZER = util.UrlCanonicalizer( + # no reject regexp; non-private Instagram post URLs just 404 + domain=GR_CLASS.DOMAIN, + subdomain="www", + approve=r"https://www.instagram.com/p/[^/?]+/$", + trailing_slash=True, + headers=util.REQUEST_HEADERS, + ) + + # blank granary Instagram object, shared across all instances + gr_source = gr_instagram.Instagram() + + @classmethod + def key_id_from_actor(cls, actor): + """Returns the actor's username field to be used as this entity's key id.""" + return actor["username"] + + def silo_url(self): + """Returns the Instagram account URL, e.g. 
https://instagram.com/foo.""" + return self.gr_source.user_url(self.key.id()) + + def label_name(self): + """Returns the username.""" + return self.key_id() browser.route(Instagram) diff --git a/mastodon.py b/mastodon.py index 1dd5357d..8ed6b8be 100644 --- a/mastodon.py +++ b/mastodon.py @@ -15,111 +15,116 @@ # https://docs.joinmastodon.org/api/oauth-scopes/ LISTEN_SCOPES = ( - 'read:accounts', - 'read:blocks', - 'read:notifications', - 'read:search', - 'read:statuses', + "read:accounts", + "read:blocks", + "read:notifications", + "read:search", + "read:statuses", ) PUBLISH_SCOPES = LISTEN_SCOPES + ( - 'write:statuses', - 'write:favourites', - 'write:media', + "write:statuses", + "write:favourites", + "write:media", ) SCOPE_SEPARATOR = oauth_dropins.mastodon.Start.SCOPE_SEPARATOR class StartBase(oauth_dropins.mastodon.Start): - """Abstract base OAuth starter class with our redirect URLs.""" - DEFAULT_SCOPE = '' - REDIRECT_PATHS = ( - '/mastodon/callback', - '/publish/mastodon/finish', - '/mastodon/delete/finish', - '/delete/finish', - ) + """Abstract base OAuth starter class with our redirect URLs.""" - def app_name(self): - return 'Bridgy' + DEFAULT_SCOPE = "" + REDIRECT_PATHS = ( + "/mastodon/callback", + "/publish/mastodon/finish", + "/mastodon/delete/finish", + "/delete/finish", + ) - def app_url(self): - return util.host_url() + def app_name(self): + return "Bridgy" + + def app_url(self): + return util.host_url() class Mastodon(models.Source): - """A Mastodon account. - - The key name is the fully qualified address, eg '@snarfed@mastodon.technology'. - """ - GR_CLASS = gr_mastodon.Mastodon - OAUTH_START = StartBase - SHORT_NAME = 'mastodon' - CAN_PUBLISH = True - HAS_BLOCKS = True - TYPE_LABELS = { - 'post': 'toot', - 'comment': 'reply', - 'repost': 'boost', - 'like': 'favorite', - } - DISABLE_HTTP_CODES = ('401', '403', '404') - - @property - def URL_CANONICALIZER(self): - """Generate URL_CANONICALIZER dynamically to use the instance's domain.""" - return util.UrlCanonicalizer( - domain=self.gr_source.DOMAIN, - headers=util.REQUEST_HEADERS) - - @staticmethod - def new(auth_entity=None, **kwargs): - """Creates and returns a :class:`Mastodon` entity. - - Args: - auth_entity: :class:`oauth_dropins.mastodon.MastodonAuth` - kwargs: property values + """A Mastodon account. + + The key name is the fully qualified address, eg '@snarfed@mastodon.technology'. """ - user = json_loads(auth_entity.user_json) - return Mastodon(id=auth_entity.key_id(), - auth_entity=auth_entity.key, - url=user.get('url'), - name=user.get('display_name') or user.get('username'), - picture=user.get('avatar'), - **kwargs) - - def username(self): - """Returns the Mastodon username, e.g. alice.""" - return self._split_address()[0] - - def instance(self): - """Returns the Mastodon instance URL, e.g. https://foo.com/.""" - return self._split_address()[1] - - def _split_address(self): - split = self.key_id().split('@') - assert len(split) == 3 and split[0] == '', self.key_id() - return split[1], split[2] - - def user_tag_id(self): - """Returns the tag URI for this source, e.g. 'tag:foo.com:alice'.""" - return self.gr_source.tag_uri(self.username()) - - def silo_url(self): - """Returns the Mastodon profile URL, e.g. 
https://foo.com/@bar.""" - return json_loads(self.auth_entity.get().user_json).get('url') - - def label_name(self): - """Returns the username.""" - return self.key_id() - - @classmethod - def button_html(cls, feature, **kwargs): - """Override oauth-dropins's button_html() to not show the instance text box.""" - source = kwargs.get('source') - instance = source.instance() if source else '' - scopes = SCOPE_SEPARATOR.join( - PUBLISH_SCOPES if 'publish' in feature else LISTEN_SCOPES) - return """\ + + GR_CLASS = gr_mastodon.Mastodon + OAUTH_START = StartBase + SHORT_NAME = "mastodon" + CAN_PUBLISH = True + HAS_BLOCKS = True + TYPE_LABELS = { + "post": "toot", + "comment": "reply", + "repost": "boost", + "like": "favorite", + } + DISABLE_HTTP_CODES = ("401", "403", "404") + + @property + def URL_CANONICALIZER(self): + """Generate URL_CANONICALIZER dynamically to use the instance's domain.""" + return util.UrlCanonicalizer( + domain=self.gr_source.DOMAIN, headers=util.REQUEST_HEADERS + ) + + @staticmethod + def new(auth_entity=None, **kwargs): + """Creates and returns a :class:`Mastodon` entity. + + Args: + auth_entity: :class:`oauth_dropins.mastodon.MastodonAuth` + kwargs: property values + """ + user = json_loads(auth_entity.user_json) + return Mastodon( + id=auth_entity.key_id(), + auth_entity=auth_entity.key, + url=user.get("url"), + name=user.get("display_name") or user.get("username"), + picture=user.get("avatar"), + **kwargs + ) + + def username(self): + """Returns the Mastodon username, e.g. alice.""" + return self._split_address()[0] + + def instance(self): + """Returns the Mastodon instance URL, e.g. https://foo.com/.""" + return self._split_address()[1] + + def _split_address(self): + split = self.key_id().split("@") + assert len(split) == 3 and split[0] == "", self.key_id() + return split[1], split[2] + + def user_tag_id(self): + """Returns the tag URI for this source, e.g. 'tag:foo.com:alice'.""" + return self.gr_source.tag_uri(self.username()) + + def silo_url(self): + """Returns the Mastodon profile URL, e.g. https://foo.com/@bar.""" + return json_loads(self.auth_entity.get().user_json).get("url") + + def label_name(self): + """Returns the username.""" + return self.key_id() + + @classmethod + def button_html(cls, feature, **kwargs): + """Override oauth-dropins's button_html() to not show the instance text box.""" + source = kwargs.get("source") + instance = source.instance() if source else "" + scopes = SCOPE_SEPARATOR.join( + PUBLISH_SCOPES if "publish" in feature else LISTEN_SCOPES + ) + return """\
@@ -127,82 +132,109 @@ def button_html(cls, feature, **kwargs):
-""" % ('post' if instance else 'get', feature, instance, scopes) - - def is_private(self): - """Returns True if this Mastodon account is protected. - - https://docs.joinmastodon.org/user/preferences/#misc - https://docs.joinmastodon.org/entities/account/ - """ - return json_loads(self.auth_entity.get().user_json).get('locked') - - def search_for_links(self): - """Searches for activities with links to any of this source's web sites. - - Returns: - sequence of ActivityStreams activity dicts - """ - if not self.domains: - return [] - - query = ' OR '.join(self.domains) - return self.get_activities( - search_query=query, group_id=gr_source.SEARCH, fetch_replies=False, - fetch_likes=False, fetch_shares=False) - - def load_blocklist(self): - try: - return super().load_blocklist() - except requests.HTTPError as e: - if e.response.status_code == 403: - # this user signed up before we started asking for the 'follow' OAuth - # scope, which the block list API endpoint requires. just skip them. - # https://console.cloud.google.com/errors/CMfA_KfIld6Q2AE - logging.info("Couldn't fetch block list due to missing OAuth scope") - self.blocked_ids = [] - self.put() - else: - raise - - -@app.route('/mastodon/start', methods=['GET']) +""" % ( + "post" if instance else "get", + feature, + instance, + scopes, + ) + + def is_private(self): + """Returns True if this Mastodon account is protected. + + https://docs.joinmastodon.org/user/preferences/#misc + https://docs.joinmastodon.org/entities/account/ + """ + return json_loads(self.auth_entity.get().user_json).get("locked") + + def search_for_links(self): + """Searches for activities with links to any of this source's web sites. + + Returns: + sequence of ActivityStreams activity dicts + """ + if not self.domains: + return [] + + query = " OR ".join(self.domains) + return self.get_activities( + search_query=query, + group_id=gr_source.SEARCH, + fetch_replies=False, + fetch_likes=False, + fetch_shares=False, + ) + + def load_blocklist(self): + try: + return super().load_blocklist() + except requests.HTTPError as e: + if e.response.status_code == 403: + # this user signed up before we started asking for the 'follow' OAuth + # scope, which the block list API endpoint requires. just skip them. 
+ # https://console.cloud.google.com/errors/CMfA_KfIld6Q2AE + logging.info("Couldn't fetch block list due to missing OAuth scope") + self.blocked_ids = [] + self.put() + else: + raise + + +@app.route("/mastodon/start", methods=["GET"]) def enter_your_instance(): - """Serves the "Enter your instance" form page.""" - return render_template('mastodon_instance.html') + """Serves the "Enter your instance" form page.""" + return render_template("mastodon_instance.html") class Start(StartBase): - def redirect_url(self, *args, **kwargs): - features = (request.form.get('feature') or '').split(',') - starter = util.oauth_starter(StartBase)('/mastodon/callback', - scopes=PUBLISH_SCOPES if 'publish' in features else LISTEN_SCOPES) - - try: - return starter.redirect_url(*args, instance=request.form['instance'], **kwargs) - except ValueError as e: - logging.warning('Bad Mastodon instance', exc_info=True) - flash(util.linkify(str(e), pretty=True)) - redirect(request.path) + def redirect_url(self, *args, **kwargs): + features = (request.form.get("feature") or "").split(",") + starter = util.oauth_starter(StartBase)( + "/mastodon/callback", + scopes=PUBLISH_SCOPES if "publish" in features else LISTEN_SCOPES, + ) + + try: + return starter.redirect_url( + *args, instance=request.form["instance"], **kwargs + ) + except ValueError as e: + logging.warning("Bad Mastodon instance", exc_info=True) + flash(util.linkify(str(e), pretty=True)) + redirect(request.path) class Callback(oauth_dropins.mastodon.Callback): - def finish(self, auth_entity, state=None): - source = util.maybe_add_or_delete_source(Mastodon, auth_entity, state) - - features = util.decode_oauth_state(state).get('feature', '').split(',') - if set(features) != set(source.features): - # override features with whatever we requested scopes for just now, since - # scopes are per access token. background: - # https://github.com/snarfed/bridgy/issues/1015 - source.features = features - source.put() - - -app.add_url_rule('/mastodon/start', - view_func=Start.as_view('mastodon_start', '/mastodon/callback'), methods=['POST']) -app.add_url_rule('/mastodon/callback', view_func=Callback.as_view('mastodon_callback', 'unused')) -app.add_url_rule('/mastodon/delete/finish', - view_func=oauth_dropins.mastodon.Callback.as_view('mastodon_delete_finish', '/delete/finish')) -app.add_url_rule('/mastodon/publish/start', - view_func=StartBase.as_view('mastodon_publish_finish', '/publish/mastodon/finish', scopes=PUBLISH_SCOPES), methods=['POST']) + def finish(self, auth_entity, state=None): + source = util.maybe_add_or_delete_source(Mastodon, auth_entity, state) + + features = util.decode_oauth_state(state).get("feature", "").split(",") + if set(features) != set(source.features): + # override features with whatever we requested scopes for just now, since + # scopes are per access token. 
background: + # https://github.com/snarfed/bridgy/issues/1015 + source.features = features + source.put() + + +app.add_url_rule( + "/mastodon/start", + view_func=Start.as_view("mastodon_start", "/mastodon/callback"), + methods=["POST"], +) +app.add_url_rule( + "/mastodon/callback", view_func=Callback.as_view("mastodon_callback", "unused") +) +app.add_url_rule( + "/mastodon/delete/finish", + view_func=oauth_dropins.mastodon.Callback.as_view( + "mastodon_delete_finish", "/delete/finish" + ), +) +app.add_url_rule( + "/mastodon/publish/start", + view_func=StartBase.as_view( + "mastodon_publish_finish", "/publish/mastodon/finish", scopes=PUBLISH_SCOPES + ), + methods=["POST"], +) diff --git a/medium.py b/medium.py index fc79c51c..ece77b0a 100644 --- a/medium.py +++ b/medium.py @@ -24,155 +24,173 @@ class Medium(models.Source): - """A Medium publication or user blog. + """A Medium publication or user blog. - The key name is the username (with @ prefix) or publication name. - """ - GR_CLASS = collections.namedtuple('FakeGrClass', ('NAME',))(NAME='Medium') - OAUTH_START = oauth_medium.Start - SHORT_NAME = 'medium' - - def is_publication(self): - return not self.key_id().startswith('@') - - def feed_url(self): - # https://help.medium.com/hc/en-us/articles/214874118-RSS-Feeds-of-publications-and-profiles - return self.url.replace('medium.com/', 'medium.com/feed/') - - def silo_url(self): - return self.url - - @staticmethod - def new(auth_entity=None, id=None, **kwargs): - """Creates and returns a Medium for the logged in user. - - Args: - auth_entity: :class:`oauth_dropins.medium.MediumAuth` - id: string, either username (starting with @) or publication id + The key name is the username (with @ prefix) or publication name. """ - assert id - medium = Medium(id=id, - auth_entity=auth_entity.key, - superfeedr_secret=util.generate_secret(), - **kwargs) - - data = medium._data(auth_entity) - medium.name = data.get('name') or data.get('username') - medium.picture = data.get('imageUrl') - medium.url = data.get('url') - return medium - - def verified(self): - return False - - def verify(self, force=False): - """No incoming webmention support yet.""" - pass - - def has_bridgy_webmention_endpoint(self): - return True - - def _data(self, auth_entity): - """Returns the Medium API object for this user or publication. 
- - https://github.com/Medium/medium-api-docs/#user-content-getting-the-authenticated-users-details - - Example user:: - { - 'imageUrl': 'https://cdn-images-1.medium.com/fit/c/200/200/0*4dsrv3pwIJfFraSz.jpeg', - 'url': 'https://medium.com/@snarfed', - 'name': 'Ryan Barrett', - 'username': 'snarfed', - 'id': '113863a5ca2ab60671e8c9fe089e59c07acbf8137c51523605dc55528516c0d7e' - } - Example publication:: - { - 'id': 'b45573563f5a', - 'name': 'Developers', - 'description': "Medium's Developer resources", - 'url': 'https://medium.com/developers', - 'imageUrl': 'https://cdn-images-1.medium.com/fit/c/200/200/1*ccokMT4VXmDDO1EoQQHkzg@2x.png' - } - """ - id = self.key_id().lstrip('@') - - user = json_loads(auth_entity.user_json).get('data') - if user.get('username').lstrip('@') == id: - return user - - for pub in json_loads(auth_entity.publications_json).get('data', []): - if pub.get('id') == id: - return pub - - def _urls_and_domains(self, auth_entity, user_url): - if self.url: - return [self.url], [util.domain_from_link(self.url)] - - return [], [] - - -@app.route('/medium/add', methods=['POST']) + GR_CLASS = collections.namedtuple("FakeGrClass", ("NAME",))(NAME="Medium") + OAUTH_START = oauth_medium.Start + SHORT_NAME = "medium" + + def is_publication(self): + return not self.key_id().startswith("@") + + def feed_url(self): + # https://help.medium.com/hc/en-us/articles/214874118-RSS-Feeds-of-publications-and-profiles + return self.url.replace("medium.com/", "medium.com/feed/") + + def silo_url(self): + return self.url + + @staticmethod + def new(auth_entity=None, id=None, **kwargs): + """Creates and returns a Medium for the logged in user. + + Args: + auth_entity: :class:`oauth_dropins.medium.MediumAuth` + id: string, either username (starting with @) or publication id + """ + assert id + medium = Medium( + id=id, + auth_entity=auth_entity.key, + superfeedr_secret=util.generate_secret(), + **kwargs, + ) + + data = medium._data(auth_entity) + medium.name = data.get("name") or data.get("username") + medium.picture = data.get("imageUrl") + medium.url = data.get("url") + return medium + + def verified(self): + return False + + def verify(self, force=False): + """No incoming webmention support yet.""" + pass + + def has_bridgy_webmention_endpoint(self): + return True + + def _data(self, auth_entity): + """Returns the Medium API object for this user or publication. 
+ + https://github.com/Medium/medium-api-docs/#user-content-getting-the-authenticated-users-details + + Example user:: + { + 'imageUrl': 'https://cdn-images-1.medium.com/fit/c/200/200/0*4dsrv3pwIJfFraSz.jpeg', + 'url': 'https://medium.com/@snarfed', + 'name': 'Ryan Barrett', + 'username': 'snarfed', + 'id': '113863a5ca2ab60671e8c9fe089e59c07acbf8137c51523605dc55528516c0d7e' + } + + Example publication:: + { + 'id': 'b45573563f5a', + 'name': 'Developers', + 'description': "Medium's Developer resources", + 'url': 'https://medium.com/developers', + 'imageUrl': 'https://cdn-images-1.medium.com/fit/c/200/200/1*ccokMT4VXmDDO1EoQQHkzg@2x.png' + } + """ + id = self.key_id().lstrip("@") + + user = json_loads(auth_entity.user_json).get("data") + if user.get("username").lstrip("@") == id: + return user + + for pub in json_loads(auth_entity.publications_json).get("data", []): + if pub.get("id") == id: + return pub + + def _urls_and_domains(self, auth_entity, user_url): + if self.url: + return [self.url], [util.domain_from_link(self.url)] + + return [], [] + + +@app.route("/medium/add", methods=["POST"]) def medium_add(): - auth_entity = ndb.Key(urlsafe=request.values['auth_entity_key']).get() - util.maybe_add_or_delete_source(Medium, auth_entity, request.values['state'], - id=request.values['blog']) + auth_entity = ndb.Key(urlsafe=request.values["auth_entity_key"]).get() + util.maybe_add_or_delete_source( + Medium, auth_entity, request.values["state"], id=request.values["blog"] + ) class ChooseBlog(oauth_medium.Callback): - def finish(self, auth_entity, state=None): - if not auth_entity: - util.maybe_add_or_delete_source(Medium, auth_entity, state) - return - - user = json_loads(auth_entity.user_json)['data'] - username = user['username'] - if not username.startswith('@'): - username = '@' + username - - # fetch publications this user contributes or subscribes to. - # (sadly medium's API doesn't tell us the difference unless we fetch each - # pub's metadata separately.) - # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications - auth_entity.publications_json = auth_entity.get( - oauth_medium.API_BASE + 'users/%s/publications' % user['id']).text - auth_entity.put() - pubs = json_loads(auth_entity.publications_json).get('data') - if not pubs: - util.maybe_add_or_delete_source(Medium, auth_entity, state, - id=username) - return - - # add user profile to start of pubs list - user['id'] = username - pubs.insert(0, user) - - vars = { - 'action': '/medium/add', - 'state': state, - 'auth_entity_key': auth_entity.key.urlsafe().decode(), - 'blogs': [{ - 'id': p['id'], - 'title': p.get('name', ''), - 'url': p.get('url', ''), - 'pretty_url': util.pretty_link(str(p.get('url', ''))), - 'image': p.get('imageUrl', ''), - } for p in pubs if p.get('id')], - } - logging.info(f'Rendering choose_blog.html with {vars}') - return render_template('choose_blog.html', **vars) + def finish(self, auth_entity, state=None): + if not auth_entity: + util.maybe_add_or_delete_source(Medium, auth_entity, state) + return + + user = json_loads(auth_entity.user_json)["data"] + username = user["username"] + if not username.startswith("@"): + username = "@" + username + + # fetch publications this user contributes or subscribes to. + # (sadly medium's API doesn't tell us the difference unless we fetch each + # pub's metadata separately.) 
+ # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications + auth_entity.publications_json = auth_entity.get( + oauth_medium.API_BASE + "users/%s/publications" % user["id"] + ).text + auth_entity.put() + pubs = json_loads(auth_entity.publications_json).get("data") + if not pubs: + util.maybe_add_or_delete_source(Medium, auth_entity, state, id=username) + return + + # add user profile to start of pubs list + user["id"] = username + pubs.insert(0, user) + + vars = { + "action": "/medium/add", + "state": state, + "auth_entity_key": auth_entity.key.urlsafe().decode(), + "blogs": [ + { + "id": p["id"], + "title": p.get("name", ""), + "url": p.get("url", ""), + "pretty_url": util.pretty_link(str(p.get("url", ""))), + "image": p.get("imageUrl", ""), + } + for p in pubs + if p.get("id") + ], + } + logging.info(f"Rendering choose_blog.html with {vars}") + return render_template("choose_blog.html", **vars) class SuperfeedrNotify(superfeedr.Notify): - SOURCE_CLS = Medium + SOURCE_CLS = Medium # https://github.com/Medium/medium-api-docs#user-content-21-browser-based-authentication start = util.oauth_starter(oauth_medium.Start).as_view( - 'medium_start', '/medium/choose_blog', scopes=('basicProfile', 'listPublications')) -app.add_url_rule('/medium/start', view_func=start, methods=['POST']) -app.add_url_rule('/medium/choose_blog', view_func=ChooseBlog.as_view( - 'medium_choose_blog'), methods=['POST']) -app.add_url_rule('/medium/delete/finish', view_func=oauth_medium.Callback.as_view( - 'medium_delete', '/delete/finish')), -app.add_url_rule('/medium/notify/', view_func=SuperfeedrNotify.as_view('medium_notify'), methods=['POST']) + "medium_start", "/medium/choose_blog", scopes=("basicProfile", "listPublications") +) +app.add_url_rule("/medium/start", view_func=start, methods=["POST"]) +app.add_url_rule( + "/medium/choose_blog", + view_func=ChooseBlog.as_view("medium_choose_blog"), + methods=["POST"], +) +app.add_url_rule( + "/medium/delete/finish", + view_func=oauth_medium.Callback.as_view("medium_delete", "/delete/finish"), +), +app.add_url_rule( + "/medium/notify/", + view_func=SuperfeedrNotify.as_view("medium_notify"), + methods=["POST"], +) diff --git a/meetup.py b/meetup.py index 03ef9d71..dc6c5cc4 100644 --- a/meetup.py +++ b/meetup.py @@ -11,54 +11,71 @@ # We don't support listen LISTEN_SCOPES = [] PUBLISH_SCOPES = [ - 'rsvp', + "rsvp", ] class Meetup(Source): - GR_CLASS = gr_meetup.Meetup - OAUTH_START = oauth_meetup.Start - SHORT_NAME = 'meetup' - BACKFEED_REQUIRES_SYNDICATION_LINK = True - CAN_LISTEN = False - CAN_PUBLISH = True - URL_CANONICALIZER = util.UrlCanonicalizer( - domain=GR_CLASS.DOMAIN, - headers=util.REQUEST_HEADERS) + GR_CLASS = gr_meetup.Meetup + OAUTH_START = oauth_meetup.Start + SHORT_NAME = "meetup" + BACKFEED_REQUIRES_SYNDICATION_LINK = True + CAN_LISTEN = False + CAN_PUBLISH = True + URL_CANONICALIZER = util.UrlCanonicalizer( + domain=GR_CLASS.DOMAIN, headers=util.REQUEST_HEADERS + ) - @staticmethod - def new(auth_entity=None, **kwargs): - """Creates and returns a :class:`Meetup` for the logged in user. + @staticmethod + def new(auth_entity=None, **kwargs): + """Creates and returns a :class:`Meetup` for the logged in user. 
- Args: - auth_entity: :class:`oauth_dropins.meetup.MeetupAuth` - kwargs: property values - """ - user = json_loads(auth_entity.user_json) - gr_source = gr_meetup.Meetup(access_token=auth_entity.access_token()) - actor = gr_source.user_to_actor(user) - return Meetup(id=auth_entity.key.id(), - auth_entity=auth_entity.key, - name=actor.get('displayName'), - picture=actor.get('image', {}).get('url'), - url=actor.get('url'), - **kwargs) + Args: + auth_entity: :class:`oauth_dropins.meetup.MeetupAuth` + kwargs: property values + """ + user = json_loads(auth_entity.user_json) + gr_source = gr_meetup.Meetup(access_token=auth_entity.access_token()) + actor = gr_source.user_to_actor(user) + return Meetup( + id=auth_entity.key.id(), + auth_entity=auth_entity.key, + name=actor.get("displayName"), + picture=actor.get("image", {}).get("url"), + url=actor.get("url"), + **kwargs + ) - def silo_url(self): - """Returns the Meetup account URL, e.g. https://meetup.com/members/....""" - return self.gr_source.user_url(self.key.id()) + def silo_url(self): + """Returns the Meetup account URL, e.g. https://meetup.com/members/....""" + return self.gr_source.user_url(self.key.id()) - def label_name(self): - """Returns the username.""" - return self.name + def label_name(self): + """Returns the username.""" + return self.name class Callback(oauth_meetup.Callback): - def finish(self, auth_entity, state=None): - util.maybe_add_or_delete_source(Meetup, auth_entity, state) + def finish(self, auth_entity, state=None): + util.maybe_add_or_delete_source(Meetup, auth_entity, state) -app.add_url_rule('/meetup/start', view_func=util.oauth_starter(oauth_meetup.Start).as_view('meetup_start', '/meetup/add', scopes=PUBLISH_SCOPES), methods=['POST']) # we don't support listen -app.add_url_rule('/meetup/add', view_func=Callback.as_view('meetup_add', 'unused')) -app.add_url_rule('/meetup/delete/finish', view_func=oauth_meetup.Callback.as_view('meetup_delete_finish', '/delete/finish')) -app.add_url_rule('/meetup/publish/start', view_func=oauth_meetup.Start.as_view('meetup_publish_finish', '/meetup/publish/finish', scopes=PUBLISH_SCOPES), methods=['POST']) +app.add_url_rule( + "/meetup/start", + view_func=util.oauth_starter(oauth_meetup.Start).as_view( + "meetup_start", "/meetup/add", scopes=PUBLISH_SCOPES + ), + methods=["POST"], +) # we don't support listen +app.add_url_rule("/meetup/add", view_func=Callback.as_view("meetup_add", "unused")) +app.add_url_rule( + "/meetup/delete/finish", + view_func=oauth_meetup.Callback.as_view("meetup_delete_finish", "/delete/finish"), +) +app.add_url_rule( + "/meetup/publish/start", + view_func=oauth_meetup.Start.as_view( + "meetup_publish_finish", "/meetup/publish/finish", scopes=PUBLISH_SCOPES + ), + methods=["POST"], +) diff --git a/models.py b/models.py index 8f679371..b613f034 100644 --- a/models.py +++ b/models.py @@ -19,8 +19,8 @@ import superfeedr import util -VERB_TYPES = ('post', 'comment', 'like', 'react', 'repost', 'rsvp', 'tag') -PUBLISH_TYPES = VERB_TYPES + ('preview', 'delete') +VERB_TYPES = ("post", "comment", "like", "react", "repost", "rsvp", "tag") +PUBLISH_TYPES = VERB_TYPES + ("preview", "delete") MAX_AUTHOR_URLS = 5 @@ -32,1129 +32,1222 @@ BLOCKLIST_MAX_IDS = 20000 TWITTER_SCRAPE_HEADERS = json_loads( - os.getenv('TWITTER_SCRAPE_HEADERS') or - util.read('twitter_scrape_headers.json') or '{}') + os.getenv("TWITTER_SCRAPE_HEADERS") + or util.read("twitter_scrape_headers.json") + or "{}" +) # maps string short name to Source subclass. populated by SourceMeta. 
sources = {} def get_type(obj): - """Returns the :class:`Response` or :class:`Publish` type for an AS object.""" - type = obj.get('objectType') - verb = obj.get('verb') - if type == 'activity' and verb == 'share': - return 'repost' - elif type == 'issue': - return 'post' - elif verb in gr_source.RSVP_VERB_TO_COLLECTION: - return 'rsvp' - elif (type == 'comment' or obj.get('inReplyTo') or - obj.get('context', {}).get('inReplyTo')): - return 'comment' - elif verb in VERB_TYPES: - return verb - else: - return 'post' + """Returns the :class:`Response` or :class:`Publish` type for an AS object.""" + type = obj.get("objectType") + verb = obj.get("verb") + if type == "activity" and verb == "share": + return "repost" + elif type == "issue": + return "post" + elif verb in gr_source.RSVP_VERB_TO_COLLECTION: + return "rsvp" + elif ( + type == "comment" + or obj.get("inReplyTo") + or obj.get("context", {}).get("inReplyTo") + ): + return "comment" + elif verb in VERB_TYPES: + return verb + else: + return "post" class DisableSource(Exception): - """Raised when a user has deauthorized our app inside a given platform.""" + """Raised when a user has deauthorized our app inside a given platform.""" class SourceMeta(ndb.MetaModel): - """:class:`Source` metaclass. Registers all subclasses in the sources global.""" - def __new__(meta, name, bases, class_dict): - cls = ndb.MetaModel.__new__(meta, name, bases, class_dict) - if cls.SHORT_NAME: - sources[cls.SHORT_NAME] = cls - return cls + """:class:`Source` metaclass. Registers all subclasses in the sources global.""" + def __new__(meta, name, bases, class_dict): + cls = ndb.MetaModel.__new__(meta, name, bases, class_dict) + if cls.SHORT_NAME: + sources[cls.SHORT_NAME] = cls + return cls -class Source(StringIdModel, metaclass=SourceMeta): - """A silo account, e.g. a Facebook or Google+ account. - - Each concrete silo class should subclass this class. - """ - - # Turn off NDB instance and memcache caching. - # https://developers.google.com/appengine/docs/python/ndb/cache - # https://github.com/snarfed/bridgy/issues/558 - # https://github.com/snarfed/bridgy/issues/68 - _use_cache = False - - STATUSES = ('enabled', 'disabled') - POLL_STATUSES = ('ok', 'error', 'polling') - FEATURES = ('listen', 'publish', 'webmention', 'email') - - # short name for this site type. used in URLs, etc. - SHORT_NAME = None - # the corresponding granary class - GR_CLASS = None - # oauth-dropins Start class - OAUTH_START = None - # whether Bridgy supports listen for this silo - this is unlikely, so we default to True - CAN_LISTEN = True - # whether Bridgy supports publish for this silo - CAN_PUBLISH = None - # whether this source should poll automatically, or only when triggered - # (eg Instagram) - AUTO_POLL = True - # how often to poll for responses - FAST_POLL = datetime.timedelta(minutes=30) - # how often to poll sources that have never sent a webmention - SLOW_POLL = datetime.timedelta(days=1) - # how often to poll sources that are currently rate limited by their silo - RATE_LIMITED_POLL = SLOW_POLL - # how long to wait after signup for a successful webmention before dropping to - # the lower frequency poll - FAST_POLL_GRACE_PERIOD = datetime.timedelta(days=7) - # how often refetch author url to look for updated syndication links - FAST_REFETCH = datetime.timedelta(hours=6) - # refetch less often (this often) if it's been >2w since the last synd link - SLOW_REFETCH = datetime.timedelta(days=2) - # rate limiting HTTP status codes returned by this silo. e.g. 
twitter returns - # 429, instagram 503, google+ 403. - RATE_LIMIT_HTTP_CODES = ('429',) - DISABLE_HTTP_CODES = ('401',) - TRANSIENT_ERROR_HTTP_CODES = () - # whether granary supports fetching block lists - HAS_BLOCKS = False - # whether to require a u-syndication link for backfeed - BACKFEED_REQUIRES_SYNDICATION_LINK = False - # ignore fragments when comparing syndication links in OPD - IGNORE_SYNDICATION_LINK_FRAGMENTS = False - - # Maps Publish.type (e.g. 'like') to source-specific human readable type label - # (e.g. 'favorite'). Subclasses should override this. - TYPE_LABELS = {} - - # subclasses should override this - URL_CANONICALIZER = util.UrlCanonicalizer(headers=util.REQUEST_HEADERS) - - # Regexps for URL paths that don't accept incoming webmentions. Currently used - # by Blogger. - PATH_BLOCKLIST = () - - created = ndb.DateTimeProperty(auto_now_add=True, required=True) - url = ndb.StringProperty() - status = ndb.StringProperty(choices=STATUSES, default='enabled') - poll_status = ndb.StringProperty(choices=POLL_STATUSES, default='ok') - rate_limited = ndb.BooleanProperty(default=False) - name = ndb.StringProperty() # full human-readable name - picture = ndb.StringProperty() - domains = ndb.StringProperty(repeated=True) - domain_urls = ndb.StringProperty(repeated=True) - features = ndb.StringProperty(repeated=True, choices=FEATURES) - superfeedr_secret = ndb.StringProperty() - webmention_endpoint = ndb.StringProperty() - - # points to an oauth-dropins auth entity. The model class should be a subclass - # of oauth_dropins.BaseAuth. the token should be generated with the - # offline_access scope so that it doesn't expire. - auth_entity = ndb.KeyProperty() - - # - # listen-only properties - # - last_polled = ndb.DateTimeProperty(default=util.EPOCH) - last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH) - last_webmention_sent = ndb.DateTimeProperty() - last_public_post = ndb.DateTimeProperty() - recent_private_posts = ndb.IntegerProperty(default=0) - - # the last time we re-fetched the author's url looking for updated - # syndication links - last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH) - - # the last time we've seen a rel=syndication link for this Source. - # we won't spend the time to re-fetch and look for updates if there's - # never been one - last_syndication_url = ndb.DateTimeProperty() - # the last time we saw a syndication link in an h-feed, as opposed to just on - # permalinks. background: https://github.com/snarfed/bridgy/issues/624 - last_feed_syndication_url = ndb.DateTimeProperty() - - last_activity_id = ndb.StringProperty() - last_activities_etag = ndb.StringProperty() - last_activities_cache_json = ndb.TextProperty() - seen_responses_cache_json = ndb.TextProperty(compressed=True) - - # populated in Poll.poll(), used by handlers - blocked_ids = ndb.JsonProperty(compressed=True) - - # maps updated property names to values that put_updates() writes back to the - # datastore transactionally. set this to {} before beginning. - updates = None - - # gr_source is *not* set to None by default here, since it needs to be unset - # for __getattr__ to run when it's accessed. - - def __init__(self, *args, id=None, **kwargs): - """Constructor. 
Escapes the key string id if it starts with `__`.""" - if id and id.startswith('__'): - id = '\\' + id - super().__init__(*args, id=id, **kwargs) - - def key_id(self): - """Returns the key's unescaped string id.""" - id = self.key.id() - return id[1:] if id[0] == '\\' else id - - @classmethod - def new(cls, **kwargs): - """Factory method. Creates and returns a new instance for the current user. - - To be implemented by subclasses. - """ - raise NotImplementedError() - def __getattr__(self, name): - """Lazily load the auth entity and instantiate :attr:`self.gr_source`. +class Source(StringIdModel, metaclass=SourceMeta): + """A silo account, e.g. a Facebook or Google+ account. - Once :attr:`self.gr_source` is set, this method will *not* be called; - :attr:`gr_source` will be returned normally. + Each concrete silo class should subclass this class. """ - if name == 'gr_source': - super_attr = getattr(super(), name, None) - if super_attr: - return super_attr - elif not self.auth_entity: - return None - - auth_entity = self.auth_entity.get() - try: - refresh_token = auth_entity.refresh_token - self.gr_source = self.GR_CLASS(refresh_token) - return self.gr_source - except AttributeError: - logging.info('no refresh_token') - args = auth_entity.access_token() - if not isinstance(args, tuple): - args = (args,) - - kwargs = {} - if self.key.kind() == 'FacebookPage' and auth_entity.type == 'user': - kwargs = {'user_id': self.key_id()} - elif self.key.kind() == 'Instagram': - kwargs = {'scrape': True, 'cookie': INSTAGRAM_SESSIONID_COOKIE} - elif self.key.kind() == 'Mastodon': - args = (auth_entity.instance(),) + args - inst = auth_entity.app.get().instance_info - kwargs = { - 'user_id': json_loads(auth_entity.user_json).get('id'), - # https://docs-develop.pleroma.social/backend/API/differences_in_mastoapi_responses/#instance - 'truncate_text_length': - json_loads(inst).get('max_toot_chars') if inst else None, - } - elif self.key.kind() == 'Twitter': - kwargs = {'username': self.key_id(), 'scrape_headers': TWITTER_SCRAPE_HEADERS} - - self.gr_source = self.GR_CLASS(*args, **kwargs) - return self.gr_source - - return getattr(super(), name) - - @classmethod - def lookup(cls, id): - """Returns the entity with the given id. - - By default, interprets id as just the key id. Subclasses may extend this to - support usernames, etc. - """ - if id and id.startswith('__'): - id = '\\' + id - return ndb.Key(cls, id).get() - - def user_tag_id(self): - """Returns the tag URI for this source, e.g. 'tag:plus.google.com:123456'.""" - return self.gr_source.tag_uri(self.key_id()) - - def bridgy_path(self): - """Returns the Bridgy page URL path for this source.""" - return '/%s/%s' % (self.SHORT_NAME, self.key_id()) - def bridgy_url(self): - """Returns the Bridgy page URL for this source.""" - return util.host_url(self.bridgy_path()) + # Turn off NDB instance and memcache caching. + # https://developers.google.com/appengine/docs/python/ndb/cache + # https://github.com/snarfed/bridgy/issues/558 + # https://github.com/snarfed/bridgy/issues/68 + _use_cache = False + + STATUSES = ("enabled", "disabled") + POLL_STATUSES = ("ok", "error", "polling") + FEATURES = ("listen", "publish", "webmention", "email") + + # short name for this site type. used in URLs, etc. 
+ SHORT_NAME = None + # the corresponding granary class + GR_CLASS = None + # oauth-dropins Start class + OAUTH_START = None + # whether Bridgy supports listen for this silo - this is unlikely, so we default to True + CAN_LISTEN = True + # whether Bridgy supports publish for this silo + CAN_PUBLISH = None + # whether this source should poll automatically, or only when triggered + # (eg Instagram) + AUTO_POLL = True + # how often to poll for responses + FAST_POLL = datetime.timedelta(minutes=30) + # how often to poll sources that have never sent a webmention + SLOW_POLL = datetime.timedelta(days=1) + # how often to poll sources that are currently rate limited by their silo + RATE_LIMITED_POLL = SLOW_POLL + # how long to wait after signup for a successful webmention before dropping to + # the lower frequency poll + FAST_POLL_GRACE_PERIOD = datetime.timedelta(days=7) + # how often refetch author url to look for updated syndication links + FAST_REFETCH = datetime.timedelta(hours=6) + # refetch less often (this often) if it's been >2w since the last synd link + SLOW_REFETCH = datetime.timedelta(days=2) + # rate limiting HTTP status codes returned by this silo. e.g. twitter returns + # 429, instagram 503, google+ 403. + RATE_LIMIT_HTTP_CODES = ("429",) + DISABLE_HTTP_CODES = ("401",) + TRANSIENT_ERROR_HTTP_CODES = () + # whether granary supports fetching block lists + HAS_BLOCKS = False + # whether to require a u-syndication link for backfeed + BACKFEED_REQUIRES_SYNDICATION_LINK = False + # ignore fragments when comparing syndication links in OPD + IGNORE_SYNDICATION_LINK_FRAGMENTS = False + + # Maps Publish.type (e.g. 'like') to source-specific human readable type label + # (e.g. 'favorite'). Subclasses should override this. + TYPE_LABELS = {} + + # subclasses should override this + URL_CANONICALIZER = util.UrlCanonicalizer(headers=util.REQUEST_HEADERS) + + # Regexps for URL paths that don't accept incoming webmentions. Currently used + # by Blogger. + PATH_BLOCKLIST = () + + created = ndb.DateTimeProperty(auto_now_add=True, required=True) + url = ndb.StringProperty() + status = ndb.StringProperty(choices=STATUSES, default="enabled") + poll_status = ndb.StringProperty(choices=POLL_STATUSES, default="ok") + rate_limited = ndb.BooleanProperty(default=False) + name = ndb.StringProperty() # full human-readable name + picture = ndb.StringProperty() + domains = ndb.StringProperty(repeated=True) + domain_urls = ndb.StringProperty(repeated=True) + features = ndb.StringProperty(repeated=True, choices=FEATURES) + superfeedr_secret = ndb.StringProperty() + webmention_endpoint = ndb.StringProperty() + + # points to an oauth-dropins auth entity. The model class should be a subclass + # of oauth_dropins.BaseAuth. the token should be generated with the + # offline_access scope so that it doesn't expire. + auth_entity = ndb.KeyProperty() + + # + # listen-only properties + # + last_polled = ndb.DateTimeProperty(default=util.EPOCH) + last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH) + last_webmention_sent = ndb.DateTimeProperty() + last_public_post = ndb.DateTimeProperty() + recent_private_posts = ndb.IntegerProperty(default=0) + + # the last time we re-fetched the author's url looking for updated + # syndication links + last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH) + + # the last time we've seen a rel=syndication link for this Source. 
+ # we won't spend the time to re-fetch and look for updates if there's + # never been one + last_syndication_url = ndb.DateTimeProperty() + # the last time we saw a syndication link in an h-feed, as opposed to just on + # permalinks. background: https://github.com/snarfed/bridgy/issues/624 + last_feed_syndication_url = ndb.DateTimeProperty() + + last_activity_id = ndb.StringProperty() + last_activities_etag = ndb.StringProperty() + last_activities_cache_json = ndb.TextProperty() + seen_responses_cache_json = ndb.TextProperty(compressed=True) + + # populated in Poll.poll(), used by handlers + blocked_ids = ndb.JsonProperty(compressed=True) + + # maps updated property names to values that put_updates() writes back to the + # datastore transactionally. set this to {} before beginning. + updates = None + + # gr_source is *not* set to None by default here, since it needs to be unset + # for __getattr__ to run when it's accessed. + + def __init__(self, *args, id=None, **kwargs): + """Constructor. Escapes the key string id if it starts with `__`.""" + if id and id.startswith("__"): + id = "\\" + id + super().__init__(*args, id=id, **kwargs) + + def key_id(self): + """Returns the key's unescaped string id.""" + id = self.key.id() + return id[1:] if id[0] == "\\" else id + + @classmethod + def new(cls, **kwargs): + """Factory method. Creates and returns a new instance for the current user. + + To be implemented by subclasses. + """ + raise NotImplementedError() + + def __getattr__(self, name): + """Lazily load the auth entity and instantiate :attr:`self.gr_source`. + + Once :attr:`self.gr_source` is set, this method will *not* be called; + :attr:`gr_source` will be returned normally. + """ + if name == "gr_source": + super_attr = getattr(super(), name, None) + if super_attr: + return super_attr + elif not self.auth_entity: + return None + + auth_entity = self.auth_entity.get() + try: + refresh_token = auth_entity.refresh_token + self.gr_source = self.GR_CLASS(refresh_token) + return self.gr_source + except AttributeError: + logging.info("no refresh_token") + args = auth_entity.access_token() + if not isinstance(args, tuple): + args = (args,) + + kwargs = {} + if self.key.kind() == "FacebookPage" and auth_entity.type == "user": + kwargs = {"user_id": self.key_id()} + elif self.key.kind() == "Instagram": + kwargs = {"scrape": True, "cookie": INSTAGRAM_SESSIONID_COOKIE} + elif self.key.kind() == "Mastodon": + args = (auth_entity.instance(),) + args + inst = auth_entity.app.get().instance_info + kwargs = { + "user_id": json_loads(auth_entity.user_json).get("id"), + # https://docs-develop.pleroma.social/backend/API/differences_in_mastoapi_responses/#instance + "truncate_text_length": json_loads(inst).get("max_toot_chars") + if inst + else None, + } + elif self.key.kind() == "Twitter": + kwargs = { + "username": self.key_id(), + "scrape_headers": TWITTER_SCRAPE_HEADERS, + } + + self.gr_source = self.GR_CLASS(*args, **kwargs) + return self.gr_source + + return getattr(super(), name) + + @classmethod + def lookup(cls, id): + """Returns the entity with the given id. + + By default, interprets id as just the key id. Subclasses may extend this to + support usernames, etc. + """ + if id and id.startswith("__"): + id = "\\" + id + return ndb.Key(cls, id).get() + + def user_tag_id(self): + """Returns the tag URI for this source, e.g. 
'tag:plus.google.com:123456'.""" + return self.gr_source.tag_uri(self.key_id()) + + def bridgy_path(self): + """Returns the Bridgy page URL path for this source.""" + return "/%s/%s" % (self.SHORT_NAME, self.key_id()) + + def bridgy_url(self): + """Returns the Bridgy page URL for this source.""" + return util.host_url(self.bridgy_path()) + + def silo_url(self, handler): + """Returns the silo account URL, e.g. https://twitter.com/foo.""" + raise NotImplementedError() + + def label(self): + """Human-readable label for this source.""" + return "%s (%s)" % (self.label_name(), self.GR_CLASS.NAME) + + def label_name(self): + """Human-readable name or username for this source, whichever is preferred.""" + return self.name or self.key_id() + + @classmethod + @ndb.transactional() + def put_updates(cls, source): + """Writes source.updates to the datastore transactionally. + + Returns: + source: :class:`Source` + + Returns: + the updated :class:`Source` + """ + if not source.updates: + return source + + logging.info( + "Updating %s %s : %r", + source.label(), + source.bridgy_path(), + {k: v for k, v in source.updates.items() if not k.endswith("_json")}, + ) + + updates = source.updates + source = source.key.get() + source.updates = updates + for name, val in updates.items(): + setattr(source, name, val) + + source.put() + return source + + def poll_period(self): + """Returns the poll frequency for this source, as a :class:`datetime.timedelta`. + + Defaults to ~15m, depending on silo. If we've never sent a webmention for + this source, or the last one we sent was over a month ago, we drop them down + to ~1d after a week long grace period. + """ + now = datetime.datetime.now() + if self.rate_limited: + return self.RATE_LIMITED_POLL + elif now < self.created + self.FAST_POLL_GRACE_PERIOD: + return self.FAST_POLL + elif not self.last_webmention_sent: + return self.SLOW_POLL + elif self.last_webmention_sent > now - datetime.timedelta(days=7): + return self.FAST_POLL + elif self.last_webmention_sent > now - datetime.timedelta(days=30): + return self.FAST_POLL * 10 + else: + return self.SLOW_POLL + + def should_refetch(self): + """Returns True if we should run OPD refetch on this source now.""" + now = datetime.datetime.now() + if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER: + return True + elif not self.last_syndication_url: + return False + + period = ( + self.FAST_REFETCH + if self.last_syndication_url > now - datetime.timedelta(days=14) + else self.SLOW_REFETCH + ) + return self.last_poll_attempt >= self.last_hfeed_refetch + period + + @classmethod + def bridgy_webmention_endpoint(cls, domain="brid.gy"): + """Returns the Bridgy webmention endpoint for this source type.""" + return "https://%s/webmention/%s" % (domain, cls.SHORT_NAME) + + def has_bridgy_webmention_endpoint(self): + """Returns True if this source uses Bridgy's webmention endpoint.""" + return self.webmention_endpoint in ( + self.bridgy_webmention_endpoint(), + self.bridgy_webmention_endpoint(domain="www.brid.gy"), + ) + + def get_author_urls(self): + """Determine the author urls for a particular source. + + In debug mode, replace test domains with localhost. + + Return: + a list of string URLs, possibly empty + """ + return [util.replace_test_domains_with_localhost(u) for u in self.domain_urls] + + def search_for_links(self): + """Searches for activities with links to any of this source's web sites. 
+ + https://github.com/snarfed/bridgy/issues/456 + https://github.com/snarfed/bridgy/issues/565 + + Returns: + sequence of ActivityStreams activity dicts + """ + return [] + + def get_activities_response(self, **kwargs): + """Returns recent posts and embedded comments for this source. + + May be overridden by subclasses. + """ + kwargs.setdefault("group_id", gr_source.SELF) + resp = self.gr_source.get_activities_response(**kwargs) + for activity in resp["items"]: + self._inject_user_urls(activity) + return resp + + def get_activities(self, **kwargs): + return self.get_activities_response(**kwargs)["items"] + + def get_comment(self, comment_id, **kwargs): + """Returns a comment from this source. + + Passes through to granary by default. May be overridden by subclasses. + + Args: + comment_id: string, site-specific comment id + kwargs: passed to :meth:`granary.source.Source.get_comment` + + Returns: + dict, decoded ActivityStreams comment object, or None + """ + comment = self.gr_source.get_comment(comment_id, **kwargs) + if comment: + self._inject_user_urls(comment) + return comment + + def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs): + """Returns an ActivityStreams 'like' activity object. + + Passes through to granary by default. May be overridden + by subclasses. + + Args: + activity_user_id: string id of the user who posted the original activity + activity_id: string activity id + like_user_id: string id of the user who liked the activity + kwargs: passed to granary.Source.get_comment + """ + return self.gr_source.get_like( + activity_user_id, activity_id, like_user_id, **kwargs + ) + + def _inject_user_urls(self, activity): + """Adds this user's web site URLs to their user mentions (in tags), in place.""" + obj = activity.get("object") or activity + user_tag_id = self.user_tag_id() + for tag in obj.get("tags", []): + if tag.get("id") == user_tag_id: + tag.setdefault("urls", []).extend( + [{"value": u} for u in self.domain_urls] + ) + + def create_comment(self, post_url, author_name, author_url, content): + """Creates a new comment in the source silo. + + Must be implemented by subclasses. + + Args: + post_url: string + author_name: string + author_url: string + content: string + + Returns: + response dict with at least 'id' field + """ + raise NotImplementedError() + + def feed_url(self): + """Returns the RSS or Atom (or similar) feed URL for this source. + + Must be implemented by subclasses. Currently only implemented by + :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`. + + Returns: + string URL + """ + raise NotImplementedError() + + def edit_template_url(self): + """Returns the URL for editing this blog's template HTML. + + Must be implemented by subclasses. Currently only implemented by + :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`. + + Returns: + string URL + """ + raise NotImplementedError() + + @classmethod + def button_html(cls, feature, **kwargs): + """Returns an HTML string with a login form and button for this site. + + Mostly just passes through to + :meth:`oauth_dropins.handlers.Start.button_html`. 
+ + Returns: string, HTML + """ + assert set(feature.split(",")) <= set(cls.FEATURES) + form_extra = ( + kwargs.pop("form_extra", "") + + '' % feature + ) + + source = kwargs.pop("source", None) + if source: + form_extra += ( + '\n' % source.key_id() + ) + + if cls.OAUTH_START: + return cls.OAUTH_START.button_html( + "/%s/start" % cls.SHORT_NAME, + form_extra=form_extra, + image_prefix="/oauth_dropins_static/", + **kwargs + ) + + return "" + + @classmethod + @ndb.transactional() + def create_new(cls, user_url=None, **kwargs): + """Creates and saves a new :class:`Source` and adds a poll task for it. + + Args: + user_url: a string, optional. if provided, supersedes other urls when + determining the author_url + **kwargs: passed to :meth:`new()` + + Returns: newly created :class:`Source` + """ + source = cls.new(**kwargs) + if source is None: + return None + + if not source.domain_urls: # defer to the source if it already set this + auth_entity = kwargs.get("auth_entity") + if auth_entity and hasattr(auth_entity, "user_json"): + source.domain_urls, source.domains = source._urls_and_domains( + auth_entity, user_url + ) + logging.debug("URLs/domains: %s %s", source.domain_urls, source.domains) + + # check if this source already exists + existing = source.key.get() + if existing: + # merge some fields + source.features = set(source.features + existing.features) + source.populate( + **existing.to_dict( + include=( + "created", + "last_hfeed_refetch", + "last_poll_attempt", + "last_polled", + "last_syndication_url", + "last_webmention_sent", + "superfeedr_secret", + "webmention_endpoint", + ) + ) + ) + verb = "Updated" + else: + verb = "Added" + + author_urls = source.get_author_urls() + link = ( + "http://indiewebify.me/send-webmentions/?url=" + author_urls[0] + if author_urls + else "http://indiewebify.me/#send-webmentions" + ) + feature = source.features[0] if source.features else "listen" + blurb = "%s %s. %s" % ( + verb, + source.label(), + "Try previewing a post from your web site!" + if feature == "publish" + else 'Try a webmention!' % link + if feature == "webmention" + else "Refresh in a minute to see what we've found!", + ) + logging.info("%s %s", blurb, source.bridgy_url()) + + source.verify() + if source.verified(): + flash(blurb) + + source.put() + + if "webmention" in source.features: + superfeedr.subscribe(source) + + if "listen" in source.features and source.AUTO_POLL: + util.add_poll_task(source, now=True) + util.add_poll_task(source) + + return source + + def verified(self): + """Returns True if this source is ready to be used, false otherwise. + + See :meth:`verify()` for details. May be overridden by subclasses, e.g. + :class:`tumblr.Tumblr`. + """ + if not self.domains or not self.domain_urls: + return False + if "webmention" in self.features and not self.webmention_endpoint: + return False + if "listen" in self.features and not ( + self.webmention_endpoint or self.last_webmention_sent + ): + return False + return True + + def verify(self, force=False): + """Checks that this source is ready to be used. + + For blog and listen sources, this fetches their front page HTML and + discovers their webmention endpoint. For publish sources, this checks that + they have a domain. + + May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`. + + Args: + force: if True, fully verifies (e.g. re-fetches the blog's HTML and + performs webmention discovery) even we already think this source is + verified. 
+ """ + author_urls = [ + u + for u, d in zip(self.get_author_urls(), self.domains) + if not util.in_webmention_blocklist(d) + ] + if ( + (self.verified() and not force) + or self.status == "disabled" + or not self.features + or not author_urls + ): + return + + author_url = author_urls[0] + try: + got = webmention.discover( + author_url, timeout=util.HTTP_TIMEOUT, headers=util.REQUEST_HEADERS + ) + self.webmention_endpoint = got.endpoint + self._fetched_html = got.response.text + except BaseException as e: + logging.info("Error discovering webmention endpoint", exc_info=e) + self.webmention_endpoint = None + + self.put() + + def _urls_and_domains(self, auth_entity, user_url, actor=None): + """Returns this user's valid (not webmention-blocklisted) URLs and domains. + + Converts the auth entity's user_json to an ActivityStreams actor and uses + its 'urls' and 'url' fields. May be overridden by subclasses. + + Args: + auth_entity: :class:`oauth_dropins.models.BaseAuth` + user_url: string, optional URL passed in when authorizing + actor: dict, optional AS actor for the user. If provided, overrides + auth_entity + + Returns: + ([string url, ...], [string domain, ...]) + """ + if not actor: + actor = self.gr_source.user_to_actor(json_loads(auth_entity.user_json)) + logging.debug( + "Extracting URLs and domains from actor: %s", json_dumps(actor, indent=2) + ) + + candidates = util.trim_nulls( + util.uniquify([user_url] + microformats2.object_urls(actor)) + ) + + if len(candidates) > MAX_AUTHOR_URLS: + logging.info( + "Too many profile links! Only resolving the first %s: %s", + MAX_AUTHOR_URLS, + candidates, + ) + + urls = [] + for i, url in enumerate(candidates): + resolved = self.resolve_profile_url(url, resolve=i < MAX_AUTHOR_URLS) + if resolved: + urls.append(resolved) + + final_urls = [] + domains = [] + for url in util.dedupe_urls(urls): # normalizes domains to lower case + # skip links on this source's domain itself. only currently needed for + # Mastodon; the other silo domains are in the webmention blocklist. + domain = util.domain_from_link(url) + if domain != self.gr_source.DOMAIN: + final_urls.append(url) + domains.append(domain) + + return final_urls, domains + + @staticmethod + def resolve_profile_url(url, resolve=True): + """Resolves a profile URL to be added to a source. + + Args: + url: string + resolve: boolean, whether to make HTTP requests to follow redirects, etc. + + Returns: string, resolved URL, or None + """ + final, _, ok = util.get_webmention_target(url, resolve=resolve) + if not ok: + return None + + final = final.lower() + if util.schemeless(final).startswith(util.schemeless(url.lower())): + # redirected to a deeper path. use the original higher level URL. #652 + final = url + + # If final has a path segment check if root has a matching rel=me. + match = re.match(r"^(https?://[^/]+)/.+", final) + if match and resolve: + root = match.group(1) + try: + mf2 = util.fetch_mf2(root) + me_urls = mf2["rels"].get("me", []) + if final in me_urls: + final = root + except requests.RequestException: + logging.warning( + "Couldn't fetch %s, preserving path in %s", + root, + final, + exc_info=True, + ) + + return final + + def canonicalize_url(self, url, activity=None, **kwargs): + """Canonicalizes a post or object URL. + + Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`. 
+ """ + return self.URL_CANONICALIZER(url, **kwargs) if self.URL_CANONICALIZER else url + + def infer_profile_url(self, url): + """Given an arbitrary URL representing a person, try to find their + profile URL for *this* service. + + Queries Bridgy's registered accounts for users with a particular + domain in their silo profile. + + Args: + url: string, a person's URL + + Return: + a string URL for their profile on this service (or None) + """ + domain = util.domain_from_link(url) + if domain == self.gr_source.DOMAIN: + return url + user = self.__class__.query(self.__class__.domains == domain).get() + if user: + return self.gr_source.user_url(user.key_id()) + + def preprocess_for_publish(self, obj): + """Preprocess an object before trying to publish it. + + By default this tries to massage person tags so that the tag's + "url" points to the person's profile on this service (as opposed + to a person's homepage). + + The object is modified in place. + + Args: + obj: ActivityStreams activity or object dict + """ + for tag in obj.get("tags", []): + if tag.get("objectType") == "person": + silo_url = None + for url in microformats2.object_urls(tag): + silo_url = url and self.infer_profile_url(url) + if silo_url: + break + if silo_url: + tag["url"] = silo_url + + # recurse on contained object(s) + for obj in util.get_list(obj, "object"): + self.preprocess_for_publish(obj) + + def on_new_syndicated_post(self, syndpost): + """Called when a new :class:`SyndicatedPost` is stored for this source. + + Args: + syndpost: :class:`SyndicatedPost` + """ + pass + + def is_private(self): + """Returns True if this source is private aka protected. + + ...ie their posts are not public. + """ + return False + + def is_activity_public(self, activity): + """Returns True if the given activity is public, False otherwise. + + Just wraps :meth:`granary.source.Source.is_public`. Subclasses may override. + """ + return gr_source.Source.is_public(activity) + + def is_beta_user(self): + """Returns True if this is a "beta" user opted into new features. + + Beta users come from beta_users.txt. + """ + return self.bridgy_path() in util.BETA_USER_PATHS + + def load_blocklist(self): + """Fetches this user's blocklist, if supported, and stores it in the entity.""" + if not self.HAS_BLOCKS: + return + + try: + ids = self.gr_source.get_blocklist_ids() + except gr_source.RateLimited as e: + ids = e.partial or [] + + self.blocked_ids = ids[:BLOCKLIST_MAX_IDS] + self.put() - def silo_url(self, handler): - """Returns the silo account URL, e.g. https://twitter.com/foo.""" - raise NotImplementedError() + def is_blocked(self, obj): + """Returns True if an object's author is being blocked. - def label(self): - """Human-readable label for this source.""" - return '%s (%s)' % (self.label_name(), self.GR_CLASS.NAME) + ...ie they're in this user's block list. - def label_name(self): - """Human-readable name or username for this source, whichever is preferred.""" - return self.name or self.key_id() + Note that this method is tested in test_twitter.py, not test_models.py, for + historical reasons. + """ + if not self.blocked_ids: + return False - @classmethod - @ndb.transactional() - def put_updates(cls, source): - """Writes source.updates to the datastore transactionally. 
+ for o in [obj] + util.get_list(obj, "object"): + for field in "author", "actor": + if o.get(field, {}).get("numeric_id") in self.blocked_ids: + return True - Returns: - source: :class:`Source` - Returns: - the updated :class:`Source` - """ - if not source.updates: - return source - - logging.info('Updating %s %s : %r', source.label(), source.bridgy_path(), - {k: v for k, v in source.updates.items() if not k.endswith('_json')}) - - updates = source.updates - source = source.key.get() - source.updates = updates - for name, val in updates.items(): - setattr(source, name, val) - - source.put() - return source - - def poll_period(self): - """Returns the poll frequency for this source, as a :class:`datetime.timedelta`. +class Webmentions(StringIdModel): + """A bundle of links to send webmentions for. - Defaults to ~15m, depending on silo. If we've never sent a webmention for - this source, or the last one we sent was over a month ago, we drop them down - to ~1d after a week long grace period. - """ - now = datetime.datetime.now() - if self.rate_limited: - return self.RATE_LIMITED_POLL - elif now < self.created + self.FAST_POLL_GRACE_PERIOD: - return self.FAST_POLL - elif not self.last_webmention_sent: - return self.SLOW_POLL - elif self.last_webmention_sent > now - datetime.timedelta(days=7): - return self.FAST_POLL - elif self.last_webmention_sent > now - datetime.timedelta(days=30): - return self.FAST_POLL * 10 - else: - return self.SLOW_POLL - - def should_refetch(self): - """Returns True if we should run OPD refetch on this source now.""" - now = datetime.datetime.now() - if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER: - return True - elif not self.last_syndication_url: - return False - - period = (self.FAST_REFETCH - if self.last_syndication_url > now - datetime.timedelta(days=14) - else self.SLOW_REFETCH) - return self.last_poll_attempt >= self.last_hfeed_refetch + period - - @classmethod - def bridgy_webmention_endpoint(cls, domain='brid.gy'): - """Returns the Bridgy webmention endpoint for this source type.""" - return 'https://%s/webmention/%s' % (domain, cls.SHORT_NAME) - - def has_bridgy_webmention_endpoint(self): - """Returns True if this source uses Bridgy's webmention endpoint.""" - return self.webmention_endpoint in ( - self.bridgy_webmention_endpoint(), - self.bridgy_webmention_endpoint(domain='www.brid.gy')) - - def get_author_urls(self): - """Determine the author urls for a particular source. - - In debug mode, replace test domains with localhost. - - Return: - a list of string URLs, possibly empty + Use the :class:`Response` and :class:`BlogPost` concrete subclasses below. """ - return [util.replace_test_domains_with_localhost(u) for u in self.domain_urls] - - def search_for_links(self): - """Searches for activities with links to any of this source's web sites. - https://github.com/snarfed/bridgy/issues/456 - https://github.com/snarfed/bridgy/issues/565 + STATUSES = ("new", "processing", "complete", "error") + + # Turn off instance and memcache caching. See Source for details. 
+ _use_cache = False + _use_memcache = False + + source = ndb.KeyProperty() + status = ndb.StringProperty(choices=STATUSES, default="new") + leased_until = ndb.DateTimeProperty() + created = ndb.DateTimeProperty(auto_now_add=True) + updated = ndb.DateTimeProperty(auto_now=True) + + # Original post links, ie webmention targets + sent = ndb.StringProperty(repeated=True) + unsent = ndb.StringProperty(repeated=True) + error = ndb.StringProperty(repeated=True) + failed = ndb.StringProperty(repeated=True) + skipped = ndb.StringProperty(repeated=True) + + def label(self): + """Returns a human-readable string description for use in log messages. + + To be implemented by subclasses. + """ + raise NotImplementedError() + + def add_task(self): + """Adds a propagate task for this entity. + + To be implemented by subclasses. + """ + raise NotImplementedError() + + @ndb.transactional() + def get_or_save(self): + entity = existing = self.key.get() + + propagate = False + if entity: + # merge targets + urls = set( + entity.sent + + entity.unsent + + entity.error + + entity.failed + + entity.skipped + ) + for field in ("sent", "unsent", "error", "failed", "skipped"): + entity_urls = getattr(entity, field) + new_urls = set(getattr(self, field)) - urls + entity_urls += new_urls + if new_urls and field in ("unsent", "error"): + propagate = True + else: + entity = self + propagate = self.unsent or self.error + + if propagate: + logging.debug("New webmentions to propagate! %s", entity.label()) + entity.add_task() + elif not existing: + entity.status = "complete" + + entity.put() + return entity + + def restart(self): + """Moves status and targets to 'new' and adds a propagate task.""" + self.status = "new" + self.unsent = util.dedupe_urls( + self.unsent + self.sent + self.error + self.failed + self.skipped + ) + self.sent = self.error = self.failed = self.skipped = [] + + # clear any cached webmention endpoints + with util.webmention_endpoint_cache_lock: + for url in self.unsent: + util.webmention_endpoint_cache.pop( + util.webmention_endpoint_cache_key(url), None + ) + + # this datastore put and task add should be transactional, but Cloud Tasks + # doesn't support that :( + # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available + self.put() + self.add_task() - Returns: - sequence of ActivityStreams activity dicts - """ - return [] - def get_activities_response(self, **kwargs): - """Returns recent posts and embedded comments for this source. +class Response(Webmentions): + """A comment, like, or repost to be propagated. - May be overridden by subclasses. + The key name is the comment object id as a tag URI. """ - kwargs.setdefault('group_id', gr_source.SELF) - resp = self.gr_source.get_activities_response(**kwargs) - for activity in resp['items']: - self._inject_user_urls(activity) - return resp - - def get_activities(self, **kwargs): - return self.get_activities_response(**kwargs)['items'] - - def get_comment(self, comment_id, **kwargs): - """Returns a comment from this source. - - Passes through to granary by default. May be overridden by subclasses. - Args: - comment_id: string, site-specific comment id - kwargs: passed to :meth:`granary.source.Source.get_comment` + # ActivityStreams JSON activity and comment, like, or repost + type = ndb.StringProperty(choices=VERB_TYPES, default="comment") + # These are TextProperty, and not JsonProperty, so that their plain text is + # visible in the App Engine admin console. (JsonProperty uses a blob. 
:/) + activities_json = ndb.TextProperty(repeated=True) + response_json = ndb.TextProperty() + # Old values for response_json. Populated when the silo reports that the + # response has changed, e.g. the user edited a comment or changed their RSVP + # to an event. + old_response_jsons = ndb.TextProperty(repeated=True) + # JSON dict mapping original post url to activity index in activities_json. + # only set when there's more than one activity. + urls_to_activity = ndb.TextProperty() + # Original post links found by original post discovery + original_posts = ndb.StringProperty(repeated=True) + + def label(self): + return " ".join( + ( + self.key.kind(), + self.type, + self.key.id(), + json_loads(self.response_json).get("url", "[no url]"), + ) + ) + + def add_task(self): + util.add_propagate_task(self) + + @staticmethod + def get_type(obj): + type = get_type(obj) + return type if type in VERB_TYPES else "comment" + + def get_or_save(self, source, restart=False): + resp = super().get_or_save() + + if self.type != resp.type or source.gr_source.activity_changed( + json_loads(resp.response_json), json_loads(self.response_json), log=True + ): + logging.info("Response changed! Re-propagating. Original: %s" % resp) + + resp.old_response_jsons = resp.old_response_jsons[:10] + [ + resp.response_json + ] + + response_json_to_append = json_loads(self.response_json) + source.gr_source.append_in_reply_to( + json_loads(resp.response_json), response_json_to_append + ) + self.response_json = json_dumps(util.trim_nulls(response_json_to_append)) + resp.response_json = self.response_json + resp.restart(source) + elif restart and resp is not self: # ie it already existed + resp.restart(source) + + return resp + + def restart(self, source=None): + """Moves status and targets to 'new' and adds a propagate task.""" + # add original posts with syndication URLs + # TODO: unify with Poll.repropagate_old_responses() + if not source: + source = self.source.get() + + synd_urls = set() + for activity_json in self.activities_json: + activity = json_loads(activity_json) + url = activity.get("url") or activity.get("object", {}).get("url") + if url: + url = source.canonicalize_url(url, activity=activity) + if url: + synd_urls.add(url) + + if synd_urls: + self.unsent += [ + synd.original + for synd in SyndicatedPost.query( + SyndicatedPost.syndication.IN(synd_urls) + ) + if synd.original + ] + + return super().restart() - Returns: - dict, decoded ActivityStreams comment object, or None - """ - comment = self.gr_source.get_comment(comment_id, **kwargs) - if comment: - self._inject_user_urls(comment) - return comment - - def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs): - """Returns an ActivityStreams 'like' activity object. - - Passes through to granary by default. May be overridden - by subclasses. 
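[Editor's note, not part of the patch: a minimal usage sketch of the Webmentions/Response get_or_save() flow reformatted above. The `source` entity, the activity/response dicts, and the URLs are invented for illustration; it assumes an active ndb client context and the json_dumps helper that models.py already imports (import path assumed).]

from oauth_dropins.webutil.util import json_dumps  # assumed import path, as used in models.py
from models import Response

# Hypothetical values; a real poll task builds these from the silo's API.
activity = {'id': 'tag:twitter.com,2013:123', 'url': 'https://twitter.com/me/status/123'}
like = {'id': 'tag:twitter.com,2013:123_favorited_by_456', 'objectType': 'activity', 'verb': 'like'}

resp = Response(
    id=like['id'],                             # key name: the response's tag URI
    source=source.key,                         # assumes an existing Source entity
    type=Response.get_type(like),
    response_json=json_dumps(like),
    activities_json=[json_dumps(activity)],
    unsent=['https://example.com/my-post'],    # webmention targets found by OPD
)
saved = resp.get_or_save(source)
# get_or_save() merges targets with any previously stored copy and only enqueues
# a propagate task when there are new unsent/error URLs or the response changed.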
- - Args: - activity_user_id: string id of the user who posted the original activity - activity_id: string activity id - like_user_id: string id of the user who liked the activity - kwargs: passed to granary.Source.get_comment - """ - return self.gr_source.get_like(activity_user_id, activity_id, like_user_id, - **kwargs) - - def _inject_user_urls(self, activity): - """Adds this user's web site URLs to their user mentions (in tags), in place.""" - obj = activity.get('object') or activity - user_tag_id = self.user_tag_id() - for tag in obj.get('tags', []): - if tag.get('id') == user_tag_id: - tag.setdefault('urls', []).extend([{'value': u} for u in self.domain_urls]) - - def create_comment(self, post_url, author_name, author_url, content): - """Creates a new comment in the source silo. - - Must be implemented by subclasses. - - Args: - post_url: string - author_name: string - author_url: string - content: string - - Returns: - response dict with at least 'id' field - """ - raise NotImplementedError() - def feed_url(self): - """Returns the RSS or Atom (or similar) feed URL for this source. +class Activity(StringIdModel): + """An activity with responses to be propagated. - Must be implemented by subclasses. Currently only implemented by - :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`. + The key name is the activity id as a tag URI. - Returns: - string URL + Currently only used for posts sent to us by the browser extension. """ - raise NotImplementedError() - def edit_template_url(self): - """Returns the URL for editing this blog's template HTML. - - Must be implemented by subclasses. Currently only implemented by - :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`. - - Returns: - string URL - """ - raise NotImplementedError() + source = ndb.KeyProperty() + created = ndb.DateTimeProperty(auto_now_add=True) + updated = ndb.DateTimeProperty(auto_now=True) + activity_json = ndb.TextProperty() + html = ndb.TextProperty() - @classmethod - def button_html(cls, feature, **kwargs): - """Returns an HTML string with a login form and button for this site. - Mostly just passes through to - :meth:`oauth_dropins.handlers.Start.button_html`. +class BlogPost(Webmentions): + """A blog post to be processed for links to send webmentions to. - Returns: string, HTML - """ - assert set(feature.split(',')) <= set(cls.FEATURES) - form_extra = (kwargs.pop('form_extra', '') + - '' % feature) - - source = kwargs.pop('source', None) - if source: - form_extra += ('\n' % - source.key_id()) - - if cls.OAUTH_START: - return cls.OAUTH_START.button_html( - '/%s/start' % cls.SHORT_NAME, - form_extra=form_extra, - image_prefix='/oauth_dropins_static/', - **kwargs) - - return '' - - @classmethod - @ndb.transactional() - def create_new(cls, user_url=None, **kwargs): - """Creates and saves a new :class:`Source` and adds a poll task for it. - - Args: - user_url: a string, optional. if provided, supersedes other urls when - determining the author_url - **kwargs: passed to :meth:`new()` - - Returns: newly created :class:`Source` + The key name is the URL. 
""" - source = cls.new(**kwargs) - if source is None: - return None - - if not source.domain_urls: # defer to the source if it already set this - auth_entity = kwargs.get('auth_entity') - if auth_entity and hasattr(auth_entity, 'user_json'): - source.domain_urls, source.domains = source._urls_and_domains( - auth_entity, user_url) - logging.debug('URLs/domains: %s %s', source.domain_urls, source.domains) - - # check if this source already exists - existing = source.key.get() - if existing: - # merge some fields - source.features = set(source.features + existing.features) - source.populate(**existing.to_dict(include=( - 'created', 'last_hfeed_refetch', 'last_poll_attempt', 'last_polled', - 'last_syndication_url', 'last_webmention_sent', 'superfeedr_secret', - 'webmention_endpoint'))) - verb = 'Updated' - else: - verb = 'Added' - - author_urls = source.get_author_urls() - link = ('http://indiewebify.me/send-webmentions/?url=' + author_urls[0] - if author_urls else 'http://indiewebify.me/#send-webmentions') - feature = source.features[0] if source.features else 'listen' - blurb = '%s %s. %s' % ( - verb, source.label(), - 'Try previewing a post from your web site!' if feature == 'publish' - else 'Try a webmention!' % link if feature == 'webmention' - else "Refresh in a minute to see what we've found!") - logging.info('%s %s', blurb, source.bridgy_url()) - - source.verify() - if source.verified(): - flash(blurb) - - source.put() - - if 'webmention' in source.features: - superfeedr.subscribe(source) - if 'listen' in source.features and source.AUTO_POLL: - util.add_poll_task(source, now=True) - util.add_poll_task(source) + feed_item = ndb.JsonProperty(compressed=True) # from Superfeedr - return source - - def verified(self): - """Returns True if this source is ready to be used, false otherwise. - - See :meth:`verify()` for details. May be overridden by subclasses, e.g. - :class:`tumblr.Tumblr`. - """ - if not self.domains or not self.domain_urls: - return False - if 'webmention' in self.features and not self.webmention_endpoint: - return False - if ('listen' in self.features and - not (self.webmention_endpoint or self.last_webmention_sent)): - return False - return True - - def verify(self, force=False): - """Checks that this source is ready to be used. - - For blog and listen sources, this fetches their front page HTML and - discovers their webmention endpoint. For publish sources, this checks that - they have a domain. - - May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`. - - Args: - force: if True, fully verifies (e.g. re-fetches the blog's HTML and - performs webmention discovery) even we already think this source is - verified. - """ - author_urls = [u for u, d in zip(self.get_author_urls(), self.domains) - if not util.in_webmention_blocklist(d)] - if ((self.verified() and not force) or self.status == 'disabled' or - not self.features or not author_urls): - return - - author_url = author_urls[0] - try: - got = webmention.discover(author_url, timeout=util.HTTP_TIMEOUT, - headers=util.REQUEST_HEADERS) - self.webmention_endpoint = got.endpoint - self._fetched_html = got.response.text - except BaseException as e: - logging.info('Error discovering webmention endpoint', exc_info=e) - self.webmention_endpoint = None - - self.put() - - def _urls_and_domains(self, auth_entity, user_url, actor=None): - """Returns this user's valid (not webmention-blocklisted) URLs and domains. - - Converts the auth entity's user_json to an ActivityStreams actor and uses - its 'urls' and 'url' fields. 
May be overridden by subclasses. - - Args: - auth_entity: :class:`oauth_dropins.models.BaseAuth` - user_url: string, optional URL passed in when authorizing - actor: dict, optional AS actor for the user. If provided, overrides - auth_entity - - Returns: - ([string url, ...], [string domain, ...]) - """ - if not actor: - actor = self.gr_source.user_to_actor(json_loads(auth_entity.user_json)) - logging.debug('Extracting URLs and domains from actor: %s', - json_dumps(actor, indent=2)) - - candidates = util.trim_nulls(util.uniquify( - [user_url] + microformats2.object_urls(actor))) - - if len(candidates) > MAX_AUTHOR_URLS: - logging.info('Too many profile links! Only resolving the first %s: %s', - MAX_AUTHOR_URLS, candidates) - - urls = [] - for i, url in enumerate(candidates): - resolved = self.resolve_profile_url(url, resolve=i < MAX_AUTHOR_URLS) - if resolved: - urls.append(resolved) - - final_urls = [] - domains = [] - for url in util.dedupe_urls(urls): # normalizes domains to lower case - # skip links on this source's domain itself. only currently needed for - # Mastodon; the other silo domains are in the webmention blocklist. - domain = util.domain_from_link(url) - if domain != self.gr_source.DOMAIN: - final_urls.append(url) - domains.append(domain) - - return final_urls, domains - - @staticmethod - def resolve_profile_url(url, resolve=True): - """Resolves a profile URL to be added to a source. - - Args: - url: string - resolve: boolean, whether to make HTTP requests to follow redirects, etc. - - Returns: string, resolved URL, or None - """ - final, _, ok = util.get_webmention_target(url, resolve=resolve) - if not ok: - return None - - final = final.lower() - if util.schemeless(final).startswith(util.schemeless(url.lower())): - # redirected to a deeper path. use the original higher level URL. #652 - final = url - - # If final has a path segment check if root has a matching rel=me. - match = re.match(r'^(https?://[^/]+)/.+', final) - if match and resolve: - root = match.group(1) - try: - mf2 = util.fetch_mf2(root) - me_urls = mf2['rels'].get('me', []) - if final in me_urls: - final = root - except requests.RequestException: - logging.warning("Couldn't fetch %s, preserving path in %s", - root, final, exc_info=True) - - return final - - def canonicalize_url(self, url, activity=None, **kwargs): - """Canonicalizes a post or object URL. - - Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`. - """ - return self.URL_CANONICALIZER(url, **kwargs) if self.URL_CANONICALIZER else url + def label(self): + url = None + if self.feed_item: + url = self.feed_item.get("permalinkUrl") + return " ".join((self.key.kind(), self.key.id(), url or "[no url]")) - def infer_profile_url(self, url): - """Given an arbitrary URL representing a person, try to find their - profile URL for *this* service. + def add_task(self): + util.add_propagate_blogpost_task(self) - Queries Bridgy's registered accounts for users with a particular - domain in their silo profile. - Args: - url: string, a person's URL +class PublishedPage(StringIdModel): + """Minimal root entity for :class:`Publish` children with the same source URL. - Return: - a string URL for their profile on this service (or None) + Key id is the string source URL. 
""" - domain = util.domain_from_link(url) - if domain == self.gr_source.DOMAIN: - return url - user = self.__class__.query(self.__class__.domains == domain).get() - if user: - return self.gr_source.user_url(user.key_id()) - - def preprocess_for_publish(self, obj): - """Preprocess an object before trying to publish it. - - By default this tries to massage person tags so that the tag's - "url" points to the person's profile on this service (as opposed - to a person's homepage). - - The object is modified in place. - Args: - obj: ActivityStreams activity or object dict - """ - for tag in obj.get('tags', []): - if tag.get('objectType') == 'person': - silo_url = None - for url in microformats2.object_urls(tag): - silo_url = url and self.infer_profile_url(url) - if silo_url: - break - if silo_url: - tag['url'] = silo_url - - # recurse on contained object(s) - for obj in util.get_list(obj, 'object'): - self.preprocess_for_publish(obj) - - def on_new_syndicated_post(self, syndpost): - """Called when a new :class:`SyndicatedPost` is stored for this source. - - Args: - syndpost: :class:`SyndicatedPost` - """ pass - def is_private(self): - """Returns True if this source is private aka protected. - ...ie their posts are not public. - """ - return False - - def is_activity_public(self, activity): - """Returns True if the given activity is public, False otherwise. - - Just wraps :meth:`granary.source.Source.is_public`. Subclasses may override. - """ - return gr_source.Source.is_public(activity) - - def is_beta_user(self): - """Returns True if this is a "beta" user opted into new features. - - Beta users come from beta_users.txt. - """ - return self.bridgy_path() in util.BETA_USER_PATHS - - def load_blocklist(self): - """Fetches this user's blocklist, if supported, and stores it in the entity.""" - if not self.HAS_BLOCKS: - return - - try: - ids = self.gr_source.get_blocklist_ids() - except gr_source.RateLimited as e: - ids = e.partial or [] - - self.blocked_ids = ids[:BLOCKLIST_MAX_IDS] - self.put() - - def is_blocked(self, obj): - """Returns True if an object's author is being blocked. - - ...ie they're in this user's block list. +class Publish(ndb.Model): + """A comment, like, repost, or RSVP published into a silo. - Note that this method is tested in test_twitter.py, not test_models.py, for - historical reasons. + Child of a :class:`PublishedPage` entity. """ - if not self.blocked_ids: - return False - - for o in [obj] + util.get_list(obj, 'object'): - for field in 'author', 'actor': - if o.get(field, {}).get('numeric_id') in self.blocked_ids: - return True + STATUSES = ("new", "complete", "failed", "deleted") -class Webmentions(StringIdModel): - """A bundle of links to send webmentions for. - - Use the :class:`Response` and :class:`BlogPost` concrete subclasses below. - """ - STATUSES = ('new', 'processing', 'complete', 'error') + # Turn off instance and memcache caching. See Source for details. + _use_cache = False + _use_memcache = False - # Turn off instance and memcache caching. See Source for details. 
- _use_cache = False - _use_memcache = False + type = ndb.StringProperty(choices=PUBLISH_TYPES) + status = ndb.StringProperty(choices=STATUSES, default="new") + source = ndb.KeyProperty() + html = ndb.TextProperty() # raw HTML fetched from source + published = ndb.JsonProperty(compressed=True) + created = ndb.DateTimeProperty(auto_now_add=True) + updated = ndb.DateTimeProperty(auto_now=True) - source = ndb.KeyProperty() - status = ndb.StringProperty(choices=STATUSES, default='new') - leased_until = ndb.DateTimeProperty() - created = ndb.DateTimeProperty(auto_now_add=True) - updated = ndb.DateTimeProperty(auto_now=True) + def type_label(self): + """Returns silo-specific string type, e.g. 'favorite' instead of 'like'.""" + for cls in sources.values(): # global + if cls.__name__ == self.source.kind(): + return cls.TYPE_LABELS.get(self.type, self.type) - # Original post links, ie webmention targets - sent = ndb.StringProperty(repeated=True) - unsent = ndb.StringProperty(repeated=True) - error = ndb.StringProperty(repeated=True) - failed = ndb.StringProperty(repeated=True) - skipped = ndb.StringProperty(repeated=True) + return self.type - def label(self): - """Returns a human-readable string description for use in log messages. - To be implemented by subclasses. - """ - raise NotImplementedError() +class BlogWebmention(Publish, StringIdModel): + """Datastore entity for webmentions for hosted blog providers. - def add_task(self): - """Adds a propagate task for this entity. + Key id is the source URL and target URL concated with a space, ie 'SOURCE + TARGET'. The source URL is *always* the URL given in the webmention HTTP + request. If the source page has a u-url, that's stored in the u_url property. + The target URL is always the final URL, after any redirects. - To be implemented by subclasses. + Reuses :class:`Publish`'s fields, but otherwise unrelated. """ - raise NotImplementedError() - - @ndb.transactional() - def get_or_save(self): - entity = existing = self.key.get() - - propagate = False - if entity: - # merge targets - urls = set(entity.sent + entity.unsent + entity.error + - entity.failed + entity.skipped) - for field in ('sent', 'unsent', 'error', 'failed', 'skipped'): - entity_urls = getattr(entity, field) - new_urls = set(getattr(self, field)) - urls - entity_urls += new_urls - if new_urls and field in ('unsent', 'error'): - propagate = True - else: - entity = self - propagate = self.unsent or self.error - - if propagate: - logging.debug('New webmentions to propagate! %s', entity.label()) - entity.add_task() - elif not existing: - entity.status = 'complete' - - entity.put() - return entity - - def restart(self): - """Moves status and targets to 'new' and adds a propagate task.""" - self.status = 'new' - self.unsent = util.dedupe_urls(self.unsent + self.sent + self.error + - self.failed + self.skipped) - self.sent = self.error = self.failed = self.skipped = [] - - # clear any cached webmention endpoints - with util.webmention_endpoint_cache_lock: - for url in self.unsent: - util.webmention_endpoint_cache.pop(util.webmention_endpoint_cache_key(url), None) - - # this datastore put and task add should be transactional, but Cloud Tasks - # doesn't support that :( - # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available - self.put() - self.add_task() - - -class Response(Webmentions): - """A comment, like, or repost to be propagated. - - The key name is the comment object id as a tag URI. 
- """ - # ActivityStreams JSON activity and comment, like, or repost - type = ndb.StringProperty(choices=VERB_TYPES, default='comment') - # These are TextProperty, and not JsonProperty, so that their plain text is - # visible in the App Engine admin console. (JsonProperty uses a blob. :/) - activities_json = ndb.TextProperty(repeated=True) - response_json = ndb.TextProperty() - # Old values for response_json. Populated when the silo reports that the - # response has changed, e.g. the user edited a comment or changed their RSVP - # to an event. - old_response_jsons = ndb.TextProperty(repeated=True) - # JSON dict mapping original post url to activity index in activities_json. - # only set when there's more than one activity. - urls_to_activity = ndb.TextProperty() - # Original post links found by original post discovery - original_posts = ndb.StringProperty(repeated=True) - - def label(self): - return ' '.join((self.key.kind(), self.type, self.key.id(), - json_loads(self.response_json).get('url', '[no url]'))) - - def add_task(self): - util.add_propagate_task(self) - - @staticmethod - def get_type(obj): - type = get_type(obj) - return type if type in VERB_TYPES else 'comment' - - def get_or_save(self, source, restart=False): - resp = super().get_or_save() - - if (self.type != resp.type or - source.gr_source.activity_changed(json_loads(resp.response_json), - json_loads(self.response_json), - log=True)): - logging.info('Response changed! Re-propagating. Original: %s' % resp) - - resp.old_response_jsons = resp.old_response_jsons[:10] + [resp.response_json] - - response_json_to_append = json_loads(self.response_json) - source.gr_source.append_in_reply_to(json_loads(resp.response_json), response_json_to_append) - self.response_json = json_dumps(util.trim_nulls(response_json_to_append)) - resp.response_json = self.response_json - resp.restart(source) - elif restart and resp is not self: # ie it already existed - resp.restart(source) - - return resp - - def restart(self, source=None): - """Moves status and targets to 'new' and adds a propagate task.""" - # add original posts with syndication URLs - # TODO: unify with Poll.repropagate_old_responses() - if not source: - source = self.source.get() - - synd_urls = set() - for activity_json in self.activities_json: - activity = json_loads(activity_json) - url = activity.get('url') or activity.get('object', {}).get('url') - if url: - url = source.canonicalize_url(url, activity=activity) - if url: - synd_urls.add(url) - - if synd_urls: - self.unsent += [synd.original for synd in - SyndicatedPost.query(SyndicatedPost.syndication.IN(synd_urls)) - if synd.original] - - return super().restart() - - -class Activity(StringIdModel): - """An activity with responses to be propagated. - - The key name is the activity id as a tag URI. - - Currently only used for posts sent to us by the browser extension. - """ - source = ndb.KeyProperty() - created = ndb.DateTimeProperty(auto_now_add=True) - updated = ndb.DateTimeProperty(auto_now=True) - activity_json = ndb.TextProperty() - html = ndb.TextProperty() - - -class BlogPost(Webmentions): - """A blog post to be processed for links to send webmentions to. - - The key name is the URL. 
- """ - feed_item = ndb.JsonProperty(compressed=True) # from Superfeedr - - def label(self): - url = None - if self.feed_item: - url = self.feed_item.get('permalinkUrl') - return ' '.join((self.key.kind(), self.key.id(), url or '[no url]')) - - def add_task(self): - util.add_propagate_blogpost_task(self) - - -class PublishedPage(StringIdModel): - """Minimal root entity for :class:`Publish` children with the same source URL. - Key id is the string source URL. - """ - pass + # If the source page has a u-url, it's stored here and overrides the source + # URL in the key id. + u_url = ndb.StringProperty() + # Any initial target URLs that redirected to the final target URL, in redirect + # order. + redirected_target_urls = ndb.StringProperty(repeated=True) -class Publish(ndb.Model): - """A comment, like, repost, or RSVP published into a silo. - - Child of a :class:`PublishedPage` entity. - """ - STATUSES = ('new', 'complete', 'failed', 'deleted') - - # Turn off instance and memcache caching. See Source for details. - _use_cache = False - _use_memcache = False - - type = ndb.StringProperty(choices=PUBLISH_TYPES) - status = ndb.StringProperty(choices=STATUSES, default='new') - source = ndb.KeyProperty() - html = ndb.TextProperty() # raw HTML fetched from source - published = ndb.JsonProperty(compressed=True) - created = ndb.DateTimeProperty(auto_now_add=True) - updated = ndb.DateTimeProperty(auto_now=True) + def source_url(self): + return self.u_url or self.key.id().split()[0] - def type_label(self): - """Returns silo-specific string type, e.g. 'favorite' instead of 'like'.""" - for cls in sources.values(): # global - if cls.__name__ == self.source.kind(): - return cls.TYPE_LABELS.get(self.type, self.type) + def target_url(self): + return self.key.id().split()[1] - return self.type - - -class BlogWebmention(Publish, StringIdModel): - """Datastore entity for webmentions for hosted blog providers. - Key id is the source URL and target URL concated with a space, ie 'SOURCE - TARGET'. The source URL is *always* the URL given in the webmention HTTP - request. If the source page has a u-url, that's stored in the u_url property. - The target URL is always the final URL, after any redirects. - - Reuses :class:`Publish`'s fields, but otherwise unrelated. - """ - # If the source page has a u-url, it's stored here and overrides the source - # URL in the key id. - u_url = ndb.StringProperty() - - # Any initial target URLs that redirected to the final target URL, in redirect - # order. - redirected_target_urls = ndb.StringProperty(repeated=True) - - def source_url(self): - return self.u_url or self.key.id().split()[0] - - def target_url(self): - return self.key.id().split()[1] +class SyndicatedPost(ndb.Model): + """Represents a syndicated post and its discovered original (or not + if we found no original post). We discover the relationship by + following rel=syndication links on the author's h-feed. + See :mod:`original_post_discovery`. -class SyndicatedPost(ndb.Model): - """Represents a syndicated post and its discovered original (or not - if we found no original post). We discover the relationship by - following rel=syndication links on the author's h-feed. - - See :mod:`original_post_discovery`. - - When a :class:`SyndicatedPost` entity is about to be stored, - :meth:`source.Source.on_new_syndicated_post()` is called before it's stored. - """ - - # Turn off instance and memcache caching. See Response for details. 
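[Editor's note, not part of the patch: a small illustration of the 'SOURCE TARGET' key format that the BlogWebmention.source_url()/target_url() methods above rely on. The URLs are invented and an active ndb client context is assumed.]

from models import BlogWebmention

# Key id is the webmention source and target URLs joined by a single space.
bw = BlogWebmention(id='http://author.example/post http://blog.example/page')
bw.source_url()   # -> 'http://author.example/post' (u_url, if set, takes precedence)
bw.target_url()   # -> 'http://blog.example/page'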
- _use_cache = False - _use_memcache = False - - syndication = ndb.StringProperty() - original = ndb.StringProperty() - created = ndb.DateTimeProperty(auto_now_add=True) - updated = ndb.DateTimeProperty(auto_now=True) - - @classmethod - @ndb.transactional() - def insert_original_blank(cls, source, original): - """Insert a new original -> None relationship. Does a check-and-set to - make sure no previous relationship exists for this original. If - there is, nothing will be added. - - Args: - source: :class:`Source` subclass - original: string - """ - if cls.query(cls.original == original, ancestor=source.key).get(): - return - cls(parent=source.key, original=original, syndication=None).put() - - @classmethod - @ndb.transactional() - def insert_syndication_blank(cls, source, syndication): - """Insert a new syndication -> None relationship. Does a check-and-set - to make sure no previous relationship exists for this - syndication. If there is, nothing will be added. - - Args: - source: :class:`Source` subclass - original: string + When a :class:`SyndicatedPost` entity is about to be stored, + :meth:`source.Source.on_new_syndicated_post()` is called before it's stored. """ - if cls.query(cls.syndication == syndication, ancestor=source.key).get(): - return - cls(parent=source.key, original=None, syndication=syndication).put() + # Turn off instance and memcache caching. See Response for details. + _use_cache = False + _use_memcache = False + + syndication = ndb.StringProperty() + original = ndb.StringProperty() + created = ndb.DateTimeProperty(auto_now_add=True) + updated = ndb.DateTimeProperty(auto_now=True) + + @classmethod + @ndb.transactional() + def insert_original_blank(cls, source, original): + """Insert a new original -> None relationship. Does a check-and-set to + make sure no previous relationship exists for this original. If + there is, nothing will be added. + + Args: + source: :class:`Source` subclass + original: string + """ + if cls.query(cls.original == original, ancestor=source.key).get(): + return + cls(parent=source.key, original=original, syndication=None).put() + + @classmethod + @ndb.transactional() + def insert_syndication_blank(cls, source, syndication): + """Insert a new syndication -> None relationship. Does a check-and-set + to make sure no previous relationship exists for this + syndication. If there is, nothing will be added. + + Args: + source: :class:`Source` subclass + original: string + """ + + if cls.query(cls.syndication == syndication, ancestor=source.key).get(): + return + cls(parent=source.key, original=None, syndication=syndication).put() + + @classmethod + @ndb.transactional() + def insert(cls, source, syndication, original): + """Insert a new (non-blank) syndication -> original relationship. + + This method does a check-and-set within transaction to avoid + including duplicate relationships. + + If blank entries exists for the syndication or original URL + (i.e. syndication -> None or original -> None), they will first be + removed. If non-blank relationships exist, they will be retained. 
+ + Args: + source: :class:`Source` subclass + syndication: string (not None) + original: string (not None) + + Returns: + SyndicatedPost: newly created or preexisting entity + """ + # check for an exact match + duplicate = cls.query( + cls.syndication == syndication, + cls.original == original, + ancestor=source.key, + ).get() + if duplicate: + return duplicate + + # delete blanks (expect at most 1 of each) + for filter in ( + ndb.AND(cls.syndication == syndication, cls.original == None), + ndb.AND(cls.original == original, cls.syndication == None), + ): + for synd in cls.query(filter, ancestor=source.key).fetch(keys_only=True): + synd.delete() + + r = cls(parent=source.key, original=original, syndication=syndication) + r.put() + return r - @classmethod - @ndb.transactional() - def insert(cls, source, syndication, original): - """Insert a new (non-blank) syndication -> original relationship. - This method does a check-and-set within transaction to avoid - including duplicate relationships. - - If blank entries exists for the syndication or original URL - (i.e. syndication -> None or original -> None), they will first be - removed. If non-blank relationships exist, they will be retained. +class Domain(StringIdModel): + """A domain owned by a user. - Args: - source: :class:`Source` subclass - syndication: string (not None) - original: string (not None) + Ownership is proven via IndieAuth. Supports secret tokens associated with each + domain. Clients can include a token with requests that operate on a given + domain, eg sending posts and responses from the browser extension. - Returns: - SyndicatedPost: newly created or preexisting entity + Key id is the string domain, eg 'example.com'. """ - # check for an exact match - duplicate = cls.query(cls.syndication == syndication, - cls.original == original, - ancestor=source.key).get() - if duplicate: - return duplicate - - # delete blanks (expect at most 1 of each) - for filter in (ndb.AND(cls.syndication == syndication, cls.original == None), - ndb.AND(cls.original == original, cls.syndication == None)): - for synd in cls.query(filter, ancestor=source.key).fetch(keys_only=True): - synd.delete() - r = cls(parent=source.key, original=original, syndication=syndication) - r.put() - return r - - -class Domain(StringIdModel): - """A domain owned by a user. - - Ownership is proven via IndieAuth. Supports secret tokens associated with each - domain. Clients can include a token with requests that operate on a given - domain, eg sending posts and responses from the browser extension. - - Key id is the string domain, eg 'example.com'. - """ - tokens = ndb.StringProperty(repeated=True) - auth = ndb.KeyProperty(IndieAuth) - created = ndb.DateTimeProperty(auto_now_add=True) - updated = ndb.DateTimeProperty(auto_now=True) + tokens = ndb.StringProperty(repeated=True) + auth = ndb.KeyProperty(IndieAuth) + created = ndb.DateTimeProperty(auto_now_add=True) + updated = ndb.DateTimeProperty(auto_now=True) diff --git a/original_post_discovery.py b/original_post_discovery.py index 3e461c54..c68b5334 100644 --- a/original_post_discovery.py +++ b/original_post_discovery.py @@ -45,552 +45,629 @@ MAX_ALLOWABLE_QUERIES = 30 -def discover(source, activity, fetch_hfeed=True, include_redirect_sources=True, - already_fetched_hfeeds=None): - """Augments the standard original_post_discovery algorithm with a - reverse lookup that supports posts without a backlink or citation. 
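[Editor's note, not part of the patch: the reverse lookup mentioned here is backed by the SyndicatedPost check-and-set methods above. A minimal sketch of the intended behavior follows; the URLs and the `source` entity are invented, and an active ndb client context is assumed.]

from models import SyndicatedPost

original = 'https://example.com/2021/06/my-post'
syndication = 'https://twitter.com/me/status/123'

# First pass found the original but no syndication link yet: store a blank.
SyndicatedPost.insert_original_blank(source, original)

# Later, OPD matches the pair: insert() deletes the blank row and stores the
# real original <-> syndication relationship (idempotent on repeat calls).
rel = SyndicatedPost.insert(source, syndication, original)

# Posse-post discovery can then resolve a syndicated URL back to its original:
originals = [r.original for r in SyndicatedPost.query(
    SyndicatedPost.syndication == syndication, ancestor=source.key) if r.original]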
- - If fetch_hfeed is False, then we will check the db for previously found - :class:`models.SyndicatedPost`\ s but will not do posse-post-discovery to find - new ones. - - Args: - source: :class:`models.Source` subclass. Changes to property values (e.g. - domains, domain_urls, last_syndication_url) are stored in source.updates; - they should be updated transactionally later. - activity: activity dict - fetch_hfeed: boolean - include_redirect_sources: boolean, whether to include URLs that redirect as - well as their final destination URLs - already_fetched_hfeeds: set, URLs that we have already fetched and run - posse-post-discovery on, so we can avoid running it multiple times - - Returns: - (set(string original post URLs), set(string mention URLs)) tuple - - """ - logging.debug('discovering original posts for: %s', - activity.get('url') or activity.get('id')) - - if not source.updates: - source.updates = {} - - if already_fetched_hfeeds is None: - already_fetched_hfeeds = set() - - originals, mentions = gr_source.Source.original_post_discovery( - activity, domains=source.domains, - include_redirect_sources=include_redirect_sources, - include_reserved_hosts=DEBUG, - headers=util.request_headers(source=source)) - - # only include mentions of the author themselves. - # (mostly just for Mastodon; other silos' domains are all in the blocklist, so - # their mention URLs get dropped later anyway.) - # (these are originally added in Source._inject_user_urls() and in poll step 2.) - obj = activity.get('object', {}) - other_user_mentions = set( - t.get('url') for t in obj.get('tags', []) - if t.get('objectType') == 'person' and t.get('url') not in source.domain_urls) - originals -= other_user_mentions - mentions -= other_user_mentions - - # original posts are only from the author themselves - obj_author = obj.get('author', {}) - activity_author = activity.get('actor', {}) - author_id = obj_author.get('id') or activity_author.get('id') - author_username = obj_author.get('username') or activity_author.get('username') - if (author_id and author_id != source.user_tag_id() and - author_username != source.key.id()): - logging.info(f"Demoting original post links because user {source.user_tag_id()} doesn't match author id {author_id} username {author_username}") - # this is someone else's post, so all links must be mentions - mentions.update(originals) - originals = set() - - # look for original URL of attachments (e.g. 
quote tweets) - for att in obj.get('attachments', []): - if (att.get('objectType') in ('note', 'article') - and att.get('author', {}).get('id') == source.user_tag_id()): - logging.debug('running original post discovery on attachment: %s', - att.get('id')) - att_origs, _ = discover( - source, att, include_redirect_sources=include_redirect_sources) - logging.debug('original post discovery found originals for attachment, %s', - att_origs) - mentions.update(att_origs) - - def resolve(urls): - resolved = set() - for url in urls: - final, domain, send = util.get_webmention_target(url) - if send and domain != source.gr_source.DOMAIN: - resolved.add(final) - if include_redirect_sources: - resolved.add(url) - return resolved - - originals = resolve(originals) - mentions = resolve(mentions) - - if not source.get_author_urls(): - logging.debug('no author url(s), cannot find h-feed') - return ((originals, mentions) if not source.BACKFEED_REQUIRES_SYNDICATION_LINK - else (set(), set())) - - # TODO possible optimization: if we've discovered a backlink to a post on the - # author's domain (i.e., it included a link or citation), then skip the rest - # of this. - syndicated = [] - syndication_url = obj.get('url') or activity.get('url') - if syndication_url: - # use the canonical syndication url on both sides, so that we have - # the best chance of finding a match. Some silos allow several - # different permalink formats to point to the same place. - syndication_url = source.canonicalize_url(syndication_url) +def discover( + source, + activity, + fetch_hfeed=True, + include_redirect_sources=True, + already_fetched_hfeeds=None, +): + """Augments the standard original_post_discovery algorithm with a + reverse lookup that supports posts without a backlink or citation. + + If fetch_hfeed is False, then we will check the db for previously found + :class:`models.SyndicatedPost`\ s but will not do posse-post-discovery to find + new ones. + + Args: + source: :class:`models.Source` subclass. Changes to property values (e.g. + domains, domain_urls, last_syndication_url) are stored in source.updates; + they should be updated transactionally later. + activity: activity dict + fetch_hfeed: boolean + include_redirect_sources: boolean, whether to include URLs that redirect as + well as their final destination URLs + already_fetched_hfeeds: set, URLs that we have already fetched and run + posse-post-discovery on, so we can avoid running it multiple times + + Returns: + (set(string original post URLs), set(string mention URLs)) tuple + + """ + logging.debug( + "discovering original posts for: %s", activity.get("url") or activity.get("id") + ) + + if not source.updates: + source.updates = {} + + if already_fetched_hfeeds is None: + already_fetched_hfeeds = set() + + originals, mentions = gr_source.Source.original_post_discovery( + activity, + domains=source.domains, + include_redirect_sources=include_redirect_sources, + include_reserved_hosts=DEBUG, + headers=util.request_headers(source=source), + ) + + # only include mentions of the author themselves. + # (mostly just for Mastodon; other silos' domains are all in the blocklist, so + # their mention URLs get dropped later anyway.) + # (these are originally added in Source._inject_user_urls() and in poll step 2.) 
+ obj = activity.get("object", {}) + other_user_mentions = set( + t.get("url") + for t in obj.get("tags", []) + if t.get("objectType") == "person" and t.get("url") not in source.domain_urls + ) + originals -= other_user_mentions + mentions -= other_user_mentions + + # original posts are only from the author themselves + obj_author = obj.get("author", {}) + activity_author = activity.get("actor", {}) + author_id = obj_author.get("id") or activity_author.get("id") + author_username = obj_author.get("username") or activity_author.get("username") + if ( + author_id + and author_id != source.user_tag_id() + and author_username != source.key.id() + ): + logging.info( + f"Demoting original post links because user {source.user_tag_id()} doesn't match author id {author_id} username {author_username}" + ) + # this is someone else's post, so all links must be mentions + mentions.update(originals) + originals = set() + + # look for original URL of attachments (e.g. quote tweets) + for att in obj.get("attachments", []): + if ( + att.get("objectType") in ("note", "article") + and att.get("author", {}).get("id") == source.user_tag_id() + ): + logging.debug( + "running original post discovery on attachment: %s", att.get("id") + ) + att_origs, _ = discover( + source, att, include_redirect_sources=include_redirect_sources + ) + logging.debug( + "original post discovery found originals for attachment, %s", att_origs + ) + mentions.update(att_origs) + + def resolve(urls): + resolved = set() + for url in urls: + final, domain, send = util.get_webmention_target(url) + if send and domain != source.gr_source.DOMAIN: + resolved.add(final) + if include_redirect_sources: + resolved.add(url) + return resolved + + originals = resolve(originals) + mentions = resolve(mentions) + + if not source.get_author_urls(): + logging.debug("no author url(s), cannot find h-feed") + return ( + (originals, mentions) + if not source.BACKFEED_REQUIRES_SYNDICATION_LINK + else (set(), set()) + ) + + # TODO possible optimization: if we've discovered a backlink to a post on the + # author's domain (i.e., it included a link or citation), then skip the rest + # of this. + syndicated = [] + syndication_url = obj.get("url") or activity.get("url") if syndication_url: - syndicated = _posse_post_discovery(source, activity, syndication_url, - fetch_hfeed, already_fetched_hfeeds) - originals.update(syndicated) - originals = set(util.dedupe_urls(originals)) - - if not syndication_url: - logging.debug('no %s syndication url, cannot process h-entries', source.SHORT_NAME) - - return ((originals, mentions) if not source.BACKFEED_REQUIRES_SYNDICATION_LINK - else (set(syndicated), set())) + # use the canonical syndication url on both sides, so that we have + # the best chance of finding a match. Some silos allow several + # different permalink formats to point to the same place. + syndication_url = source.canonicalize_url(syndication_url) + if syndication_url: + syndicated = _posse_post_discovery( + source, activity, syndication_url, fetch_hfeed, already_fetched_hfeeds + ) + originals.update(syndicated) + originals = set(util.dedupe_urls(originals)) + + if not syndication_url: + logging.debug( + "no %s syndication url, cannot process h-entries", source.SHORT_NAME + ) + + return ( + (originals, mentions) + if not source.BACKFEED_REQUIRES_SYNDICATION_LINK + else (set(syndicated), set()) + ) def refetch(source): - """Refetch the author's URLs and look for new or updated syndication - links that might not have been there the first time we looked. 
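[Editor's note, not part of the patch: a hedged sketch of how the reformatted discover() above is typically called. `source` and `activity` are assumed to already exist; the example URLs are invented.]

originals, mentions = discover(source, activity, fetch_hfeed=True)
# e.g. originals == {'https://example.com/2021/06/my-post'}
#      mentions  == {'https://friend.example/their-post'}
# For silos with BACKFEED_REQUIRES_SYNDICATION_LINK set, only originals that were
# confirmed via a syndication link are returned, and mentions are dropped.
# Side effects accumulate in source.updates; per the docstring, the caller is
# expected to save them transactionally later.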
- - Args: - source: :class:`models.Source` subclass. Changes to property values (e.g. - domains, domain_urls, last_syndication_url) are stored in source.updates; - they should be updated transactionally later. - - Returns: - dict: mapping syndicated_url to a list of new :class:`models.SyndicatedPost`\ s - """ - logging.debug('attempting to refetch h-feed for %s', source.label()) - - if not source.updates: - source.updates = {} + """Refetch the author's URLs and look for new or updated syndication + links that might not have been there the first time we looked. - results = {} - for url in _get_author_urls(source): - results.update(_process_author(source, url, refetch=True)) + Args: + source: :class:`models.Source` subclass. Changes to property values (e.g. + domains, domain_urls, last_syndication_url) are stored in source.updates; + they should be updated transactionally later. - return results + Returns: + dict: mapping syndicated_url to a list of new :class:`models.SyndicatedPost`\ s + """ + logging.debug("attempting to refetch h-feed for %s", source.label()) + if not source.updates: + source.updates = {} -def targets_for_response(resp, originals, mentions): - """Returns the URLs that we should send webmentions to for a given response. - - ...specifically, all responses except posts get sent to original post URLs, - but only posts and comments get sent to mentioned URLs. - - Args: - resp: ActivityStreams response object - originals, mentions: sequence of string URLs - - Returns: - set of string URLs - """ - type = models.Response.get_type(resp) - targets = set() - if type != 'post': - targets |= originals - if type in ('post', 'comment'): - targets |= mentions - return targets - - -def _posse_post_discovery(source, activity, syndication_url, fetch_hfeed, - already_fetched_hfeeds): - """Performs the actual meat of the posse-post-discover. - - Args: - source: :class:`models.Source` subclass - activity: activity dict - syndication_url: url of the syndicated copy for which we are - trying to find an original - fetch_hfeed: boolean, whether or not to fetch and parse the - author's feed if we don't have a previously stored - relationship - already_fetched_hfeeds: set, URLs we've already fetched in a - previous iteration - - Return: - sequence of string original post urls, possibly empty - """ - logging.info('starting posse post discovery with syndicated %s', - syndication_url) - - relationships = SyndicatedPost.query( - SyndicatedPost.syndication == syndication_url, - ancestor=source.key).fetch() - - if source.IGNORE_SYNDICATION_LINK_FRAGMENTS: - relationships += SyndicatedPost.query( - # prefix search to find any instances of this synd link with a fragment - SyndicatedPost.syndication > f'{syndication_url}#', - SyndicatedPost.syndication < f'{syndication_url}#\ufffd', - ancestor=source.key).fetch() - - if not relationships and fetch_hfeed: - # a syndicated post we haven't seen before! fetch the author's URLs to see - # if we can find it. - # - # TODO: Consider using the actor's url, with get_author_urls() as the - # fallback in the future to support content from non-Bridgy users. 
results = {} for url in _get_author_urls(source): - if url not in already_fetched_hfeeds: - results.update(_process_author(source, url)) - already_fetched_hfeeds.add(url) - else: - logging.debug('skipping %s, already fetched this round', url) + results.update(_process_author(source, url, refetch=True)) - relationships = results.get(syndication_url, []) + return results - if not relationships: - # No relationships were found. Remember that we've seen this - # syndicated post to avoid reprocessing it every time - logging.debug('posse post discovery found no relationship for %s', - syndication_url) - if fetch_hfeed: - SyndicatedPost.insert_syndication_blank(source, syndication_url) - originals = [r.original for r in relationships if r.original] - if originals: - logging.debug('posse post discovery found relationship(s) %s -> %s', - syndication_url, originals) - return originals +def targets_for_response(resp, originals, mentions): + """Returns the URLs that we should send webmentions to for a given response. + + ...specifically, all responses except posts get sent to original post URLs, + but only posts and comments get sent to mentioned URLs. + + Args: + resp: ActivityStreams response object + originals, mentions: sequence of string URLs + + Returns: + set of string URLs + """ + type = models.Response.get_type(resp) + targets = set() + if type != "post": + targets |= originals + if type in ("post", "comment"): + targets |= mentions + return targets + + +def _posse_post_discovery( + source, activity, syndication_url, fetch_hfeed, already_fetched_hfeeds +): + """Performs the actual meat of the posse-post-discover. + + Args: + source: :class:`models.Source` subclass + activity: activity dict + syndication_url: url of the syndicated copy for which we are + trying to find an original + fetch_hfeed: boolean, whether or not to fetch and parse the + author's feed if we don't have a previously stored + relationship + already_fetched_hfeeds: set, URLs we've already fetched in a + previous iteration + + Return: + sequence of string original post urls, possibly empty + """ + logging.info("starting posse post discovery with syndicated %s", syndication_url) + + relationships = SyndicatedPost.query( + SyndicatedPost.syndication == syndication_url, ancestor=source.key + ).fetch() + + if source.IGNORE_SYNDICATION_LINK_FRAGMENTS: + relationships += SyndicatedPost.query( + # prefix search to find any instances of this synd link with a fragment + SyndicatedPost.syndication > f"{syndication_url}#", + SyndicatedPost.syndication < f"{syndication_url}#\ufffd", + ancestor=source.key, + ).fetch() + + if not relationships and fetch_hfeed: + # a syndicated post we haven't seen before! fetch the author's URLs to see + # if we can find it. + # + # TODO: Consider using the actor's url, with get_author_urls() as the + # fallback in the future to support content from non-Bridgy users. + results = {} + for url in _get_author_urls(source): + if url not in already_fetched_hfeeds: + results.update(_process_author(source, url)) + already_fetched_hfeeds.add(url) + else: + logging.debug("skipping %s, already fetched this round", url) + + relationships = results.get(syndication_url, []) + + if not relationships: + # No relationships were found. 
Remember that we've seen this + # syndicated post to avoid reprocessing it every time + logging.debug( + "posse post discovery found no relationship for %s", syndication_url + ) + if fetch_hfeed: + SyndicatedPost.insert_syndication_blank(source, syndication_url) + + originals = [r.original for r in relationships if r.original] + if originals: + logging.debug( + "posse post discovery found relationship(s) %s -> %s", + syndication_url, + originals, + ) + return originals def _process_author(source, author_url, refetch=False, store_blanks=True): - """Fetch the author's domain URL, and look for syndicated posts. - - Args: - source: a subclass of :class:`models.Source` - author_url: the author's homepage URL - refetch: boolean, whether to refetch and process entries we've seen before - store_blanks: boolean, whether we should store blank - :class:`models.SyndicatedPost`\ s when we don't find a relationship - - Return: - a dict of syndicated_url to a list of new :class:`models.SyndicatedPost`\ s - """ - # for now use whether the url is a valid webmention target - # as a proxy for whether it's worth searching it. - author_url, _, ok = util.get_webmention_target(author_url) - if not ok: - return {} - - logging.debug('fetching author url %s', author_url) - try: - author_mf2 = util.fetch_mf2(author_url) - except AssertionError: - raise # for unit tests - except BaseException: - # TODO limit allowed failures, cache the author's h-feed url - # or the # of times we've failed to fetch it - logging.info('Could not fetch author url %s', author_url, exc_info=True) - return {} - - feeditems = _find_feed_items(author_mf2) - - # try rel=feeds - feed_urls = set() - for feed_url in author_mf2['rels'].get('feed', []): - # check that it's html, not too big, etc - feed_url, _, feed_ok = util.get_webmention_target(feed_url) - if feed_url == author_url: - logging.debug('author url is the feed url, ignoring') - elif not feed_ok: - logging.debug("skipping feed since it's not HTML or otherwise bad") - else: - feed_urls.add(feed_url) - - for feed_url in feed_urls: + """Fetch the author's domain URL, and look for syndicated posts. + + Args: + source: a subclass of :class:`models.Source` + author_url: the author's homepage URL + refetch: boolean, whether to refetch and process entries we've seen before + store_blanks: boolean, whether we should store blank + :class:`models.SyndicatedPost`\ s when we don't find a relationship + + Return: + a dict of syndicated_url to a list of new :class:`models.SyndicatedPost`\ s + """ + # for now use whether the url is a valid webmention target + # as a proxy for whether it's worth searching it. + author_url, _, ok = util.get_webmention_target(author_url) + if not ok: + return {} + + logging.debug("fetching author url %s", author_url) try: - logging.debug("fetching author's rel-feed %s", feed_url) - feed_mf2 = util.fetch_mf2(feed_url) - feeditems = _merge_hfeeds(feeditems, _find_feed_items(feed_mf2)) - domain = util.domain_from_link(feed_url) - if source.updates is not None and domain not in source.domains: - domains = source.updates.setdefault('domains', source.domains) - if domain not in domains: - logging.info('rel-feed found new domain %s! 
adding to source', domain) - domains.append(domain) - + author_mf2 = util.fetch_mf2(author_url) except AssertionError: - raise # reraise assertions for unit tests + raise # for unit tests except BaseException: - logging.info('Could not fetch h-feed url %s.', feed_url, exc_info=True) - - # sort by dt-updated/dt-published - def updated_or_published(item): - props = microformats2.first_props(item.get('properties')) - return props.get('updated') or props.get('published') or '' - - feeditems.sort(key=updated_or_published, reverse=True) - - permalink_to_entry = collections.OrderedDict() - for child in feeditems: - if 'h-entry' in child['type']: - permalinks = child['properties'].get('url', []) - if not permalinks: - logging.debug('ignoring h-entry with no u-url!') - for permalink in permalinks: - if isinstance(permalink, str): - permalink_to_entry[permalink] = child + # TODO limit allowed failures, cache the author's h-feed url + # or the # of times we've failed to fetch it + logging.info("Could not fetch author url %s", author_url, exc_info=True) + return {} + + feeditems = _find_feed_items(author_mf2) + + # try rel=feeds + feed_urls = set() + for feed_url in author_mf2["rels"].get("feed", []): + # check that it's html, not too big, etc + feed_url, _, feed_ok = util.get_webmention_target(feed_url) + if feed_url == author_url: + logging.debug("author url is the feed url, ignoring") + elif not feed_ok: + logging.debug("skipping feed since it's not HTML or otherwise bad") else: - logging.warning('unexpected non-string "url" property: %s', permalink) - - max = (MAX_PERMALINK_FETCHES_BETA if source.is_beta_user() - else MAX_PERMALINK_FETCHES) - if len(permalink_to_entry) >= max: - logging.info('Hit cap of %d permalinks. Stopping.', max) - break - - # query all preexisting permalinks at once, instead of once per link - permalinks_list = list(permalink_to_entry.keys()) - # fetch the maximum allowed entries (currently 30) at a time - preexisting_list = itertools.chain.from_iterable( - SyndicatedPost.query( - SyndicatedPost.original.IN(permalinks_list[i:i + MAX_ALLOWABLE_QUERIES]), - ancestor=source.key) - for i in range(0, len(permalinks_list), MAX_ALLOWABLE_QUERIES)) - preexisting = {} - for r in preexisting_list: - preexisting.setdefault(r.original, []).append(r) - - results = {} - for permalink, entry in permalink_to_entry.items(): - logging.debug('processing permalink: %s', permalink) - new_results = process_entry( - source, permalink, entry, refetch, preexisting.get(permalink, []), - store_blanks=store_blanks) - for key, value in new_results.items(): - results.setdefault(key, []).extend(value) - - if source.updates is not None and results: - # keep track of the last time we've seen rel=syndication urls for - # this author. this helps us decide whether to refetch periodically - # and look for updates. - # Source will be saved at the end of each round of polling - source.updates['last_syndication_url'] = util.now_fn() - - return results + feed_urls.add(feed_url) + + for feed_url in feed_urls: + try: + logging.debug("fetching author's rel-feed %s", feed_url) + feed_mf2 = util.fetch_mf2(feed_url) + feeditems = _merge_hfeeds(feeditems, _find_feed_items(feed_mf2)) + domain = util.domain_from_link(feed_url) + if source.updates is not None and domain not in source.domains: + domains = source.updates.setdefault("domains", source.domains) + if domain not in domains: + logging.info( + "rel-feed found new domain %s! 
adding to source", domain + ) + domains.append(domain) + + except AssertionError: + raise # reraise assertions for unit tests + except BaseException: + logging.info("Could not fetch h-feed url %s.", feed_url, exc_info=True) + + # sort by dt-updated/dt-published + def updated_or_published(item): + props = microformats2.first_props(item.get("properties")) + return props.get("updated") or props.get("published") or "" + + feeditems.sort(key=updated_or_published, reverse=True) + + permalink_to_entry = collections.OrderedDict() + for child in feeditems: + if "h-entry" in child["type"]: + permalinks = child["properties"].get("url", []) + if not permalinks: + logging.debug("ignoring h-entry with no u-url!") + for permalink in permalinks: + if isinstance(permalink, str): + permalink_to_entry[permalink] = child + else: + logging.warning( + 'unexpected non-string "url" property: %s', permalink + ) + + max = ( + MAX_PERMALINK_FETCHES_BETA + if source.is_beta_user() + else MAX_PERMALINK_FETCHES + ) + if len(permalink_to_entry) >= max: + logging.info("Hit cap of %d permalinks. Stopping.", max) + break + + # query all preexisting permalinks at once, instead of once per link + permalinks_list = list(permalink_to_entry.keys()) + # fetch the maximum allowed entries (currently 30) at a time + preexisting_list = itertools.chain.from_iterable( + SyndicatedPost.query( + SyndicatedPost.original.IN(permalinks_list[i : i + MAX_ALLOWABLE_QUERIES]), + ancestor=source.key, + ) + for i in range(0, len(permalinks_list), MAX_ALLOWABLE_QUERIES) + ) + preexisting = {} + for r in preexisting_list: + preexisting.setdefault(r.original, []).append(r) + results = {} + for permalink, entry in permalink_to_entry.items(): + logging.debug("processing permalink: %s", permalink) + new_results = process_entry( + source, + permalink, + entry, + refetch, + preexisting.get(permalink, []), + store_blanks=store_blanks, + ) + for key, value in new_results.items(): + results.setdefault(key, []).extend(value) + + if source.updates is not None and results: + # keep track of the last time we've seen rel=syndication urls for + # this author. this helps us decide whether to refetch periodically + # and look for updates. + # Source will be saved at the end of each round of polling + source.updates["last_syndication_url"] = util.now_fn() + + return results -def _merge_hfeeds(feed1, feed2): - """Merge items from two h-feeds into a composite feed. Skips items in - feed2 that are already represented in feed1, based on the "url" property. - - Args: - feed1: a list of dicts - feed2: a list of dicts - - Returns: - a list of dicts - """ - seen = set() - for item in feed1: - for url in item.get('properties', {}).get('url', []): - if isinstance(url, str): - seen.add(url) - return feed1 + [item for item in feed2 if all( - (url not in seen) for url in item.get('properties', {}).get('url', []) if isinstance(url, str))] +def _merge_hfeeds(feed1, feed2): + """Merge items from two h-feeds into a composite feed. Skips items in + feed2 that are already represented in feed1, based on the "url" property. 
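[Editor's note, not part of the patch: a tiny worked example of the deduplication rule _merge_hfeeds() describes here; the dicts are invented.]

feed1 = [{'type': ['h-entry'], 'properties': {'url': ['https://example.com/a']}}]
feed2 = [
    {'type': ['h-entry'], 'properties': {'url': ['https://example.com/a']}},  # dropped, already in feed1
    {'type': ['h-entry'], 'properties': {'url': ['https://example.com/b']}},  # kept
]
merged = _merge_hfeeds(feed1, feed2)
# merged contains the /a entry from feed1 plus only the new /b entry from feed2.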
+ + Args: + feed1: a list of dicts + feed2: a list of dicts + + Returns: + a list of dicts + """ + seen = set() + for item in feed1: + for url in item.get("properties", {}).get("url", []): + if isinstance(url, str): + seen.add(url) + + return feed1 + [ + item + for item in feed2 + if all( + (url not in seen) + for url in item.get("properties", {}).get("url", []) + if isinstance(url, str) + ) + ] def _find_feed_items(mf2): - """Extract feed items from given microformats2 data. - - If the top-level h-* item is an h-feed, return its children. Otherwise, - returns the top-level items. - - Args: - mf2: dict, parsed mf2 data - - Returns: list of dicts, each one representing an mf2 h-* item - """ - feeditems = mf2['items'] - hfeeds = mf2util.find_all_entries(mf2, ('h-feed',)) - if hfeeds: - feeditems = list(itertools.chain.from_iterable( - hfeed.get('children', []) for hfeed in hfeeds)) - else: - logging.debug('No h-feed found, fallback to top-level h-entrys.') - - if len(feeditems) > MAX_FEED_ENTRIES: - logging.info('Feed has %s entries! only processing the first %s.', - len(feeditems), MAX_FEED_ENTRIES) - feeditems = feeditems[:MAX_FEED_ENTRIES] - - return feeditems - - -def process_entry(source, permalink, feed_entry, refetch, preexisting, - store_blanks=True): - """Fetch and process an h-entry and save a new :class:`models.SyndicatedPost`. - - Args: - source: - permalink: url of the unprocessed post - feed_entry: the h-feed version of the h-entry dict, often contains - a partial version of the h-entry at the permalink - refetch: boolean, whether to refetch and process entries we've seen before - preexisting: list of previously discovered :class:`models.SyndicatedPost`\ s - for this permalink - store_blanks: boolean, whether we should store blank - :class:`models.SyndicatedPost`\ s when we don't find a relationship - - Returns: - a dict from syndicated url to a list of new :class:`models.SyndicatedPost`\ s - """ - # if the post has already been processed, do not add to the results - # since this method only returns *newly* discovered relationships. - if preexisting: - # if we're refetching and this one is blank, do not return. - # if there is a blank entry, it should be the one and only entry, - # but go ahead and check 'all' of them to be safe. - if not refetch: - return {} - synds = [s.syndication for s in preexisting if s.syndication] - if synds: - logging.debug('previously found relationship(s) for original %s: %s', - permalink, synds) - - # first try with the h-entry from the h-feed. 
if we find the syndication url - # we're looking for, we don't have to fetch the permalink - permalink, _, type_ok = util.get_webmention_target(permalink) - usynd = feed_entry.get('properties', {}).get('syndication', []) - if usynd: - logging.debug('u-syndication links on the h-feed h-entry: %s', usynd) - results = _process_syndication_urls(source, permalink, set( - url for url in usynd if isinstance(url, str)), preexisting) - success = True - - if results: - source.updates['last_feed_syndication_url'] = util.now_fn() - elif not source.last_feed_syndication_url or not feed_entry: - # fetch the full permalink page if we think it might have more details - mf2 = None - try: - if type_ok: - logging.debug('fetching post permalink %s', permalink) - mf2 = util.fetch_mf2(permalink) - except AssertionError: - raise # for unit tests - except BaseException: - # TODO limit the number of allowed failures - logging.info('Could not fetch permalink %s', permalink, exc_info=True) - success = False - - if mf2: - syndication_urls = set() - relsynd = mf2['rels'].get('syndication', []) - if relsynd: - logging.debug('rel-syndication links: %s', relsynd) - syndication_urls.update(url for url in relsynd - if isinstance(url, str)) - # there should only be one h-entry on a permalink page, but - # we'll check all of them just in case. - for hentry in (item for item in mf2['items'] - if 'h-entry' in item['type']): - usynd = hentry.get('properties', {}).get('syndication', []) - if usynd: - logging.debug('u-syndication links: %s', usynd) - syndication_urls.update(url for url in usynd - if isinstance(url, str)) - results = _process_syndication_urls( - source, permalink, syndication_urls, preexisting) - - # detect and delete SyndicatedPosts that were removed from the site - if success: - result_syndposts = list(itertools.chain(*results.values())) - for syndpost in preexisting: - if syndpost.syndication and syndpost not in result_syndposts: - logging.info('deleting relationship that disappeared: %s', syndpost) - syndpost.key.delete() - preexisting.remove(syndpost) - - if not results: - logging.debug('no syndication links from %s to current source %s.', - permalink, source.label()) + """Extract feed items from given microformats2 data. + + If the top-level h-* item is an h-feed, return its children. Otherwise, + returns the top-level items. + + Args: + mf2: dict, parsed mf2 data + + Returns: list of dicts, each one representing an mf2 h-* item + """ + feeditems = mf2["items"] + hfeeds = mf2util.find_all_entries(mf2, ("h-feed",)) + if hfeeds: + feeditems = list( + itertools.chain.from_iterable(hfeed.get("children", []) for hfeed in hfeeds) + ) + else: + logging.debug("No h-feed found, fallback to top-level h-entrys.") + + if len(feeditems) > MAX_FEED_ENTRIES: + logging.info( + "Feed has %s entries! only processing the first %s.", + len(feeditems), + MAX_FEED_ENTRIES, + ) + feeditems = feeditems[:MAX_FEED_ENTRIES] + + return feeditems + + +def process_entry( + source, permalink, feed_entry, refetch, preexisting, store_blanks=True +): + """Fetch and process an h-entry and save a new :class:`models.SyndicatedPost`. 
+ + Args: + source: + permalink: url of the unprocessed post + feed_entry: the h-feed version of the h-entry dict, often contains + a partial version of the h-entry at the permalink + refetch: boolean, whether to refetch and process entries we've seen before + preexisting: list of previously discovered :class:`models.SyndicatedPost`\ s + for this permalink + store_blanks: boolean, whether we should store blank + :class:`models.SyndicatedPost`\ s when we don't find a relationship + + Returns: + a dict from syndicated url to a list of new :class:`models.SyndicatedPost`\ s + """ + # if the post has already been processed, do not add to the results + # since this method only returns *newly* discovered relationships. + if preexisting: + # if we're refetching and this one is blank, do not return. + # if there is a blank entry, it should be the one and only entry, + # but go ahead and check 'all' of them to be safe. + if not refetch: + return {} + synds = [s.syndication for s in preexisting if s.syndication] + if synds: + logging.debug( + "previously found relationship(s) for original %s: %s", permalink, synds + ) + + # first try with the h-entry from the h-feed. if we find the syndication url + # we're looking for, we don't have to fetch the permalink + permalink, _, type_ok = util.get_webmention_target(permalink) + usynd = feed_entry.get("properties", {}).get("syndication", []) + if usynd: + logging.debug("u-syndication links on the h-feed h-entry: %s", usynd) + results = _process_syndication_urls( + source, + permalink, + set(url for url in usynd if isinstance(url, str)), + preexisting, + ) + success = True + + if results: + source.updates["last_feed_syndication_url"] = util.now_fn() + elif not source.last_feed_syndication_url or not feed_entry: + # fetch the full permalink page if we think it might have more details + mf2 = None + try: + if type_ok: + logging.debug("fetching post permalink %s", permalink) + mf2 = util.fetch_mf2(permalink) + except AssertionError: + raise # for unit tests + except BaseException: + # TODO limit the number of allowed failures + logging.info("Could not fetch permalink %s", permalink, exc_info=True) + success = False + + if mf2: + syndication_urls = set() + relsynd = mf2["rels"].get("syndication", []) + if relsynd: + logging.debug("rel-syndication links: %s", relsynd) + syndication_urls.update(url for url in relsynd if isinstance(url, str)) + # there should only be one h-entry on a permalink page, but + # we'll check all of them just in case. 
+ for hentry in (item for item in mf2["items"] if "h-entry" in item["type"]): + usynd = hentry.get("properties", {}).get("syndication", []) + if usynd: + logging.debug("u-syndication links: %s", usynd) + syndication_urls.update(url for url in usynd if isinstance(url, str)) + results = _process_syndication_urls( + source, permalink, syndication_urls, preexisting + ) + + # detect and delete SyndicatedPosts that were removed from the site + if success: + result_syndposts = list(itertools.chain(*results.values())) + for syndpost in preexisting: + if syndpost.syndication and syndpost not in result_syndposts: + logging.info("deleting relationship that disappeared: %s", syndpost) + syndpost.key.delete() + preexisting.remove(syndpost) + + if not results: + logging.debug( + "no syndication links from %s to current source %s.", + permalink, + source.label(), + ) + results = {} + if store_blanks and not preexisting: + # remember that this post doesn't have syndication links for this + # particular source + logging.debug( + "saving empty relationship so that %s will not be " "searched again", + permalink, + ) + SyndicatedPost.insert_original_blank(source, permalink) + + # only return results that are not in the preexisting list + new_results = {} + for syndurl, syndposts_for_url in results.items(): + for syndpost in syndposts_for_url: + if syndpost not in preexisting: + new_results.setdefault(syndurl, []).append(syndpost) + + if new_results: + logging.debug("discovered relationships %s", new_results) + return new_results + + +def _process_syndication_urls(source, permalink, syndication_urls, preexisting): + """Process a list of syndication URLs looking for one that matches the + current source. If one is found, stores a new :class:`models.SyndicatedPost` + in the db. + + Args: + source: a :class:`models.Source` subclass + permalink: a string. the current h-entry permalink + syndication_urls: a collection of strings. the unfitered list + of syndication urls + preexisting: a list of previously discovered :class:`models.SyndicatedPost`\ s + + Returns: + dict mapping string syndication url to list of :class:`models.SyndicatedPost`\ s + """ results = {} - if store_blanks and not preexisting: - # remember that this post doesn't have syndication links for this - # particular source - logging.debug('saving empty relationship so that %s will not be ' - 'searched again', permalink) - SyndicatedPost.insert_original_blank(source, permalink) - - # only return results that are not in the preexisting list - new_results = {} - for syndurl, syndposts_for_url in results.items(): - for syndpost in syndposts_for_url: - if syndpost not in preexisting: - new_results.setdefault(syndurl, []).append(syndpost) - - if new_results: - logging.debug('discovered relationships %s', new_results) - return new_results - - -def _process_syndication_urls(source, permalink, syndication_urls, - preexisting): - """Process a list of syndication URLs looking for one that matches the - current source. If one is found, stores a new :class:`models.SyndicatedPost` - in the db. - - Args: - source: a :class:`models.Source` subclass - permalink: a string. the current h-entry permalink - syndication_urls: a collection of strings. 
the unfitered list - of syndication urls - preexisting: a list of previously discovered :class:`models.SyndicatedPost`\ s - - Returns: - dict mapping string syndication url to list of :class:`models.SyndicatedPost`\ s - """ - results = {} - # save the results (or lack thereof) to the db, and put them in a - # map for immediate use - for url in syndication_urls: - # source-specific logic to standardize the URL - url = source.canonicalize_url(url) - if not url: - continue - - # TODO: save future lookups by saving results for other sources too (note: - # query the appropriate source subclass by author.domains, rather than - # author.domain_urls) - # - # we may have already seen this relationship, save a DB lookup by - # finding it in the preexisting list - relationship = next((sp for sp in preexisting - if sp.syndication == url - and sp.original == permalink), None) - if not relationship: - logging.debug('saving discovered relationship %s -> %s', url, permalink) - relationship = SyndicatedPost.insert( - source, syndication=url, original=permalink) - results.setdefault(url, []).append(relationship) - - return results + # save the results (or lack thereof) to the db, and put them in a + # map for immediate use + for url in syndication_urls: + # source-specific logic to standardize the URL + url = source.canonicalize_url(url) + if not url: + continue + + # TODO: save future lookups by saving results for other sources too (note: + # query the appropriate source subclass by author.domains, rather than + # author.domain_urls) + # + # we may have already seen this relationship, save a DB lookup by + # finding it in the preexisting list + relationship = next( + ( + sp + for sp in preexisting + if sp.syndication == url and sp.original == permalink + ), + None, + ) + if not relationship: + logging.debug("saving discovered relationship %s -> %s", url, permalink) + relationship = SyndicatedPost.insert( + source, syndication=url, original=permalink + ) + results.setdefault(url, []).append(relationship) + + return results def _get_author_urls(source): - max = models.MAX_AUTHOR_URLS - urls = source.get_author_urls() - if len(urls) > max: - logging.warning('user has over %d URLs! only running PPD on %s. skipping %s.', - max, urls[:max], urls[max:]) - urls = urls[:max] - - return urls + max = models.MAX_AUTHOR_URLS + urls = source.get_author_urls() + if len(urls) > max: + logging.warning( + "user has over %d URLs! only running PPD on %s. 
skipping %s.", + max, + urls[:max], + urls[max:], + ) + urls = urls[:max] + + return urls diff --git a/pages.py b/pages.py index dc3db228..603b171c 100644 --- a/pages.py +++ b/pages.py @@ -25,545 +25,635 @@ # populate models.sources import blogger, facebook, flickr, github, indieauth, instagram, mastodon, medium, meetup, reddit, tumblr, twitter, wordpress_rest -SITES = ','.join(list(models.sources.keys()) + ['fake']) # for unit tests +SITES = ",".join(list(models.sources.keys()) + ["fake"]) # for unit tests RECENT_PRIVATE_POSTS_THRESHOLD = 5 -@app.route('/', methods=['HEAD']) -@app.route('/users', methods=['HEAD']) -@app.route(f'//', methods=['HEAD']) -@app.route('/about', methods=['HEAD']) +@app.route("/", methods=["HEAD"]) +@app.route("/users", methods=["HEAD"]) +@app.route(f"//", methods=["HEAD"]) +@app.route("/about", methods=["HEAD"]) def head(site=None, id=None): - """Return an empty 200 with no caching directives.""" - if site and site not in models.sources: - return '', 404 + """Return an empty 200 with no caching directives.""" + if site and site not in models.sources: + return "", 404 - return '' + return "" -@app.route('/') +@app.route("/") @flask_util.cached(cache, datetime.timedelta(days=1)) def front_page(): - """View for the front page.""" - return render_template('index.html') + """View for the front page.""" + return render_template("index.html") -@app.route('/about') +@app.route("/about") def about(): - return render_template('about.html') + return render_template("about.html") -@app.route('/users') +@app.route("/users") @flask_util.cached(cache, datetime.timedelta(hours=1)) def users(): - """View for /users. - - Semi-optimized. Pages by source name. Queries each source type for results - with name greater than the start_name query param, then merge sorts the - results and truncates at PAGE_SIZE. - - The start_name param is expected to be capitalized because capital letters - sort lexicographically before lower case letters. An alternative would be to - store a lower cased version of the name in another property and query on that. - """ - PAGE_SIZE = 50 - - start_name = request.values.get('start_name', '') - queries = [cls.query(cls.name >= start_name).fetch_async(PAGE_SIZE) - for cls in models.sources.values()] - - sources = sorted(itertools.chain(*[q.get_result() for q in queries]), - key=lambda s: (s.name.lower(), s.GR_CLASS.NAME)) - sources = [util.preprocess_source(s) for s in sources - if s.name.lower() >= start_name.lower() and s.features - and s.status != 'disabled' - ][:PAGE_SIZE] - - return render_template('users.html', PAGE_SIZE=PAGE_SIZE, sources=sources) - - -@app.route(f'//') + """View for /users. + + Semi-optimized. Pages by source name. Queries each source type for results + with name greater than the start_name query param, then merge sorts the + results and truncates at PAGE_SIZE. + + The start_name param is expected to be capitalized because capital letters + sort lexicographically before lower case letters. An alternative would be to + store a lower cased version of the name in another property and query on that. 
+ """ + PAGE_SIZE = 50 + + start_name = request.values.get("start_name", "") + queries = [ + cls.query(cls.name >= start_name).fetch_async(PAGE_SIZE) + for cls in models.sources.values() + ] + + sources = sorted( + itertools.chain(*[q.get_result() for q in queries]), + key=lambda s: (s.name.lower(), s.GR_CLASS.NAME), + ) + sources = [ + util.preprocess_source(s) + for s in sources + if s.name.lower() >= start_name.lower() + and s.features + and s.status != "disabled" + ][:PAGE_SIZE] + + return render_template("users.html", PAGE_SIZE=PAGE_SIZE, sources=sources) + + +@app.route(f"//") def user(site, id): - """View for a user page.""" - cls = models.sources.get(site) - if not cls: - return render_template('user_not_found.html'), 404 - - source = cls.lookup(id) - - if not source: - key = cls.query(ndb.OR(*[ndb.GenericProperty(prop) == id for prop in - ('domains', 'inferred_username', 'name', 'username')]) - ).get(keys_only=True) - if key: - return redirect(cls(key=key).bridgy_path(), code=301) - - if not source or not source.features: - return render_template('user_not_found.html'), 404 - - source.verify() - source = util.preprocess_source(source) - - vars = { - 'source': source, - 'logs': logs, - 'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER, - 'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD, - } - - # Blog webmention promos - if 'webmention' not in source.features: - if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'): - vars[source.SHORT_NAME + '_promo'] = True - else: - for domain in source.domains: - if ('.blogspot.' in domain and # Blogger uses country TLDs - not Blogger.query(Blogger.domains == domain).get()): - vars['blogger_promo'] = True - elif (util.domain_or_parent_in(domain, ['tumblr.com']) and - not Tumblr.query(Tumblr.domains == domain).get()): - vars['tumblr_promo'] = True - elif (util.domain_or_parent_in(domain, 'wordpress.com') and - not WordPress.query(WordPress.domains == domain).get()): - vars['wordpress_promo'] = True - - # Responses - if 'listen' in source.features or 'email' in source.features: - vars['responses'] = [] - query = Response.query().filter(Response.source == source.key) - - # if there's a paging param (responses_before or responses_after), update - # query with it - def get_paging_param(param): - val = request.values.get(param) - try: - return util.parse_iso8601(val) if val else None - except BaseException: - error(f"Couldn't parse {param}, {val!r} as ISO8601") - - before = get_paging_param('responses_before') - after = get_paging_param('responses_after') - if before and after: - error("can't handle both responses_before and responses_after") - elif after: - query = query.filter(Response.updated > after).order(Response.updated) - elif before: - query = query.filter(Response.updated < before).order(-Response.updated) - else: - query = query.order(-Response.updated) - - query_iter = query.iter() - for i, r in enumerate(query_iter): - r.response = json_loads(r.response_json) - r.activities = [json_loads(a) for a in r.activities_json] - - if (not source.is_activity_public(r.response) or - not all(source.is_activity_public(a) for a in r.activities)): - continue - elif r.type == 'post': - r.activities = [] - - verb = r.response.get('verb') - r.actor = (r.response.get('object') if verb == 'invite' - else r.response.get('author') or r.response.get('actor') - ) or {} - - activity_content = '' - for a in r.activities + [r.response]: - if not a.get('content'): - obj = a.get('object', {}) - a['content'] = activity_content = ( - 
obj.get('content') or obj.get('displayName') or - # historical, from a Reddit bug fixed in granary@4f9df7c - obj.get('name') or '') - - response_content = r.response.get('content') - phrases = { - 'like': 'liked this', - 'repost': 'reposted this', - 'rsvp-yes': 'is attending', - 'rsvp-no': 'is not attending', - 'rsvp-maybe': 'might attend', - 'rsvp-interested': 'is interested', - 'invite': 'is invited', - } - phrase = phrases.get(r.type) or phrases.get(verb) - if phrase and (r.type != 'repost' or - activity_content.startswith(response_content)): - r.response['content'] = '%s %s.' % ( - r.actor.get('displayName') or '', phrase) - - # convert image URL to https if we're serving over SSL - image_url = r.actor.setdefault('image', {}).get('url') - if image_url: - r.actor['image']['url'] = util.update_scheme(image_url, request) - - # generate original post links - r.links = process_webmention_links(r) - r.original_links = [util.pretty_link(url, new_tab=True) - for url in r.original_posts] - - vars['responses'].append(r) - if len(vars['responses']) >= 10 or i > 200: - break - - vars['responses'].sort(key=lambda r: r.updated, reverse=True) - - # calculate new paging param(s) - new_after = ( - before if before else - vars['responses'][0].updated if - vars['responses'] and query_iter.probably_has_next() and (before or after) - else None) - if new_after: - vars['responses_after_link'] = ('?responses_after=%s#responses' % - new_after.isoformat()) - - new_before = ( - after if after else - vars['responses'][-1].updated if - vars['responses'] and query_iter.probably_has_next() - else None) - if new_before: - vars['responses_before_link'] = ('?responses_before=%s#responses' % - new_before.isoformat()) - - vars['next_poll'] = max( - source.last_poll_attempt + source.poll_period(), - # lower bound is 1 minute from now - util.now_fn() + datetime.timedelta(seconds=90)) - - # Publishes - if 'publish' in source.features: - publishes = Publish.query().filter(Publish.source == source.key)\ - .order(-Publish.updated)\ - .fetch(10) - for p in publishes: - p.pretty_page = util.pretty_link( - p.key.parent().id(), - attrs={'class': 'original-post u-url u-name'}, - new_tab=True) - - vars['publishes'] = publishes - - if 'webmention' in source.features: - # Blog posts - blogposts = BlogPost.query().filter(BlogPost.source == source.key)\ - .order(-BlogPost.created)\ - .fetch(10) - for b in blogposts: - b.links = process_webmention_links(b) - try: - text = b.feed_item.get('title') - except ValueError: - text = None - b.pretty_url = util.pretty_link( - b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'}, - max_length=40, new_tab=True) - - # Blog webmentions - webmentions = BlogWebmention.query()\ - .filter(BlogWebmention.source == source.key)\ - .order(-BlogWebmention.updated)\ - .fetch(10) - for w in webmentions: - w.pretty_source = util.pretty_link( - w.source_url(), attrs={'class': 'original-post'}, new_tab=True) - try: - target_is_source = (urllib.parse.urlparse(w.target_url()).netloc in - source.domains) - except BaseException: - target_is_source = False - w.pretty_target = util.pretty_link( - w.target_url(), attrs={'class': 'original-post'}, new_tab=True, - keep_host=target_is_source) - - vars.update({'blogposts': blogposts, 'webmentions': webmentions}) - - return render_template(f'{source.SHORT_NAME}_user.html', **vars) + """View for a user page.""" + cls = models.sources.get(site) + if not cls: + return render_template("user_not_found.html"), 404 + + source = cls.lookup(id) + + if not source: + key 
= cls.query( + ndb.OR( + *[ + ndb.GenericProperty(prop) == id + for prop in ("domains", "inferred_username", "name", "username") + ] + ) + ).get(keys_only=True) + if key: + return redirect(cls(key=key).bridgy_path(), code=301) + + if not source or not source.features: + return render_template("user_not_found.html"), 404 + + source.verify() + source = util.preprocess_source(source) + + vars = { + "source": source, + "logs": logs, + "REFETCH_HFEED_TRIGGER": models.REFETCH_HFEED_TRIGGER, + "RECENT_PRIVATE_POSTS_THRESHOLD": RECENT_PRIVATE_POSTS_THRESHOLD, + } + + # Blog webmention promos + if "webmention" not in source.features: + if source.SHORT_NAME in ("blogger", "medium", "tumblr", "wordpress"): + vars[source.SHORT_NAME + "_promo"] = True + else: + for domain in source.domains: + if ( + ".blogspot." in domain + and not Blogger.query( # Blogger uses country TLDs + Blogger.domains == domain + ).get() + ): + vars["blogger_promo"] = True + elif ( + util.domain_or_parent_in(domain, ["tumblr.com"]) + and not Tumblr.query(Tumblr.domains == domain).get() + ): + vars["tumblr_promo"] = True + elif ( + util.domain_or_parent_in(domain, "wordpress.com") + and not WordPress.query(WordPress.domains == domain).get() + ): + vars["wordpress_promo"] = True + + # Responses + if "listen" in source.features or "email" in source.features: + vars["responses"] = [] + query = Response.query().filter(Response.source == source.key) + + # if there's a paging param (responses_before or responses_after), update + # query with it + def get_paging_param(param): + val = request.values.get(param) + try: + return util.parse_iso8601(val) if val else None + except BaseException: + error(f"Couldn't parse {param}, {val!r} as ISO8601") + + before = get_paging_param("responses_before") + after = get_paging_param("responses_after") + if before and after: + error("can't handle both responses_before and responses_after") + elif after: + query = query.filter(Response.updated > after).order(Response.updated) + elif before: + query = query.filter(Response.updated < before).order(-Response.updated) + else: + query = query.order(-Response.updated) + + query_iter = query.iter() + for i, r in enumerate(query_iter): + r.response = json_loads(r.response_json) + r.activities = [json_loads(a) for a in r.activities_json] + + if not source.is_activity_public(r.response) or not all( + source.is_activity_public(a) for a in r.activities + ): + continue + elif r.type == "post": + r.activities = [] + + verb = r.response.get("verb") + r.actor = ( + r.response.get("object") + if verb == "invite" + else r.response.get("author") or r.response.get("actor") + ) or {} + + activity_content = "" + for a in r.activities + [r.response]: + if not a.get("content"): + obj = a.get("object", {}) + a["content"] = activity_content = ( + obj.get("content") + or obj.get("displayName") + or + # historical, from a Reddit bug fixed in granary@4f9df7c + obj.get("name") + or "" + ) + + response_content = r.response.get("content") + phrases = { + "like": "liked this", + "repost": "reposted this", + "rsvp-yes": "is attending", + "rsvp-no": "is not attending", + "rsvp-maybe": "might attend", + "rsvp-interested": "is interested", + "invite": "is invited", + } + phrase = phrases.get(r.type) or phrases.get(verb) + if phrase and ( + r.type != "repost" or activity_content.startswith(response_content) + ): + r.response["content"] = "%s %s." 
% ( + r.actor.get("displayName") or "", + phrase, + ) + + # convert image URL to https if we're serving over SSL + image_url = r.actor.setdefault("image", {}).get("url") + if image_url: + r.actor["image"]["url"] = util.update_scheme(image_url, request) + + # generate original post links + r.links = process_webmention_links(r) + r.original_links = [ + util.pretty_link(url, new_tab=True) for url in r.original_posts + ] + + vars["responses"].append(r) + if len(vars["responses"]) >= 10 or i > 200: + break + + vars["responses"].sort(key=lambda r: r.updated, reverse=True) + + # calculate new paging param(s) + new_after = ( + before + if before + else vars["responses"][0].updated + if vars["responses"] + and query_iter.probably_has_next() + and (before or after) + else None + ) + if new_after: + vars["responses_after_link"] = ( + "?responses_after=%s#responses" % new_after.isoformat() + ) + + new_before = ( + after + if after + else vars["responses"][-1].updated + if vars["responses"] and query_iter.probably_has_next() + else None + ) + if new_before: + vars["responses_before_link"] = ( + "?responses_before=%s#responses" % new_before.isoformat() + ) + + vars["next_poll"] = max( + source.last_poll_attempt + source.poll_period(), + # lower bound is 1 minute from now + util.now_fn() + datetime.timedelta(seconds=90), + ) + + # Publishes + if "publish" in source.features: + publishes = ( + Publish.query() + .filter(Publish.source == source.key) + .order(-Publish.updated) + .fetch(10) + ) + for p in publishes: + p.pretty_page = util.pretty_link( + p.key.parent().id(), + attrs={"class": "original-post u-url u-name"}, + new_tab=True, + ) + + vars["publishes"] = publishes + + if "webmention" in source.features: + # Blog posts + blogposts = ( + BlogPost.query() + .filter(BlogPost.source == source.key) + .order(-BlogPost.created) + .fetch(10) + ) + for b in blogposts: + b.links = process_webmention_links(b) + try: + text = b.feed_item.get("title") + except ValueError: + text = None + b.pretty_url = util.pretty_link( + b.key.id(), + text=text, + attrs={"class": "original-post u-url u-name"}, + max_length=40, + new_tab=True, + ) + + # Blog webmentions + webmentions = ( + BlogWebmention.query() + .filter(BlogWebmention.source == source.key) + .order(-BlogWebmention.updated) + .fetch(10) + ) + for w in webmentions: + w.pretty_source = util.pretty_link( + w.source_url(), attrs={"class": "original-post"}, new_tab=True + ) + try: + target_is_source = ( + urllib.parse.urlparse(w.target_url()).netloc in source.domains + ) + except BaseException: + target_is_source = False + w.pretty_target = util.pretty_link( + w.target_url(), + attrs={"class": "original-post"}, + new_tab=True, + keep_host=target_is_source, + ) + + vars.update({"blogposts": blogposts, "webmentions": webmentions}) + + return render_template(f"{source.SHORT_NAME}_user.html", **vars) def process_webmention_links(e): - """Generates pretty HTML for the links in a :class:`Webmentions` entity. 
- - Args: - e: :class:`Webmentions` subclass (:class:`Response` or :class:`BlogPost`) - """ - def link(url, g): - return util.pretty_link( - url, glyphicon=g, attrs={'class': 'original-post u-bridgy-target'}, - new_tab=True) - - return util.trim_nulls({ - 'Failed': set(link(url, 'exclamation-sign') for url in e.error + e.failed), - 'Sending': set(link(url, 'transfer') for url in e.unsent - if url not in e.error), - 'Sent': set(link(url, None) for url in e.sent - if url not in (e.error + e.unsent)), - 'No webmention ' - 'support': set(link(url, None) for url in e.skipped), - }) - - -@app.route('/delete/start', methods=['POST']) + """Generates pretty HTML for the links in a :class:`Webmentions` entity. + + Args: + e: :class:`Webmentions` subclass (:class:`Response` or :class:`BlogPost`) + """ + + def link(url, g): + return util.pretty_link( + url, + glyphicon=g, + attrs={"class": "original-post u-bridgy-target"}, + new_tab=True, + ) + + return util.trim_nulls( + { + "Failed": set(link(url, "exclamation-sign") for url in e.error + e.failed), + "Sending": set( + link(url, "transfer") for url in e.unsent if url not in e.error + ), + "Sent": set( + link(url, None) for url in e.sent if url not in (e.error + e.unsent) + ), + 'No webmention ' + "support": set(link(url, None) for url in e.skipped), + } + ) + + +@app.route("/delete/start", methods=["POST"]) def delete_start(): - source = util.load_source() - kind = source.key.kind() - feature = request.form['feature'] - state = util.encode_oauth_state({ - 'operation': 'delete', - 'feature': feature, - 'source': source.key.urlsafe().decode(), - 'callback': request.values.get('callback'), - }) - - # Blogger don't support redirect_url() yet - if kind == 'Blogger': - return redirect('/blogger/delete/start?state=%s' % state) - - path = ('/reddit/callback' if kind == 'Reddit' - else '/wordpress/add' if kind == 'WordPress' - else f'/{source.SHORT_NAME}/delete/finish') - kwargs = {} - if kind == 'Twitter': - kwargs['access_type'] = 'read' if feature == 'listen' else 'write' - - try: - return redirect(source.OAUTH_START(path).redirect_url(state=state)) - except werkzeug.exceptions.HTTPException: - # raised by us, probably via self.error() - raise - except Exception as e: - code, body = util.interpret_http_exception(e) - if not code and util.is_connection_failure(e): - code = '-' - body = str(e) - if code: - flash(f'{source.GR_CLASS.NAME} API error {code}: {body}') - return redirect(source.bridgy_url()) - else: - raise - + source = util.load_source() + kind = source.key.kind() + feature = request.form["feature"] + state = util.encode_oauth_state( + { + "operation": "delete", + "feature": feature, + "source": source.key.urlsafe().decode(), + "callback": request.values.get("callback"), + } + ) + + # Blogger don't support redirect_url() yet + if kind == "Blogger": + return redirect("/blogger/delete/start?state=%s" % state) + + path = ( + "/reddit/callback" + if kind == "Reddit" + else "/wordpress/add" + if kind == "WordPress" + else f"/{source.SHORT_NAME}/delete/finish" + ) + kwargs = {} + if kind == "Twitter": + kwargs["access_type"] = "read" if feature == "listen" else "write" -@app.route('/delete/finish') + try: + return redirect(source.OAUTH_START(path).redirect_url(state=state)) + except werkzeug.exceptions.HTTPException: + # raised by us, probably via self.error() + raise + except Exception as e: + code, body = util.interpret_http_exception(e) + if not code and util.is_connection_failure(e): + code = "-" + body = str(e) + if code: + 
flash(f"{source.GR_CLASS.NAME} API error {code}: {body}") + return redirect(source.bridgy_url()) + else: + raise + + +@app.route("/delete/finish") def delete_finish(): - parts = util.decode_oauth_state(request.values.get('state') or '') - callback = parts and parts.get('callback') - - if request.values.get('declined'): - # disable declined means no change took place - if callback: - callback = util.add_query_params(callback, {'result': 'declined'}) - return redirect(callback) - else: - flash('If you want to disable, please approve the prompt.') - return redirect('/') - return - - if not parts or 'feature' not in parts or 'source' not in parts: - error('state query parameter must include "feature" and "source"') - - feature = parts['feature'] - if feature not in (Source.FEATURES): - error('cannot delete unknown feature %s' % feature) - - logged_in_as = ndb.Key(urlsafe=request.args['auth_entity']).get() - source = ndb.Key(urlsafe=parts['source']).get() - - logins = None - if logged_in_as and logged_in_as.is_authority_for(source.auth_entity): - # TODO: remove credentials - if feature in source.features: - source.features.remove(feature) - source.put() - - # remove login cookie - logins = util.get_logins() - login = util.Login(path=source.bridgy_path(), site=source.SHORT_NAME, - name=source.label_name()) - if login in logins: - logins.remove(login) - - noun = 'webmentions' if feature == 'webmention' else feature + 'ing' - if callback: - callback = util.add_query_params(callback, { - 'result': 'success', - 'user': source.bridgy_url(), - 'key': source.key.urlsafe().decode(), - }) + parts = util.decode_oauth_state(request.values.get("state") or "") + callback = parts and parts.get("callback") + + if request.values.get("declined"): + # disable declined means no change took place + if callback: + callback = util.add_query_params(callback, {"result": "declined"}) + return redirect(callback) + else: + flash("If you want to disable, please approve the prompt.") + return redirect("/") + return + + if not parts or "feature" not in parts or "source" not in parts: + error('state query parameter must include "feature" and "source"') + + feature = parts["feature"] + if feature not in (Source.FEATURES): + error("cannot delete unknown feature %s" % feature) + + logged_in_as = ndb.Key(urlsafe=request.args["auth_entity"]).get() + source = ndb.Key(urlsafe=parts["source"]).get() + + logins = None + if logged_in_as and logged_in_as.is_authority_for(source.auth_entity): + # TODO: remove credentials + if feature in source.features: + source.features.remove(feature) + source.put() + + # remove login cookie + logins = util.get_logins() + login = util.Login( + path=source.bridgy_path(), + site=source.SHORT_NAME, + name=source.label_name(), + ) + if login in logins: + logins.remove(login) + + noun = "webmentions" if feature == "webmention" else feature + "ing" + if callback: + callback = util.add_query_params( + callback, + { + "result": "success", + "user": source.bridgy_url(), + "key": source.key.urlsafe().decode(), + }, + ) + else: + msg = f"Disabled {noun} for {source.label()}." + if not source.features: + msg += " Sorry to see you go!" + flash(msg) else: - msg = f'Disabled {noun} for {source.label()}.' - if not source.features: - msg += ' Sorry to see you go!' 
- flash(msg) - else: - if callback: - callback = util.add_query_params(callback, {'result': 'failure'}) - else: - flash(f'Please log into {source.GR_CLASS.NAME} as {source.name} to disable it here.') + if callback: + callback = util.add_query_params(callback, {"result": "failure"}) + else: + flash( + f"Please log into {source.GR_CLASS.NAME} as {source.name} to disable it here." + ) - url = callback if callback else source.bridgy_url() if source.features else '/' - return redirect(url, logins=logins) + url = callback if callback else source.bridgy_url() if source.features else "/" + return redirect(url, logins=logins) -@app.route('/poll-now', methods=['POST']) +@app.route("/poll-now", methods=["POST"]) def poll_now(): - source = util.load_source() - util.add_poll_task(source, now=True) - flash("Polling now. Refresh in a minute to see what's new!") - return redirect(source.bridgy_url()) + source = util.load_source() + util.add_poll_task(source, now=True) + flash("Polling now. Refresh in a minute to see what's new!") + return redirect(source.bridgy_url()) -@app.route('/crawl-now', methods=['POST']) +@app.route("/crawl-now", methods=["POST"]) def crawl_now(): - source = None - - @ndb.transactional() - def setup_refetch_hfeed(): - nonlocal source - source = util.load_source() - source.last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER - source.last_feed_syndication_url = None - source.put() + source = None + + @ndb.transactional() + def setup_refetch_hfeed(): + nonlocal source + source = util.load_source() + source.last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER + source.last_feed_syndication_url = None + source.put() - setup_refetch_hfeed() - util.add_poll_task(source, now=True) - flash("Crawling now. Refresh in a minute to see what's new!") - return redirect(source.bridgy_url()) + setup_refetch_hfeed() + util.add_poll_task(source, now=True) + flash("Crawling now. Refresh in a minute to see what's new!") + return redirect(source.bridgy_url()) -@app.route('/retry', methods=['POST']) +@app.route("/retry", methods=["POST"]) def retry(): - entity = util.load_source() - if not isinstance(entity, Webmentions): - error(f'Unexpected key kind {entity.key.kind()}') - - source = entity.source.get() + entity = util.load_source() + if not isinstance(entity, Webmentions): + error(f"Unexpected key kind {entity.key.kind()}") - # run OPD to pick up any new SyndicatedPosts. note that we don't refetch - # their h-feed, so if they've added a syndication URL since we last crawled, - # retry won't make us pick it up. background in #524. - if entity.key.kind() == 'Response': source = entity.source.get() - for activity in [json_loads(a) for a in entity.activities_json]: - originals, mentions = original_post_discovery.discover( - source, activity, fetch_hfeed=False, include_redirect_sources=False) - entity.unsent += original_post_discovery.targets_for_response( - json_loads(entity.response_json), originals=originals, mentions=mentions) - entity.restart() - flash('Retrying. Refresh in a minute to see the results!') - return redirect(request.values.get('redirect_to') or source.bridgy_url()) + # run OPD to pick up any new SyndicatedPosts. note that we don't refetch + # their h-feed, so if they've added a syndication URL since we last crawled, + # retry won't make us pick it up. background in #524. 
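+    # (OPD = original post discovery, i.e. the original_post_discovery.discover()
+    # call below, which finds original post URLs and mentions for each stored
+    # activity.)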
+ if entity.key.kind() == "Response": + source = entity.source.get() + for activity in [json_loads(a) for a in entity.activities_json]: + originals, mentions = original_post_discovery.discover( + source, activity, fetch_hfeed=False, include_redirect_sources=False + ) + entity.unsent += original_post_discovery.targets_for_response( + json_loads(entity.response_json), originals=originals, mentions=mentions + ) + + entity.restart() + flash("Retrying. Refresh in a minute to see the results!") + return redirect(request.values.get("redirect_to") or source.bridgy_url()) + + +@app.route("/discover", methods=["POST"]) +def discover(): + source = util.load_source() + # validate URL, find silo post + url = request.form["url"] + domain = util.domain_from_link(url) + path = urllib.parse.urlparse(url).path + msg = "Discovering now. Refresh in a minute to see the results!" + + gr_source = source.gr_source + if domain == gr_source.DOMAIN: + post_id = gr_source.post_id(url) + if post_id: + type = "event" if path.startswith("/events/") else None + util.add_discover_task(source, post_id, type=type) + else: + msg = "Sorry, that doesn't look like a %s post URL." % gr_source.NAME + + elif util.domain_or_parent_in(domain, source.domains): + synd_links = original_post_discovery.process_entry(source, url, {}, False, []) + if synd_links: + for link in synd_links: + util.add_discover_task(source, gr_source.post_id(link)) + source.updates = {"last_syndication_url": util.now_fn()} + models.Source.put_updates(source) + else: + msg = "Failed to fetch %s or find a %s syndication link." % ( + util.pretty_link(url), + gr_source.NAME, + ) -@app.route('/discover', methods=['POST']) -def discover(): - source = util.load_source() - - # validate URL, find silo post - url = request.form['url'] - domain = util.domain_from_link(url) - path = urllib.parse.urlparse(url).path - msg = 'Discovering now. Refresh in a minute to see the results!' - - gr_source = source.gr_source - if domain == gr_source.DOMAIN: - post_id = gr_source.post_id(url) - if post_id: - type = 'event' if path.startswith('/events/') else None - util.add_discover_task(source, post_id, type=type) - else: - msg = "Sorry, that doesn't look like a %s post URL." % gr_source.NAME - - elif util.domain_or_parent_in(domain, source.domains): - synd_links = original_post_discovery.process_entry(source, url, {}, False, []) - if synd_links: - for link in synd_links: - util.add_discover_task(source, gr_source.post_id(link)) - source.updates = {'last_syndication_url': util.now_fn()} - models.Source.put_updates(source) else: - msg = 'Failed to fetch %s or find a %s syndication link.' % ( - util.pretty_link(url), gr_source.NAME) + msg = "Please enter a URL on either your web site or %s." % gr_source.NAME - else: - msg = 'Please enter a URL on either your web site or %s.' 
% gr_source.NAME + flash(msg) + return redirect(source.bridgy_url()) - flash(msg) - return redirect(source.bridgy_url()) - -@app.route('/edit-websites', methods=['GET']) +@app.route("/edit-websites", methods=["GET"]) def edit_websites_get(): - return render_template('edit_websites.html', - source=util.preprocess_source(util.load_source())) + return render_template( + "edit_websites.html", source=util.preprocess_source(util.load_source()) + ) -@app.route('/edit-websites', methods=['POST']) +@app.route("/edit-websites", methods=["POST"]) def edit_websites_post(): - source = util.load_source() - redirect_url = '%s?%s' % (request.path, urllib.parse.urlencode({ - 'source_key': source.key.urlsafe().decode(), - })) - - add = request.values.get('add') - delete = request.values.get('delete') - if (add and delete) or (not add and not delete): - error('Either add or delete param (but not both) required') - - link = util.pretty_link(add or delete) - - if add: - resolved = Source.resolve_profile_url(add) - if resolved: - if resolved in source.domain_urls: - flash('%s already exists.' % link) - else: - source.domain_urls.append(resolved) - domain = util.domain_from_link(resolved) - source.domains.append(domain) - source.put() - flash('Added %s.' % link) - else: - flash("%s doesn't look like your web site. Try again?" % link) + source = util.load_source() + redirect_url = "%s?%s" % ( + request.path, + urllib.parse.urlencode( + { + "source_key": source.key.urlsafe().decode(), + } + ), + ) + + add = request.values.get("add") + delete = request.values.get("delete") + if (add and delete) or (not add and not delete): + error("Either add or delete param (but not both) required") + + link = util.pretty_link(add or delete) + + if add: + resolved = Source.resolve_profile_url(add) + if resolved: + if resolved in source.domain_urls: + flash("%s already exists." % link) + else: + source.domain_urls.append(resolved) + domain = util.domain_from_link(resolved) + source.domains.append(domain) + source.put() + flash("Added %s." % link) + else: + flash("%s doesn't look like your web site. Try again?" 
% link) - else: - assert delete - try: - source.domain_urls.remove(delete) - except ValueError: - error(f"{delete} not found in {source.label()}'s current web sites") - domain = util.domain_from_link(delete) - if domain not in set(util.domain_from_link(url) for url in source.domain_urls): - source.domains.remove(domain) - source.put() - flash(f'Removed {link}.') + else: + assert delete + try: + source.domain_urls.remove(delete) + except ValueError: + error(f"{delete} not found in {source.label()}'s current web sites") + domain = util.domain_from_link(delete) + if domain not in set(util.domain_from_link(url) for url in source.domain_urls): + source.domains.remove(domain) + source.put() + flash(f"Removed {link}.") - return redirect(redirect_url) + return redirect(redirect_url) -@app.route('/', methods=('GET', 'HEAD')) +@app.route("/", methods=("GET", "HEAD")) def redirect_to_front_page(_): - """Redirect to the front page.""" - return redirect(util.add_query_params('/', request.values.items()), code=301) + """Redirect to the front page.""" + return redirect(util.add_query_params("/", request.values.items()), code=301) -@app.route('/logout') +@app.route("/logout") def logout(): - """Redirect to the front page.""" - flash('Logged out.') - return redirect('/', logins=[]) + """Redirect to the front page.""" + flash("Logged out.") + return redirect("/", logins=[]) -@app.route('/csp-report') +@app.route("/csp-report") def csp_report(): - """Log Content-Security-Policy reports. https://content-security-policy.com/""" - logging.info(request.values.get_data(as_text=True)) - return 'OK' + """Log Content-Security-Policy reports. https://content-security-policy.com/""" + logging.info(request.values.get_data(as_text=True)) + return "OK" -@app.route('/log') +@app.route("/log") @flask_util.cached(cache, logs.CACHE_TIME) def log(): return logs.log() diff --git a/publish.py b/publish.py index 6a409e5b..ad074cb3 100644 --- a/publish.py +++ b/publish.py @@ -16,11 +16,11 @@ from granary import source as gr_source import grpc from oauth_dropins import ( - flickr as oauth_flickr, - github as oauth_github, - mastodon as oauth_mastodon, - meetup as oauth_meetup, - twitter as oauth_twitter, + flickr as oauth_flickr, + github as oauth_github, + mastodon as oauth_mastodon, + meetup as oauth_meetup, + twitter as oauth_twitter, ) from oauth_dropins.webutil import appengine_info from oauth_dropins.webutil import flask_util @@ -47,728 +47,873 @@ # image URLs matching this regexp should be ignored. # (This matches Wordpress Jetpack lazy loaded image placeholders.) # https://github.com/snarfed/bridgy/issues/798 -IGNORE_IMAGE_RE = re.compile(r'.*/lazy-images/images/1x1\.trans\.gif$') - -PUBLISHABLE_TYPES = frozenset(( - 'h-checkin', - 'h-entry', - 'h-event', - 'h-geo', - 'h-item', - 'h-listing', - 'h-product', - 'h-recipe', - 'h-resume', - 'h-review', -)) +IGNORE_IMAGE_RE = re.compile(r".*/lazy-images/images/1x1\.trans\.gif$") + +PUBLISHABLE_TYPES = frozenset( + ( + "h-checkin", + "h-entry", + "h-event", + "h-geo", + "h-item", + "h-listing", + "h-product", + "h-recipe", + "h-resume", + "h-review", + ) +) class CollisionError(RuntimeError): - """Multiple publish requests for the same page at the same time.""" - pass + """Multiple publish requests for the same page at the same time.""" + + pass class PublishBase(webmention.Webmention): - """Base handler for both previews and publishes. + """Base handler for both previews and publishes. - Subclasses must set the :attr:`PREVIEW` attribute to True or False. 
They may - also override other methods. + Subclasses must set the :attr:`PREVIEW` attribute to True or False. They may + also override other methods. - Attributes: - fetched: :class:`requests.Response` from fetching source_url - shortlink: rel-shortlink found in the original post, if any - """ - PREVIEW = None + Attributes: + fetched: :class:`requests.Response` from fetching source_url + shortlink: rel-shortlink found in the original post, if any + """ - shortlink = None - source = None + PREVIEW = None - def authorize(self): - """Returns True if the current user is authorized for this request. + shortlink = None + source = None - Otherwise, should call :meth:`self.error()` to provide an appropriate - error message. - """ - return True - - def source_url(self): - return request.values['source'].strip() - - def target_url(self): - return request.values['target'].strip() - - def include_link(self, item): - val = request.values.get('bridgy_omit_link', None) - - if val is None: - # _run has already parsed and validated the target URL - vals = urllib.parse.parse_qs(urllib.parse.urlparse(self.target_url()).query)\ - .get('bridgy_omit_link') - val = vals[0] if vals else None - - if val is None: - vals = item.get('properties', {}).get('bridgy-omit-link') - val = vals[0] if vals else None - - result = (gr_source.INCLUDE_LINK if val is None or val.lower() == 'false' - else gr_source.INCLUDE_IF_TRUNCATED if val.lower() == 'maybe' - else gr_source.OMIT_LINK) - - return result - - def ignore_formatting(self, item): - val = request.values.get('bridgy_ignore_formatting', None) - - if val is None: - # _run has already parsed and validated the target URL - vals = urllib.parse.parse_qs(urllib.parse.urlparse(self.target_url()).query)\ - .get('bridgy_ignore_formatting') - val = vals[0] if vals else None - - if val is not None: - return val.lower() in ('', 'true') - - return 'bridgy-ignore-formatting' in item.get('properties', {}) - - def maybe_inject_silo_content(self, item): - props = item.setdefault('properties', {}) - silo_content = props.get('bridgy-%s-content' % self.source.SHORT_NAME, []) - if silo_content: - props['content'] = silo_content - props.pop('name', None) - props.pop('summary', None) - - def _run(self): - """Returns CreationResult on success, None otherwise.""" - logging.info('Params: %s', list(request.values.items())) - assert self.PREVIEW in (True, False) - - # parse and validate target URL - try: - parsed = urllib.parse.urlparse(self.target_url()) - except BaseException: - self.error(f'Could not parse target URL {self.target_url()}') - - domain = parsed.netloc - path_parts = parsed.path.rsplit('/', 1) - source_cls = SOURCE_NAMES.get(path_parts[-1]) - if (domain not in util.DOMAINS or - len(path_parts) != 2 or path_parts[0] != '/publish' or not source_cls): - self.error( - 'Target must be brid.gy/publish/{flickr,github,mastodon,meetup,twitter}') - elif source_cls == Instagram: - self.error(f'Sorry, {source_cls.GR_CLASS.NAME} is not supported.') - - # resolve source URL - source_url = self.source_url() - resolved_url, domain, ok = util.get_webmention_target( - source_url, replace_test_domains=False) - # show nice error message if they're trying to publish a silo post - if domain in SOURCE_DOMAINS: - return self.error( - "Looks like that's a %s URL. Try one from your web site instead!" 
% - SOURCE_DOMAINS[domain].GR_CLASS.NAME) - elif not ok: - return self.error('Unsupported source URL %s' % resolved_url) - elif not domain: - return self.error('Could not parse source URL %s' % resolved_url) - - # look up source by domain - self.source = self._find_source(source_cls, resolved_url, domain) - - content_param = 'bridgy_%s_content' % self.source.SHORT_NAME - if content_param in request.values: - return self.error('The %s parameter is not supported' % content_param) - - # show nice error message if they're trying to publish their home page - for domain_url in self.source.domain_urls: - domain_url_parts = urllib.parse.urlparse(domain_url) - for check_url in resolved_url, source_url: - parts = urllib.parse.urlparse(check_url) - if (parts.netloc == domain_url_parts.netloc and - parts.path.strip('/') == domain_url_parts.path.strip('/') and - not parts.query): - return self.error( - "Looks like that's your home page. Try one of your posts instead!") - - # done with the sanity checks, create the Publish entity - self.entity = self.get_or_add_publish_entity(resolved_url) - if not self.entity: - return None - - # fetch the source page! - fragment = urllib.parse.urlparse(source_url).fragment - try: - resp = self.fetch_mf2(resolved_url, id=fragment, raise_errors=True) - except HTTPException: - # raised by us, probably via self.error() - raise - except BaseException as e: - status, body = util.interpret_http_exception(e) - if status == '410': - return self.delete(resolved_url) - return self.error('Could not fetch source URL %s' % resolved_url) - - if not resp: - return - self.fetched, mf2 = resp - - # check that we haven't already published this URL. (we can't do this before - # fetching because it might be a 410 delete, which we only know by fetching.) - if (self.entity.status == 'complete' and self.entity.type != 'preview' and - not self.PREVIEW and not appengine_info.LOCAL): - return self.error("Sorry, you've already published that page, and Bridgy Publish doesn't support updating existing posts. Details: https://github.com/snarfed/bridgy/issues/84", - extra_json={'original': self.entity.published}) - - # find rel-shortlink, if any - # http://microformats.org/wiki/rel-shortlink - # https://github.com/snarfed/bridgy/issues/173 - shortlinks = mf2['rels'].get('shortlink') - if shortlinks: - self.shortlink = urllib.parse.urljoin(resolved_url, shortlinks[0]) - - # loop through each item and its children and try to preview/create it. if - # it fails, try the next one. break after the first one that works. 
- result = None - types = set() - queue = collections.deque(mf2.get('items', [])) - while queue: - item = queue.popleft() - item_types = set(item.get('type')) - if 'h-feed' in item_types and 'h-entry' not in item_types: - queue.extend(item.get('children', [])) - continue - elif not item_types & PUBLISHABLE_TYPES: - types = types.union(item_types) - continue - - try: - result = self.attempt_single_item(item) - if self.entity.published: - break - if result.abort: - if result.error_plain: - self.error(result.error_plain, html=result.error_html, data=item) - return - # try the next item - for embedded in ('rsvp', 'invitee', 'repost', 'repost-of', 'like', - 'like-of', 'in-reply-to'): - if embedded in item.get('properties', []): - item_types.add(embedded) - logging.info( - 'Object type(s) %s not supported; error=%s; trying next.', - item_types, result.error_plain) - types = types.union(item_types) - queue.extend(item.get('children', [])) - except HTTPException: - # raised by us, probably via self.error() - raise - except BaseException as e: - code, body = util.interpret_http_exception(e) - if code in self.source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource): - # the user deauthorized the bridgy app, or the token expired, so - # disable this source. - logging.warning(f'Disabling source due to: {e}', exc_info=True) - self.source.status = 'disabled' - self.source.put() - if isinstance(e, (NotImplementedError, ValueError, urllib.error.URLError)): - code = '400' - elif not code: - raise - msg = 'Error: %s %s' % (body or '', e) - return self.error(msg, status=code, report=code not in ('400', '404', '502', '503', '504')) - - if not self.entity.published: # tried all the items - types.discard('h-entry') - types.discard('h-note') - if types: - msg = ("%s doesn't support type(s) %s, or no content was found." % - (source_cls.GR_CLASS.NAME, ' + '.join(types))) - return self.error(msg, data=mf2) - else: - msg = 'Could not find content in h-entry or any other element!' - return self.error(msg, html=msg, data=mf2) - - # write results to datastore, but don't overwrite a previous publish with a - # preview. - if not (self.PREVIEW and self.entity.type != 'preview'): - self.entity.status = 'complete' - self.entity.put() - - return result - - def _find_source(self, source_cls, url, domain): - """Returns the source that should publish a post URL, or None if not found. - - Args: - source_cls: :class:`models.Source` subclass for this silo - url: string - domain: string, url's domain - - Returns: :class:`models.Source` - """ - domain = domain.lower() - sources = source_cls.query().filter(source_cls.domains == domain).fetch(100) - if not sources: - msg = "Could not find %(type)s account for %(domain)s. Check that your %(type)s profile has %(domain)s in its web site or link field, then try signing up again." % {'type': source_cls.GR_CLASS.NAME, 'domain': domain} - return self.error(msg, html=msg) - - current_url = '' - sources_ready = [] - best_match = None - for source in sources: - logging.info('Source: %s , features %s, status %s, poll status %s', - source.bridgy_url(), source.features, source.status, - source.poll_status) - if source.status != 'disabled' and 'publish' in source.features: - # use a source that has a domain_url matching the url provided, - # including path. find the source with the closest match. 
- sources_ready.append(source) - schemeless_url = util.schemeless(url.lower()).strip('/') - for domain_url in source.domain_urls: - schemeless_domain_url = util.schemeless(domain_url.lower()).strip('/') - if (schemeless_url.startswith(schemeless_domain_url) and - len(domain_url) > len(current_url)): - current_url = domain_url - best_match = source - - if best_match: - return best_match - - if sources_ready: - msg = 'No account found that matches {util.pretty_link(url)}. Check that the web site URL is in your silo profile, then sign up again.' - else: - msg = 'Publish is not enabled for your account. Try signing up!' - self.error(msg, html=msg) - - def attempt_single_item(self, item): - """Attempts to preview or publish a single mf2 item. - - Args: - item: mf2 item dict from mf2py - - Returns: - CreationResult - """ - self.maybe_inject_silo_content(item) - obj = microformats2.json_to_object(item) - - ignore_formatting = self.ignore_formatting(item) - if ignore_formatting: - prop = microformats2.first_props(item.get('properties', {})) - content = microformats2.get_text(prop.get('content')) - if content: - obj['content'] = content.strip() - - # which original post URL to include? in order of preference: - # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173) - # 2. original user-provided URL if it redirected - # 3. u-url if available - # 4. actual final fetched URL - if self.shortlink: - obj['url'] = self.shortlink - elif self.source_url() != self.fetched.url: - obj['url'] = self.source_url() - elif 'url' not in obj: - obj['url'] = self.fetched.url - logging.debug('Converted to ActivityStreams object: %s', json_dumps(obj, indent=2)) - - # posts and comments need content - obj_type = obj.get('objectType') - if obj_type in ('note', 'article', 'comment'): - if (not obj.get('content') and not obj.get('summary') and - not obj.get('displayName')): - return gr_source.creation_result( - abort=False, - error_plain='Could not find content in %s' % self.fetched.url, - error_html='Could not find content in %s' % self.fetched.url) - - self.preprocess(obj) - - include_link = self.include_link(item) - - if not self.authorize(): - return gr_source.creation_result(abort=True) - - if self.PREVIEW: - result = self.source.gr_source.preview_create( - obj, include_link=include_link, ignore_formatting=ignore_formatting) - previewed = result.content or result.description - if self.entity.type == 'preview': - self.entity.published = previewed - if not previewed: - return result # there was an error - return self._render_preview(result, include_link=include_link) - - else: - result = self.source.gr_source.create( - obj, include_link=include_link, ignore_formatting=ignore_formatting) - self.entity.published = result.content - if not result.content: - return result # there was an error - if 'url' not in self.entity.published: - self.entity.published['url'] = obj.get('url') - self.entity.type = self.entity.published.get('type') or models.get_type(obj) - logging.info('Returning %s', json_dumps(self.entity.published, indent=2)) - return gr_source.creation_result( - json_dumps(self.entity.published, indent=2)) - - def delete(self, source_url): - """Attempts to delete or preview delete a published post. 
- - Args: - source_url: string, original post URL - - Returns: - dict response data with at least id and url - """ - assert self.entity - if ((self.entity.status != 'complete' or self.entity.type == 'preview') and - not appengine_info.LOCAL): - return self.error("Can't delete this post from %s because Bridgy Publish didn't originally POSSE it there" % self.source.gr_source.NAME) - - id = self.entity.published.get('id') - url = self.entity.published.get('url') - if not id and url: - id = self.source.gr_source.post_id(url) - - if not id: - return self.error( - "Bridgy Publish can't find the id of the %s post that it originally published for %s" % - self.source.gr_source.NAME, source_url) - - if self.PREVIEW: - try: - return self._render_preview(self.source.gr_source.preview_delete(id)) - except NotImplementedError: - return self.error("Sorry, deleting isn't supported for %s yet" % - self.source.gr_source.NAME) - - logging.info('Deleting silo post id %s', id) - self.entity = models.Publish(parent=self.entity.key.parent(), - source=self.source.key, type='delete') - self.entity.put() - logging.debug("Publish entity for delete: '%s'", - self.entity.key.urlsafe().decode()) - - resp = self.source.gr_source.delete(id) - resp.content.setdefault('id', id) - resp.content.setdefault('url', url) - logging.info(resp.content) - self.entity.published = resp.content - self.entity.status = 'deleted' - self.entity.put() - return resp - - def preprocess(self, activity): - """Preprocesses an item before trying to publish it. - - Specifically, expands inReplyTo/object URLs with rel=syndication URLs. - - Args: - activity: an ActivityStreams activity or object being published - """ - self.source.preprocess_for_publish(activity) - self.expand_target_urls(activity) + def authorize(self): + """Returns True if the current user is authorized for this request. - activity['image'] = [img for img in util.get_list(activity, 'image') - if not IGNORE_IMAGE_RE.match(img.get('url', ''))] - if not activity['image']: - del activity['image'] + Otherwise, should call :meth:`self.error()` to provide an appropriate + error message. + """ + return True - def expand_target_urls(self, activity): - """Expand the inReplyTo or object fields of an ActivityStreams object - by fetching the original and looking for rel=syndication URLs. + def source_url(self): + return request.values["source"].strip() - This method modifies the dict in place. + def target_url(self): + return request.values["target"].strip() - Args: - activity: an ActivityStreams dict of the activity being published - """ - for field in ('inReplyTo', 'object'): - # microformats2.json_to_object de-dupes, no need to do it here - objs = activity.get(field) - if not objs: - continue - - if isinstance(objs, dict): - objs = [objs] - - augmented = list(objs) - for obj in objs: - url = obj.get('url') - if not url: - continue - - parsed = urllib.parse.urlparse(url) - # ignore home pages. 
https://github.com/snarfed/bridgy/issues/760 - if parsed.path in ('', '/'): - continue - - # get_webmention_target weeds out silos and non-HTML targets - # that we wouldn't want to download and parse - url, _, ok = util.get_webmention_target(url) - if not ok: - continue - - logging.debug('expand_target_urls fetching field=%s, url=%s', field, url) - try: - mf2 = util.fetch_mf2(url) - except AssertionError: - raise # for unit tests - except HTTPException: - # raised by us, probably via self.error() - raise - except BaseException: - # it's not a big deal if we can't fetch an in-reply-to url - logging.info('expand_target_urls could not fetch field=%s, url=%s', - field, url, exc_info=True) - continue + def include_link(self, item): + val = request.values.get("bridgy_omit_link", None) - synd_urls = mf2['rels'].get('syndication', []) + if val is None: + # _run has already parsed and validated the target URL + vals = urllib.parse.parse_qs( + urllib.parse.urlparse(self.target_url()).query + ).get("bridgy_omit_link") + val = vals[0] if vals else None - # look for syndication urls in the first h-entry - queue = collections.deque(mf2.get('items', [])) - while queue: - item = queue.popleft() - item_types = set(item.get('type', [])) - if 'h-feed' in item_types and 'h-entry' not in item_types: - queue.extend(item.get('children', [])) - continue + if val is None: + vals = item.get("properties", {}).get("bridgy-omit-link") + val = vals[0] if vals else None - # these can be urls or h-cites - synd_urls += microformats2.get_string_urls( - item.get('properties', {}).get('syndication', [])) + result = ( + gr_source.INCLUDE_LINK + if val is None or val.lower() == "false" + else gr_source.INCLUDE_IF_TRUNCATED + if val.lower() == "maybe" + else gr_source.OMIT_LINK + ) - logging.debug('expand_target_urls found rel=syndication for url=%s: %r', url, synd_urls) - augmented += [{'url': u} for u in synd_urls] + return result - activity[field] = augmented + def ignore_formatting(self, item): + val = request.values.get("bridgy_ignore_formatting", None) - def get_or_add_publish_entity(self, source_url): - """Creates and stores :class:`models.Publish` entity. + if val is None: + # _run has already parsed and validated the target URL + vals = urllib.parse.parse_qs( + urllib.parse.urlparse(self.target_url()).query + ).get("bridgy_ignore_formatting") + val = vals[0] if vals else None - ...and if necessary, :class:`models.PublishedPage` entity. 
+ if val is not None: + return val.lower() in ("", "true") - Args: - source_url: string - """ - try: - return self._get_or_add_publish_entity(source_url) - except CollisionError: - return self.error("You're already publishing that post in another request.", - status=429) - except Exception as e: - code = getattr(e, 'code', None) - details = getattr(e, 'details', None) - logging.info((code and code(), details and details())) - if (code and code() == grpc.StatusCode.ABORTED and - details and 'too much contention' in details()): - return self.error("You're already publishing that post in another request.", - status=429) - raise - - @ndb.transactional() - def _get_or_add_publish_entity(self, source_url): - page = PublishedPage.get_or_insert(source_url) - - # Detect concurrent publish request for the same page - # https://github.com/snarfed/bridgy/issues/996 - pending = Publish.query( - Publish.status == 'new', Publish.type != 'preview', - Publish.source == self.source.key, ancestor=page.key).get() - if pending: - logging.warning(f'Collided with publish: {pending.key.urlsafe().decode()}') - raise CollisionError() - - entity = Publish.query( - Publish.status == 'complete', Publish.type != 'preview', - Publish.source == self.source.key, ancestor=page.key).get() - if entity is None: - entity = Publish(parent=page.key, source=self.source.key) - if self.PREVIEW: - entity.type = 'preview' - entity.put() - - logging.debug("Publish entity: '%s'", entity.key.urlsafe().decode()) - return entity - - def _render_preview(self, result, include_link=False): - """Renders a preview CreationResult as HTML. - - Args: - result: CreationResult - include_link: boolean - - Returns: CreationResult with the rendered HTML in content - """ - state = { - 'source_key': self.source.key.urlsafe().decode(), - 'source_url': self.source_url(), - 'target_url': self.target_url(), - 'include_link': include_link, - } - vars = { - 'source': util.preprocess_source(self.source), - 'preview': result.content, - 'description': result.description, - 'webmention_endpoint': util.host_url('/publish/webmention'), - 'state': util.encode_oauth_state(state), - **state, - } - logging.info(f'Rendering preview with template vars {pprint.pformat(vars)}') - return gr_source.creation_result(render_template('preview.html', **vars)) + return "bridgy-ignore-formatting" in item.get("properties", {}) + + def maybe_inject_silo_content(self, item): + props = item.setdefault("properties", {}) + silo_content = props.get("bridgy-%s-content" % self.source.SHORT_NAME, []) + if silo_content: + props["content"] = silo_content + props.pop("name", None) + props.pop("summary", None) + + def _run(self): + """Returns CreationResult on success, None otherwise.""" + logging.info("Params: %s", list(request.values.items())) + assert self.PREVIEW in (True, False) + + # parse and validate target URL + try: + parsed = urllib.parse.urlparse(self.target_url()) + except BaseException: + self.error(f"Could not parse target URL {self.target_url()}") + + domain = parsed.netloc + path_parts = parsed.path.rsplit("/", 1) + source_cls = SOURCE_NAMES.get(path_parts[-1]) + if ( + domain not in util.DOMAINS + or len(path_parts) != 2 + or path_parts[0] != "/publish" + or not source_cls + ): + self.error( + "Target must be brid.gy/publish/{flickr,github,mastodon,meetup,twitter}" + ) + elif source_cls == Instagram: + self.error(f"Sorry, {source_cls.GR_CLASS.NAME} is not supported.") + + # resolve source URL + source_url = self.source_url() + resolved_url, domain, ok = 
util.get_webmention_target( + source_url, replace_test_domains=False + ) + # show nice error message if they're trying to publish a silo post + if domain in SOURCE_DOMAINS: + return self.error( + "Looks like that's a %s URL. Try one from your web site instead!" + % SOURCE_DOMAINS[domain].GR_CLASS.NAME + ) + elif not ok: + return self.error("Unsupported source URL %s" % resolved_url) + elif not domain: + return self.error("Could not parse source URL %s" % resolved_url) + + # look up source by domain + self.source = self._find_source(source_cls, resolved_url, domain) + + content_param = "bridgy_%s_content" % self.source.SHORT_NAME + if content_param in request.values: + return self.error("The %s parameter is not supported" % content_param) + + # show nice error message if they're trying to publish their home page + for domain_url in self.source.domain_urls: + domain_url_parts = urllib.parse.urlparse(domain_url) + for check_url in resolved_url, source_url: + parts = urllib.parse.urlparse(check_url) + if ( + parts.netloc == domain_url_parts.netloc + and parts.path.strip("/") == domain_url_parts.path.strip("/") + and not parts.query + ): + return self.error( + "Looks like that's your home page. Try one of your posts instead!" + ) + + # done with the sanity checks, create the Publish entity + self.entity = self.get_or_add_publish_entity(resolved_url) + if not self.entity: + return None + + # fetch the source page! + fragment = urllib.parse.urlparse(source_url).fragment + try: + resp = self.fetch_mf2(resolved_url, id=fragment, raise_errors=True) + except HTTPException: + # raised by us, probably via self.error() + raise + except BaseException as e: + status, body = util.interpret_http_exception(e) + if status == "410": + return self.delete(resolved_url) + return self.error("Could not fetch source URL %s" % resolved_url) + + if not resp: + return + self.fetched, mf2 = resp + + # check that we haven't already published this URL. (we can't do this before + # fetching because it might be a 410 delete, which we only know by fetching.) + if ( + self.entity.status == "complete" + and self.entity.type != "preview" + and not self.PREVIEW + and not appengine_info.LOCAL + ): + return self.error( + "Sorry, you've already published that page, and Bridgy Publish doesn't support updating existing posts. Details: https://github.com/snarfed/bridgy/issues/84", + extra_json={"original": self.entity.published}, + ) + + # find rel-shortlink, if any + # http://microformats.org/wiki/rel-shortlink + # https://github.com/snarfed/bridgy/issues/173 + shortlinks = mf2["rels"].get("shortlink") + if shortlinks: + self.shortlink = urllib.parse.urljoin(resolved_url, shortlinks[0]) + + # loop through each item and its children and try to preview/create it. if + # it fails, try the next one. break after the first one that works. 
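For reference, the traversal that the loop below implements can be sketched on its own: h-feed wrappers are unwrapped into their children, and only items whose type overlaps PUBLISHABLE_TYPES are attempted. A minimal sketch with hypothetical sample data; the real PUBLISHABLE_TYPES set is defined elsewhere in publish.py and is only assumed here to contain "h-entry" and "h-event".

import collections

PUBLISHABLE_TYPES = {"h-entry", "h-event"}  # assumed for illustration

def publishable_items(mf2):
    """Yields publishable mf2 items breadth-first, descending into h-feed children."""
    queue = collections.deque(mf2.get("items", []))
    while queue:
        item = queue.popleft()
        types = set(item.get("type", []))
        if "h-feed" in types and "h-entry" not in types:
            queue.extend(item.get("children", []))  # unwrap the feed, try its children
        elif types & PUBLISHABLE_TYPES:
            yield item  # the real loop stops after the first item that publishes

feed = {"items": [{"type": ["h-feed"],
                   "children": [{"type": ["h-entry"], "properties": {"name": ["a post"]}}]}]}
print(list(publishable_items(feed)))  # the nested h-entry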
+ result = None + types = set() + queue = collections.deque(mf2.get("items", [])) + while queue: + item = queue.popleft() + item_types = set(item.get("type")) + if "h-feed" in item_types and "h-entry" not in item_types: + queue.extend(item.get("children", [])) + continue + elif not item_types & PUBLISHABLE_TYPES: + types = types.union(item_types) + continue + + try: + result = self.attempt_single_item(item) + if self.entity.published: + break + if result.abort: + if result.error_plain: + self.error( + result.error_plain, html=result.error_html, data=item + ) + return + # try the next item + for embedded in ( + "rsvp", + "invitee", + "repost", + "repost-of", + "like", + "like-of", + "in-reply-to", + ): + if embedded in item.get("properties", []): + item_types.add(embedded) + logging.info( + "Object type(s) %s not supported; error=%s; trying next.", + item_types, + result.error_plain, + ) + types = types.union(item_types) + queue.extend(item.get("children", [])) + except HTTPException: + # raised by us, probably via self.error() + raise + except BaseException as e: + code, body = util.interpret_http_exception(e) + if code in self.source.DISABLE_HTTP_CODES or isinstance( + e, models.DisableSource + ): + # the user deauthorized the bridgy app, or the token expired, so + # disable this source. + logging.warning(f"Disabling source due to: {e}", exc_info=True) + self.source.status = "disabled" + self.source.put() + if isinstance( + e, (NotImplementedError, ValueError, urllib.error.URLError) + ): + code = "400" + elif not code: + raise + msg = "Error: %s %s" % (body or "", e) + return self.error( + msg, + status=code, + report=code not in ("400", "404", "502", "503", "504"), + ) + + if not self.entity.published: # tried all the items + types.discard("h-entry") + types.discard("h-note") + if types: + msg = "%s doesn't support type(s) %s, or no content was found." % ( + source_cls.GR_CLASS.NAME, + " + ".join(types), + ) + return self.error(msg, data=mf2) + else: + msg = 'Could not find content in h-entry or any other element!' + return self.error(msg, html=msg, data=mf2) + + # write results to datastore, but don't overwrite a previous publish with a + # preview. + if not (self.PREVIEW and self.entity.type != "preview"): + self.entity.status = "complete" + self.entity.put() + + return result + + def _find_source(self, source_cls, url, domain): + """Returns the source that should publish a post URL, or None if not found. + + Args: + source_cls: :class:`models.Source` subclass for this silo + url: string + domain: string, url's domain + + Returns: :class:`models.Source` + """ + domain = domain.lower() + sources = source_cls.query().filter(source_cls.domains == domain).fetch(100) + if not sources: + msg = ( + "Could not find %(type)s account for %(domain)s. Check that your %(type)s profile has %(domain)s in its web site or link field, then try signing up again." + % {"type": source_cls.GR_CLASS.NAME, "domain": domain} + ) + return self.error(msg, html=msg) + + current_url = "" + sources_ready = [] + best_match = None + for source in sources: + logging.info( + "Source: %s , features %s, status %s, poll status %s", + source.bridgy_url(), + source.features, + source.status, + source.poll_status, + ) + if source.status != "disabled" and "publish" in source.features: + # use a source that has a domain_url matching the url provided, + # including path. find the source with the closest match. 
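The "closest match" selection below boils down to a longest-prefix comparison on scheme-stripped, slash-stripped URLs. A minimal sketch, using a simplified stand-in for util.schemeless:

def schemeless(url):
    # simplified stand-in for util.schemeless(url).strip("/")
    return url.split("://", 1)[-1].strip("/").lower()

def best_domain_url(post_url, domain_urls):
    post = schemeless(post_url)
    best = ""
    for domain_url in domain_urls:
        if post.startswith(schemeless(domain_url)) and len(domain_url) > len(best):
            best = domain_url
    return best or None

print(best_domain_url("https://example.com/blog/post-1",
                      ["https://example.com/", "https://example.com/blog/"]))
# https://example.com/blog/ wins because it is the longer matching domain_url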
+                sources_ready.append(source)
+                schemeless_url = util.schemeless(url.lower()).strip("/")
+                for domain_url in source.domain_urls:
+                    schemeless_domain_url = util.schemeless(domain_url.lower()).strip(
+                        "/"
+                    )
+                    if schemeless_url.startswith(schemeless_domain_url) and len(
+                        domain_url
+                    ) > len(current_url):
+                        current_url = domain_url
+                        best_match = source
+
+        if best_match:
+            return best_match
+
+        if sources_ready:
+            msg = f"No account found that matches {util.pretty_link(url)}. Check that the web site URL is in your silo profile, then sign up again."
+        else:
+            msg = "Publish is not enabled for your account. Try signing up!"
+        self.error(msg, html=msg)
+
+    def attempt_single_item(self, item):
+        """Attempts to preview or publish a single mf2 item.
+
+        Args:
+          item: mf2 item dict from mf2py
+
+        Returns:
+          CreationResult
+        """
+        self.maybe_inject_silo_content(item)
+        obj = microformats2.json_to_object(item)
+
+        ignore_formatting = self.ignore_formatting(item)
+        if ignore_formatting:
+            prop = microformats2.first_props(item.get("properties", {}))
+            content = microformats2.get_text(prop.get("content"))
+            if content:
+                obj["content"] = content.strip()
+
+        # which original post URL to include? in order of preference:
+        # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
+        # 2. original user-provided URL if it redirected
+        # 3. u-url if available
+        # 4. actual final fetched URL
+        if self.shortlink:
+            obj["url"] = self.shortlink
+        elif self.source_url() != self.fetched.url:
+            obj["url"] = self.source_url()
+        elif "url" not in obj:
+            obj["url"] = self.fetched.url
+        logging.debug(
+            "Converted to ActivityStreams object: %s", json_dumps(obj, indent=2)
+        )
+
+        # posts and comments need content
+        obj_type = obj.get("objectType")
+        if obj_type in ("note", "article", "comment"):
+            if (
+                not obj.get("content")
+                and not obj.get("summary")
+                and not obj.get("displayName")
+            ):
+                return gr_source.creation_result(
+                    abort=False,
+                    error_plain="Could not find content in %s" % self.fetched.url,
+                    error_html='Could not find content in %s'
+                    % self.fetched.url,
+                )
+
+        self.preprocess(obj)
+
+        include_link = self.include_link(item)
+
+        if not self.authorize():
+            return gr_source.creation_result(abort=True)
+
+        if self.PREVIEW:
+            result = self.source.gr_source.preview_create(
+                obj, include_link=include_link, ignore_formatting=ignore_formatting
+            )
+            previewed = result.content or result.description
+            if self.entity.type == "preview":
+                self.entity.published = previewed
+            if not previewed:
+                return result  # there was an error
+            return self._render_preview(result, include_link=include_link)
+
+        else:
+            result = self.source.gr_source.create(
+                obj, include_link=include_link, ignore_formatting=ignore_formatting
+            )
+            self.entity.published = result.content
+            if not result.content:
+                return result  # there was an error
+            if "url" not in self.entity.published:
+                self.entity.published["url"] = obj.get("url")
+            self.entity.type = self.entity.published.get("type") or models.get_type(obj)
+            logging.info("Returning %s", json_dumps(self.entity.published, indent=2))
+            return gr_source.creation_result(
+                json_dumps(self.entity.published, indent=2)
+            )
+
+    def delete(self, source_url):
+        """Attempts to delete or preview delete a published post.
+
+        Args:
+          source_url: string, original post URL
+
+        Returns:
+          dict response data with at least id and url
+        """
+        assert self.entity
+        if (
+            self.entity.status != "complete" or self.entity.type == "preview"
+        ) and not appengine_info.LOCAL:
+            return self.error(
+                "Can't delete this post from %s because Bridgy Publish didn't originally POSSE it there"
+                % self.source.gr_source.NAME
+            )
+
+        id = self.entity.published.get("id")
+        url = self.entity.published.get("url")
+        if not id and url:
+            id = self.source.gr_source.post_id(url)
+
+        if not id:
+            return self.error(
+                "Bridgy Publish can't find the id of the %s post that it originally published for %s"
+                % (self.source.gr_source.NAME, source_url)
+            )
+
+        if self.PREVIEW:
+            try:
+                return self._render_preview(self.source.gr_source.preview_delete(id))
+            except NotImplementedError:
+                return self.error(
+                    "Sorry, deleting isn't supported for %s yet"
+                    % self.source.gr_source.NAME
+                )
+
+        logging.info("Deleting silo post id %s", id)
+        self.entity = models.Publish(
+            parent=self.entity.key.parent(), source=self.source.key, type="delete"
+        )
+        self.entity.put()
+        logging.debug(
+            "Publish entity for delete: '%s'", self.entity.key.urlsafe().decode()
+        )
+
+        resp = self.source.gr_source.delete(id)
+        resp.content.setdefault("id", id)
+        resp.content.setdefault("url", url)
+        logging.info(resp.content)
+        self.entity.published = resp.content
+        self.entity.status = "deleted"
+        self.entity.put()
+        return resp
+
+    def preprocess(self, activity):
+        """Preprocesses an item before trying to publish it.
+
+        Specifically, expands inReplyTo/object URLs with rel=syndication URLs.
+
+        Args:
+          activity: an ActivityStreams activity or object being published
+        """
+        self.source.preprocess_for_publish(activity)
+        self.expand_target_urls(activity)
+
+        activity["image"] = [
+            img
+            for img in util.get_list(activity, "image")
+            if not IGNORE_IMAGE_RE.match(img.get("url", ""))
+        ]
+        if not activity["image"]:
+            del activity["image"]
+
+    def expand_target_urls(self, activity):
+        """Expand the inReplyTo or object fields of an ActivityStreams object
+        by fetching the original and looking for rel=syndication URLs.
+
+        This method modifies the dict in place.
+
+        Args:
+          activity: an ActivityStreams dict of the activity being published
+        """
+        for field in ("inReplyTo", "object"):
+            # microformats2.json_to_object de-dupes, no need to do it here
+            objs = activity.get(field)
+            if not objs:
+                continue
+
+            if isinstance(objs, dict):
+                objs = [objs]
+
+            augmented = list(objs)
+            for obj in objs:
+                url = obj.get("url")
+                if not url:
+                    continue
+
+                parsed = urllib.parse.urlparse(url)
+                # ignore home pages.
https://github.com/snarfed/bridgy/issues/760 + if parsed.path in ("", "/"): + continue + + # get_webmention_target weeds out silos and non-HTML targets + # that we wouldn't want to download and parse + url, _, ok = util.get_webmention_target(url) + if not ok: + continue + + logging.debug( + "expand_target_urls fetching field=%s, url=%s", field, url + ) + try: + mf2 = util.fetch_mf2(url) + except AssertionError: + raise # for unit tests + except HTTPException: + # raised by us, probably via self.error() + raise + except BaseException: + # it's not a big deal if we can't fetch an in-reply-to url + logging.info( + "expand_target_urls could not fetch field=%s, url=%s", + field, + url, + exc_info=True, + ) + continue + + synd_urls = mf2["rels"].get("syndication", []) + + # look for syndication urls in the first h-entry + queue = collections.deque(mf2.get("items", [])) + while queue: + item = queue.popleft() + item_types = set(item.get("type", [])) + if "h-feed" in item_types and "h-entry" not in item_types: + queue.extend(item.get("children", [])) + continue + + # these can be urls or h-cites + synd_urls += microformats2.get_string_urls( + item.get("properties", {}).get("syndication", []) + ) + + logging.debug( + "expand_target_urls found rel=syndication for url=%s: %r", + url, + synd_urls, + ) + augmented += [{"url": u} for u in synd_urls] + + activity[field] = augmented + + def get_or_add_publish_entity(self, source_url): + """Creates and stores :class:`models.Publish` entity. + + ...and if necessary, :class:`models.PublishedPage` entity. + + Args: + source_url: string + """ + try: + return self._get_or_add_publish_entity(source_url) + except CollisionError: + return self.error( + "You're already publishing that post in another request.", status=429 + ) + except Exception as e: + code = getattr(e, "code", None) + details = getattr(e, "details", None) + logging.info((code and code(), details and details())) + if ( + code + and code() == grpc.StatusCode.ABORTED + and details + and "too much contention" in details() + ): + return self.error( + "You're already publishing that post in another request.", + status=429, + ) + raise + + @ndb.transactional() + def _get_or_add_publish_entity(self, source_url): + page = PublishedPage.get_or_insert(source_url) + + # Detect concurrent publish request for the same page + # https://github.com/snarfed/bridgy/issues/996 + pending = Publish.query( + Publish.status == "new", + Publish.type != "preview", + Publish.source == self.source.key, + ancestor=page.key, + ).get() + if pending: + logging.warning(f"Collided with publish: {pending.key.urlsafe().decode()}") + raise CollisionError() + + entity = Publish.query( + Publish.status == "complete", + Publish.type != "preview", + Publish.source == self.source.key, + ancestor=page.key, + ).get() + if entity is None: + entity = Publish(parent=page.key, source=self.source.key) + if self.PREVIEW: + entity.type = "preview" + entity.put() + + logging.debug("Publish entity: '%s'", entity.key.urlsafe().decode()) + return entity + + def _render_preview(self, result, include_link=False): + """Renders a preview CreationResult as HTML. 
+ + Args: + result: CreationResult + include_link: boolean + + Returns: CreationResult with the rendered HTML in content + """ + state = { + "source_key": self.source.key.urlsafe().decode(), + "source_url": self.source_url(), + "target_url": self.target_url(), + "include_link": include_link, + } + vars = { + "source": util.preprocess_source(self.source), + "preview": result.content, + "description": result.description, + "webmention_endpoint": util.host_url("/publish/webmention"), + "state": util.encode_oauth_state(state), + **state, + } + logging.info(f"Rendering preview with template vars {pprint.pformat(vars)}") + return gr_source.creation_result(render_template("preview.html", **vars)) class Preview(PublishBase): - """Renders a preview HTML snippet of how a webmention would be handled. - """ - PREVIEW = True - - def dispatch_request(self): - try: - result = self._run() - return result.content if result and result.content else r'¯\_(ツ)_/¯' - except HTTPException as e: - return e.description, e.code - - def authorize(self): - from_source = util.load_source() - if from_source.key != self.source.key: - msg = 'Try publishing that page from %s instead.' % (self.source.bridgy_path(), self.source.label()) - self.error(msg, html=msg) - return False - - return True - - def include_link(self, item): - # always use query param because there's a checkbox in the UI - val = request.values.get('bridgy_omit_link', None) - return (gr_source.INCLUDE_LINK if val is None or val.lower() == 'false' - else gr_source.INCLUDE_IF_TRUNCATED if val.lower() == 'maybe' - else gr_source.OMIT_LINK) - - def error(self, error, html=None, status=400, data=None, report=False, **kwargs): - error = html if html else util.linkify(error) - logging.info(f'publish: {error}') - if report: - self.report_error(error, status=status) - flask_util.error(error, status=status) + """Renders a preview HTML snippet of how a webmention would be handled.""" + + PREVIEW = True + + def dispatch_request(self): + try: + result = self._run() + return result.content if result and result.content else r"¯\_(ツ)_/¯" + except HTTPException as e: + return e.description, e.code + + def authorize(self): + from_source = util.load_source() + if from_source.key != self.source.key: + msg = 'Try publishing that page from %s instead.' % ( + self.source.bridgy_path(), + self.source.label(), + ) + self.error(msg, html=msg) + return False + + return True + + def include_link(self, item): + # always use query param because there's a checkbox in the UI + val = request.values.get("bridgy_omit_link", None) + return ( + gr_source.INCLUDE_LINK + if val is None or val.lower() == "false" + else gr_source.INCLUDE_IF_TRUNCATED + if val.lower() == "maybe" + else gr_source.OMIT_LINK + ) + + def error(self, error, html=None, status=400, data=None, report=False, **kwargs): + error = html if html else util.linkify(error) + logging.info(f"publish: {error}") + if report: + self.report_error(error, status=status) + flask_util.error(error, status=status) class Send(PublishBase): - """Interactive publish handler. Redirected to after each silo's OAuth dance. - - Note that this is GET, not POST, since HTTP redirects always GET. 
- """ - PREVIEW = False - - def finish(self, auth_entity, state=None): - self.state = util.decode_oauth_state(state) - if not state: - self.error('If you want to publish or preview, please approve the prompt.') - return redirect('/') - - source = ndb.Key(urlsafe=self.state['source_key']).get() - if auth_entity is None: - self.error('If you want to publish or preview, please approve the prompt.') - elif not auth_entity.is_authority_for(source.auth_entity): - self.error('Please log into %s as %s to publish that page.' % - (source.GR_CLASS.NAME, source.name)) - else: - result = self._run() - if result and result.content: - flash('Done! Click here to view.' % - self.entity.published.get('url')) - granary_message = self.entity.published.get('granary_message') - if granary_message: - flash(granary_message) - # otherwise error() added an error message - - return redirect(source.bridgy_url()) - - def source_url(self): - return self.state['source_url'] - - def target_url(self): - return self.state['target_url'] - - def include_link(self, item): - return self.state['include_link'] - - def error(self, error, html=None, status=400, data=None, report=False, **kwargs): - logging.info(f'publish: {error}') - error = html if html else util.linkify(error) - flash('%s' % error) - if report: - self.report_error(error, status=status) + """Interactive publish handler. Redirected to after each silo's OAuth dance. + + Note that this is GET, not POST, since HTTP redirects always GET. + """ + + PREVIEW = False + + def finish(self, auth_entity, state=None): + self.state = util.decode_oauth_state(state) + if not state: + self.error("If you want to publish or preview, please approve the prompt.") + return redirect("/") + + source = ndb.Key(urlsafe=self.state["source_key"]).get() + if auth_entity is None: + self.error("If you want to publish or preview, please approve the prompt.") + elif not auth_entity.is_authority_for(source.auth_entity): + self.error( + "Please log into %s as %s to publish that page." + % (source.GR_CLASS.NAME, source.name) + ) + else: + result = self._run() + if result and result.content: + flash( + 'Done! Click here to view.' + % self.entity.published.get("url") + ) + granary_message = self.entity.published.get("granary_message") + if granary_message: + flash(granary_message) + # otherwise error() added an error message + + return redirect(source.bridgy_url()) + + def source_url(self): + return self.state["source_url"] + + def target_url(self): + return self.state["target_url"] + + def include_link(self, item): + return self.state["include_link"] + + def error(self, error, html=None, status=400, data=None, report=False, **kwargs): + logging.info(f"publish: {error}") + error = html if html else util.linkify(error) + flash("%s" % error) + if report: + self.report_error(error, status=status) # We want Callback.get() and Send.finish(), so put # Callback first and override finish. 
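The finish = Send.finish assignments below are what the comment above refers to: with the Callback class listed first in the bases, Python's method resolution order would otherwise pick up the callback's own finish(). A toy illustration with hypothetical stand-in classes:

class Callback:
    def get(self):
        return "Callback.get"

    def finish(self, auth_entity, state=None):
        return "Callback.finish"

class Send:
    def finish(self, auth_entity, state=None):
        return "Send.finish"

class FlickrSendSketch(Callback, Send):
    # without this assignment, Callback.finish would win in the MRO
    finish = Send.finish

print(FlickrSendSketch().get())         # Callback.get
print(FlickrSendSketch().finish(None))  # Send.finish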
class FlickrSend(oauth_flickr.Callback, Send): - finish = Send.finish + finish = Send.finish class GitHubSend(oauth_github.Callback, Send): - finish = Send.finish + finish = Send.finish class MastodonSend(oauth_mastodon.Callback, Send): - finish = Send.finish + finish = Send.finish class MeetupSend(oauth_meetup.Callback, Send): - finish = Send.finish + finish = Send.finish class TwitterSend(oauth_twitter.Callback, Send): - finish = Send.finish + finish = Send.finish class Webmention(PublishBase): - """Accepts webmentions and translates them to publish requests.""" - PREVIEW = False - - def dispatch_request(self): - result = self._run() - if result: - return result.content, 201, { - 'Content-Type': 'application/json', - 'Location': self.entity.published['url'], - } - - return '' - - def authorize(self): - """Check for a backlink to brid.gy/publish/SILO.""" - bases = set() - if request.host == 'brid.gy': - bases.add('brid.gy') - bases.add('www.brid.gy') # also accept www - else: - bases.add(request.host) - - expected = ['%s/publish/%s' % (base, self.source.SHORT_NAME) for base in bases] - - if self.entity.html: - for url in expected: - if url in self.entity.html or urllib.parse.quote(url, safe='') in self.entity.html: - return True - - self.error(f"Couldn't find link to {expected[0]}") - return False - - def error(self, error, **kwargs): - logging.info(f'publish: {error}') - return super().error(error, **kwargs) - - -app.add_url_rule('/publish/preview', view_func=Preview.as_view('publish_preview'), methods=['POST']) -app.add_url_rule('/publish/webmention', view_func=Webmention.as_view('publish_webmention'), methods=['POST']) -app.add_url_rule('/publish/flickr/finish', view_func=FlickrSend.as_view('publish_flickr_finish', 'unused')) -app.add_url_rule('/publish/github/finish', view_func=GitHubSend.as_view('publish_github_finish', 'unused')) -app.add_url_rule('/publish/mastodon/finish', view_func=MastodonSend.as_view('publish_mastodon_finish', 'unused')) + """Accepts webmentions and translates them to publish requests.""" + + PREVIEW = False + + def dispatch_request(self): + result = self._run() + if result: + return ( + result.content, + 201, + { + "Content-Type": "application/json", + "Location": self.entity.published["url"], + }, + ) + + return "" + + def authorize(self): + """Check for a backlink to brid.gy/publish/SILO.""" + bases = set() + if request.host == "brid.gy": + bases.add("brid.gy") + bases.add("www.brid.gy") # also accept www + else: + bases.add(request.host) + + expected = ["%s/publish/%s" % (base, self.source.SHORT_NAME) for base in bases] + + if self.entity.html: + for url in expected: + if ( + url in self.entity.html + or urllib.parse.quote(url, safe="") in self.entity.html + ): + return True + + self.error(f"Couldn't find link to {expected[0]}") + return False + + def error(self, error, **kwargs): + logging.info(f"publish: {error}") + return super().error(error, **kwargs) + + +app.add_url_rule( + "/publish/preview", view_func=Preview.as_view("publish_preview"), methods=["POST"] +) +app.add_url_rule( + "/publish/webmention", + view_func=Webmention.as_view("publish_webmention"), + methods=["POST"], +) +app.add_url_rule( + "/publish/flickr/finish", + view_func=FlickrSend.as_view("publish_flickr_finish", "unused"), +) +app.add_url_rule( + "/publish/github/finish", + view_func=GitHubSend.as_view("publish_github_finish", "unused"), +) +app.add_url_rule( + "/publish/mastodon/finish", + view_func=MastodonSend.as_view("publish_mastodon_finish", "unused"), +) # because Meetup's 
`redirect_uri` handling is a little more restrictive -app.add_url_rule('/meetup/publish/finish', view_func=MeetupSend.as_view('publish_meetup_finish', 'unused')) -app.add_url_rule('/publish/twitter/finish', view_func=TwitterSend.as_view('publish_twitter_finish', 'unused')) +app.add_url_rule( + "/meetup/publish/finish", + view_func=MeetupSend.as_view("publish_meetup_finish", "unused"), +) +app.add_url_rule( + "/publish/twitter/finish", + view_func=TwitterSend.as_view("publish_twitter_finish", "unused"), +) diff --git a/reddit.py b/reddit.py index bfe31e82..ece14deb 100644 --- a/reddit.py +++ b/reddit.py @@ -10,69 +10,86 @@ class Reddit(models.Source): - """A Reddit account. - - The key name is the username. - """ - GR_CLASS = gr_reddit.Reddit - OAUTH_START = oauth_reddit.Start - SHORT_NAME = 'reddit' - TYPE_LABELS = { - 'post': 'submission', - 'comment': 'comment', - } - CAN_PUBLISH = False - - @staticmethod - def new(auth_entity=None, **kwargs): - """Creates and returns a :class:`Reddit` entity. - - Args: - auth_entity: :class:`oauth_dropins.reddit.RedditAuth` - kwargs: property values - """ - user = json_loads(auth_entity.user_json) - gr_source = gr_reddit.Reddit(auth_entity.refresh_token) - return Reddit(id=user.get('name'), - auth_entity=auth_entity.key, - url=gr_source.user_url(user.get('name')), - name=user.get('name'), - picture=user.get('icon_img'), - **kwargs) - - def silo_url(self): - """Returns the Reddit account URL, e.g. https://reddit.com/user/foo.""" - return self.gr_source.user_url(self.key_id()) - - def label_name(self): - """Returns the username.""" - return self.key_id() - - def search_for_links(self): - """Searches for activities with links to any of this source's web sites. - - Returns: - sequence of ActivityStreams activity dicts + """A Reddit account. + + The key name is the username. """ - urls = set(util.schemeless(util.fragmentless(url), slashes=False) - for url in self.domain_urls - if not util.in_webmention_blocklist(util.domain_from_link(url))) - if not urls: - return [] - # Search syntax: https://www.reddit.com/wiki/search - url_query = ' OR '.join([f'site:"{u}" OR selftext:"{u}"' for u in urls]) - return self.get_activities( - search_query=url_query, group_id=gr_source.SEARCH, etag=self.last_activities_etag, - fetch_replies=False, fetch_likes=False, fetch_shares=False, count=50) + GR_CLASS = gr_reddit.Reddit + OAUTH_START = oauth_reddit.Start + SHORT_NAME = "reddit" + TYPE_LABELS = { + "post": "submission", + "comment": "comment", + } + CAN_PUBLISH = False + + @staticmethod + def new(auth_entity=None, **kwargs): + """Creates and returns a :class:`Reddit` entity. + + Args: + auth_entity: :class:`oauth_dropins.reddit.RedditAuth` + kwargs: property values + """ + user = json_loads(auth_entity.user_json) + gr_source = gr_reddit.Reddit(auth_entity.refresh_token) + return Reddit( + id=user.get("name"), + auth_entity=auth_entity.key, + url=gr_source.user_url(user.get("name")), + name=user.get("name"), + picture=user.get("icon_img"), + **kwargs, + ) + + def silo_url(self): + """Returns the Reddit account URL, e.g. https://reddit.com/user/foo.""" + return self.gr_source.user_url(self.key_id()) + + def label_name(self): + """Returns the username.""" + return self.key_id() + + def search_for_links(self): + """Searches for activities with links to any of this source's web sites. 
+ + Returns: + sequence of ActivityStreams activity dicts + """ + urls = set( + util.schemeless(util.fragmentless(url), slashes=False) + for url in self.domain_urls + if not util.in_webmention_blocklist(util.domain_from_link(url)) + ) + if not urls: + return [] + + # Search syntax: https://www.reddit.com/wiki/search + url_query = " OR ".join([f'site:"{u}" OR selftext:"{u}"' for u in urls]) + return self.get_activities( + search_query=url_query, + group_id=gr_source.SEARCH, + etag=self.last_activities_etag, + fetch_replies=False, + fetch_likes=False, + fetch_shares=False, + count=50, + ) class Callback(oauth_reddit.Callback): - def finish(self, auth_entity, state=None): - util.maybe_add_or_delete_source(Reddit, auth_entity, state) + def finish(self, auth_entity, state=None): + util.maybe_add_or_delete_source(Reddit, auth_entity, state) -app.add_url_rule('/reddit/start', - view_func=util.oauth_starter(oauth_reddit.Start).as_view('reddit_start', '/reddit/callback'), methods=['POST']) -app.add_url_rule('/reddit/callback', - view_func=Callback.as_view('reddit_callback', 'unused to_path')) +app.add_url_rule( + "/reddit/start", + view_func=util.oauth_starter(oauth_reddit.Start).as_view( + "reddit_start", "/reddit/callback" + ), + methods=["POST"], +) +app.add_url_rule( + "/reddit/callback", view_func=Callback.as_view("reddit_callback", "unused to_path") +) diff --git a/superfeedr.py b/superfeedr.py index 8b1c757c..99d7c1e3 100644 --- a/superfeedr.py +++ b/superfeedr.py @@ -19,122 +19,135 @@ import models import util -SUPERFEEDR_TOKEN = util.read('superfeedr_token') -SUPERFEEDR_USERNAME = util.read('superfeedr_username') -PUSH_API_URL = 'https://push.superfeedr.com' +SUPERFEEDR_TOKEN = util.read("superfeedr_token") +SUPERFEEDR_USERNAME = util.read("superfeedr_username") +PUSH_API_URL = "https://push.superfeedr.com" MAX_BLOGPOST_LINKS = 10 def subscribe(source): - """Subscribes to a source. - - Also receives some past posts and adds propagate tasks for them. - - http://documentation.superfeedr.com/subscribers.html#addingfeedswithpubsubhubbub - - Args: - source: Blogger, Tumblr, or WordPress - """ - if appengine_info.LOCAL: - logging.info('Running in dev_appserver, not subscribing to Superfeedr') - return - - data = { - 'hub.mode': 'subscribe', - 'hub.topic': source.feed_url(), - 'hub.callback': util.host_url(f'/{source.SHORT_NAME}/notify/{source.key_id()}'), - # TODO - # 'hub.secret': 'xxx', - 'format': 'json', - 'retrieve': 'true', - } - - logging.info('Adding Superfeedr subscription: %s', data) - resp = util.requests_post( - PUSH_API_URL, data=data, - auth=HTTPBasicAuth(SUPERFEEDR_USERNAME, SUPERFEEDR_TOKEN), - headers=util.REQUEST_HEADERS) - handle_feed(resp.json(), source) + """Subscribes to a source. + + Also receives some past posts and adds propagate tasks for them. 
+ + http://documentation.superfeedr.com/subscribers.html#addingfeedswithpubsubhubbub + + Args: + source: Blogger, Tumblr, or WordPress + """ + if appengine_info.LOCAL: + logging.info("Running in dev_appserver, not subscribing to Superfeedr") + return + + data = { + "hub.mode": "subscribe", + "hub.topic": source.feed_url(), + "hub.callback": util.host_url(f"/{source.SHORT_NAME}/notify/{source.key_id()}"), + # TODO + # 'hub.secret': 'xxx', + "format": "json", + "retrieve": "true", + } + + logging.info("Adding Superfeedr subscription: %s", data) + resp = util.requests_post( + PUSH_API_URL, + data=data, + auth=HTTPBasicAuth(SUPERFEEDR_USERNAME, SUPERFEEDR_TOKEN), + headers=util.REQUEST_HEADERS, + ) + handle_feed(resp.json(), source) def handle_feed(feed, source): - """Handles a Superfeedr JSON feed. - - Creates :class:`models.BlogPost` entities and adds propagate-blogpost tasks - for new items. - - http://documentation.superfeedr.com/schema.html#json - http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications - - Args: - feed: unicode string, Superfeedr JSON feed - source: Blogger, Tumblr, or WordPress - """ - logging.info('Source: %s %s', source.label(), source.key_id()) - logging.info('Raw feed: %s', feed) - - if not feed: - return - - if source.status != 'enabled': - logging.info('Dropping because source is %s', source.status) - return - elif 'webmention' not in source.features: - logging.info("Dropping because source doesn't have webmention feature") - return - - for item in feed.get('items', []): - url = item.get('permalinkUrl') or item.get('id') - if not url: - logging.error('Dropping feed item without permalinkUrl or id!') - continue - - # extract links from content, discarding self links. - # - # i don't use get_webmention_target[s]() here because they follows redirects - # and fetch link contents, and this handler should be small and fast and try - # to return a response to superfeedr successfully. - # - # TODO: extract_links currently has a bug that makes it drop trailing - # slashes. ugh. fix that. - content = item.get('content') or item.get('summary', '') - links = [util.clean_url(util.unwrap_t_umblr_com(url)) - for url in util.extract_links(content) - if util.domain_from_link(url) not in source.domains] - - unique = [] - for link in util.dedupe_urls(links): - if len(link) <= _MAX_STRING_LENGTH: - unique.append(link) - else: - logging.info('Giving up on link over %s chars! %s', _MAX_STRING_LENGTH, link) - if len(unique) >= MAX_BLOGPOST_LINKS: - logging.info('Stopping at 10 links! Skipping the rest.') - break - - logging.info('Found links: %s', unique) - if len(url) > _MAX_KEYPART_BYTES: - logging.warning('Blog post URL is too long (over 500 chars)! Giving up.') - bp = models.BlogPost(id=url[:_MAX_KEYPART_BYTES], source=source.key, - feed_item=item, failed=unique) - else: - bp = models.BlogPost(id=url, source=source.key, feed_item=item, unsent=unique) - - bp.get_or_save() + """Handles a Superfeedr JSON feed. + + Creates :class:`models.BlogPost` entities and adds propagate-blogpost tasks + for new items. 
+ + http://documentation.superfeedr.com/schema.html#json + http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications + + Args: + feed: unicode string, Superfeedr JSON feed + source: Blogger, Tumblr, or WordPress + """ + logging.info("Source: %s %s", source.label(), source.key_id()) + logging.info("Raw feed: %s", feed) + + if not feed: + return + + if source.status != "enabled": + logging.info("Dropping because source is %s", source.status) + return + elif "webmention" not in source.features: + logging.info("Dropping because source doesn't have webmention feature") + return + + for item in feed.get("items", []): + url = item.get("permalinkUrl") or item.get("id") + if not url: + logging.error("Dropping feed item without permalinkUrl or id!") + continue + + # extract links from content, discarding self links. + # + # i don't use get_webmention_target[s]() here because they follows redirects + # and fetch link contents, and this handler should be small and fast and try + # to return a response to superfeedr successfully. + # + # TODO: extract_links currently has a bug that makes it drop trailing + # slashes. ugh. fix that. + content = item.get("content") or item.get("summary", "") + links = [ + util.clean_url(util.unwrap_t_umblr_com(url)) + for url in util.extract_links(content) + if util.domain_from_link(url) not in source.domains + ] + + unique = [] + for link in util.dedupe_urls(links): + if len(link) <= _MAX_STRING_LENGTH: + unique.append(link) + else: + logging.info( + "Giving up on link over %s chars! %s", _MAX_STRING_LENGTH, link + ) + if len(unique) >= MAX_BLOGPOST_LINKS: + logging.info("Stopping at 10 links! Skipping the rest.") + break + + logging.info("Found links: %s", unique) + if len(url) > _MAX_KEYPART_BYTES: + logging.warning("Blog post URL is too long (over 500 chars)! Giving up.") + bp = models.BlogPost( + id=url[:_MAX_KEYPART_BYTES], + source=source.key, + feed_item=item, + failed=unique, + ) + else: + bp = models.BlogPost( + id=url, source=source.key, feed_item=item, unsent=unique + ) + + bp.get_or_save() class Notify(View): - """Handles a Superfeedr notification. + """Handles a Superfeedr notification. + + Abstract; subclasses must set the SOURCE_CLS attr. - Abstract; subclasses must set the SOURCE_CLS attr. + http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications + """ - http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications - """ - SOURCE_CLS = None + SOURCE_CLS = None - def dispatch_request(self, id): - source = self.SOURCE_CLS.get_by_id(id) - if source: - handle_feed(request.json, source) + def dispatch_request(self, id): + source = self.SOURCE_CLS.get_by_id(id) + if source: + handle_feed(request.json, source) - return '' + return "" diff --git a/tasks.py b/tasks.py index d386e33b..46277844 100644 --- a/tasks.py +++ b/tasks.py @@ -18,817 +18,960 @@ from flask_background import app from models import Response from util import ERROR_HTTP_RETURN_CODE + # need to import model class definitions since poll creates and saves entities. import blogger, facebook, flickr, github, instagram, mastodon, medium, reddit, tumblr, twitter, wordpress_rest # Used as a sentinel value in the webmention endpoint cache -NO_ENDPOINT = 'NONE' +NO_ENDPOINT = "NONE" class Poll(View): - """Task handler that fetches and processes new responses from a single source. 
- - Request parameters: - - * source_key: string key of source entity - * last_polled: timestamp, YYYY-MM-DD-HH-MM-SS - - Inserts a propagate task for each response that hasn't been seen before. - - Steps: - 1: Fetch activities: posts by the user, links to the user's domain(s). - 2: Extract responses, store their activities. - 3: Filter out responses we've already seen, using Responses in the datastore. - 4: Store new responses and enqueue propagate tasks. - 5: Possibly refetch updated syndication urls. - - 1-4 are in backfeed(); 5 is in poll(). - """ - RESTART_EXISTING_TASKS = False # overridden in Discover - - def _last_poll_url(self, source): - return util.host_url(logs.url(source.last_poll_attempt, source.key)) - - def dispatch_request(self): - logging.debug('Params: %s', list(request.values.items())) - - key = request.values['source_key'] - source = g.source = ndb.Key(urlsafe=key).get() - if not source or source.status == 'disabled' or 'listen' not in source.features: - logging.error('Source not found or disabled. Dropping task.') - return '' - logging.info('Source: %s %s, %s', source.label(), source.key_id(), - source.bridgy_url()) - - if source.AUTO_POLL: - last_polled = request.values['last_polled'] - if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT): - logging.warning('duplicate poll task! deferring to the other task.') - return '' - - logging.info('Last poll: %s', self._last_poll_url(source)) - - # mark this source as polling - source.updates = { - 'poll_status': 'polling', - 'last_poll_attempt': util.now_fn(), - 'rate_limited': False, - } - source = models.Source.put_updates(source) - - source.updates = {} - try: - self.poll(source) - except Exception as e: - source.updates['poll_status'] = 'error' - code, _ = util.interpret_http_exception(e) - if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource): - # the user deauthorized the bridgy app, so disable this source. - # let the task complete successfully so that it's not retried. - logging.warning(f'Disabling source due to: {e}', exc_info=True) - source.updates.update({ - 'status': 'disabled', - 'poll_status': 'ok', - }) - elif code in source.RATE_LIMIT_HTTP_CODES: - logging.info('Rate limited. Marking as error and finishing. %s', e) - source.updates['rate_limited'] = True - else: - raise - finally: - source = models.Source.put_updates(source) - - if source.AUTO_POLL: - util.add_poll_task(source) - - # feeble attempt to avoid hitting the instance memory limit - source = None - gc.collect() - - return 'OK' - - def poll(self, source): - """Actually runs the poll. - - Stores property names and values to update in source.updates. 
- """ - if source.last_activities_etag or source.last_activity_id: - logging.debug('Using ETag %s, last activity id %s', - source.last_activities_etag, source.last_activity_id) - - # - # Step 1: fetch activities: - # * posts by the user - # * search all posts for the user's domain URLs to find links - # - cache = util.CacheDict() - if source.last_activities_cache_json: - cache.update(json_loads(source.last_activities_cache_json)) - - # search for links first so that the user's activities and responses - # override them if they overlap - links = source.search_for_links() - - # this user's own activities (and user mentions) - resp = source.get_activities_response( - fetch_replies=True, fetch_likes=True, fetch_shares=True, - fetch_mentions=True, count=50, etag=source.last_activities_etag, - min_id=source.last_activity_id, cache=cache) - etag = resp.get('etag') # used later - user_activities = resp.get('items', []) - - # these map ids to AS objects. - # backfeed all links as responses, but only include the user's own links as - # activities, since their responses also get backfeed. - responses = {a['id']: a for a in links} - - user_id = source.user_tag_id() - links_by_user = [a for a in links - if a.get('object', {}).get('author', {}).get('id') == user_id] - activities = {a['id']: a for a in links_by_user + user_activities} - - # extract silo activity ids, update last_activity_id - silo_activity_ids = set() - last_activity_id = source.last_activity_id - for id, activity in activities.items(): - # maybe replace stored last activity id - parsed = util.parse_tag_uri(id) - if parsed: - id = parsed[1] - silo_activity_ids.add(id) - try: - # try numeric comparison first - greater = int(id) > int(last_activity_id) - except (TypeError, ValueError): - greater = str(id) > str(last_activity_id) - if greater: - last_activity_id = id - - if last_activity_id and last_activity_id != source.last_activity_id: - source.updates['last_activity_id'] = last_activity_id - - # trim cache to just the returned activity ids, so that it doesn't grow - # without bound. (WARNING: depends on get_activities_response()'s cache key - # format, e.g. 'PREFIX ACTIVITY_ID'!) - source.updates['last_activities_cache_json'] = json_dumps( - {k: v for k, v in cache.items() if k.split()[-1] in silo_activity_ids}) - - self.backfeed(source, responses, activities=activities) - - source.updates.update({'last_polled': source.last_poll_attempt, - 'poll_status': 'ok'}) - if etag and etag != source.last_activities_etag: - source.updates['last_activities_etag'] = etag - - # - # Possibly refetch updated syndication urls. - # - # if the author has added syndication urls since the first time - # original_post_discovery ran, we'll miss them. this cleanup task will - # periodically check for updated urls. 
only kicks in if the author has - # *ever* published a rel=syndication url - if source.should_refetch(): - logging.info('refetching h-feed for source %s', source.label()) - relationships = original_post_discovery.refetch(source) - - now = util.now_fn() - source.updates['last_hfeed_refetch'] = now - - if relationships: - logging.info('refetch h-feed found new rel=syndication relationships: %s', - relationships) - try: - self.repropagate_old_responses(source, relationships) - except BaseException as e: - if ('BadRequestError' in str(e.__class__) or - 'Timeout' in str(e.__class__) or - util.is_connection_failure(e)): - logging.info('Timeout while repropagating responses.', exc_info=True) - else: - raise - else: - logging.info( - 'skipping refetch h-feed. last-syndication-url %s, last-refetch %s', - source.last_syndication_url, source.last_hfeed_refetch) + """Task handler that fetches and processes new responses from a single source. - def backfeed(self, source, responses=None, activities=None): - """Processes responses and activities and generates propagate tasks. + Request parameters: - Stores property names and values to update in source.updates. + * source_key: string key of source entity + * last_polled: timestamp, YYYY-MM-DD-HH-MM-SS - Args: - source: Source - responses: dict mapping AS response id to AS object - activities: dict mapping AS activity id to AS object - """ - if responses is None: - responses = {} - if activities is None: - activities = {} - - # Cache to make sure we only fetch the author's h-feed(s) the - # first time we see it - fetched_hfeeds = set() - - # narrow down to just public activities - public = {} - private = {} - for id, activity in activities.items(): - (public if source.is_activity_public(activity) else private)[id] = activity - logging.info('Found %d public activities: %s', len(public), public.keys()) - logging.info('Found %d private activities: %s', len(private), private.keys()) - - last_public_post = (source.last_public_post or util.EPOCH).isoformat() - public_published = util.trim_nulls( - [a.get('object', {}).get('published') for a in public.values()]) - if public_published: - max_published = max(public_published) - if max_published > last_public_post: - last_public_post = max_published - source.updates['last_public_post'] = \ - util.as_utc(util.parse_iso8601(max_published)) - - source.updates['recent_private_posts'] = \ - len([a for a in private.values() - if a.get('object', {}).get('published', util.EPOCH_ISO) > last_public_post]) - - # - # Step 2: extract responses, store their activities in response['activities'] - # - # WARNING: this creates circular references in link posts found by search - # queries in step 1, since they are their own activity. We use - # prune_activity() and prune_response() in step 4 to remove these before - # serializing to JSON. 
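A toy illustration, with made-up data, of why that pruning matters: a link post found by search is its own activity, so the response ends up referencing itself and json.dumps() refuses to serialize it until the reference is pruned.

import json

resp = {"id": "tag:example.com:1", "objectType": "activity"}
resp["activities"] = [resp]  # a link post found by search is its own activity

try:
    json.dumps(resp)
except ValueError as e:
    print(e)  # Circular reference detected

resp["activities"] = [{"id": a["id"]} for a in resp["activities"]]  # prune first
print(json.dumps(resp))  # now serializes cleanly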
- # - for id, activity in public.items(): - obj = activity.get('object') or activity - - # handle user mentions - user_id = source.user_tag_id() - if obj.get('author', {}).get('id') != user_id and activity.get('verb') != 'share': - for tag in obj.get('tags', []): - urls = tag.get('urls') - if tag.get('objectType') == 'person' and tag.get('id') == user_id and urls: - activity['originals'], activity['mentions'] = \ - original_post_discovery.discover( - source, activity, fetch_hfeed=True, - include_redirect_sources=False, - already_fetched_hfeeds=fetched_hfeeds) - activity['mentions'].update(u.get('value') for u in urls) - responses[id] = activity - break - - # handle quote mentions - for att in obj.get('attachments', []): - if (att.get('objectType') in ('note', 'article') - and att.get('author', {}).get('id') == source.user_tag_id()): - # now that we've confirmed that one exists, OPD will dig - # into the actual attachments - if 'originals' not in activity or 'mentions' not in activity: - activity['originals'], activity['mentions'] = \ - original_post_discovery.discover( - source, activity, fetch_hfeed=True, - include_redirect_sources=False, - already_fetched_hfeeds=fetched_hfeeds) - responses[id] = activity - break - - # extract replies, likes, reactions, reposts, and rsvps - replies = obj.get('replies', {}).get('items', []) - tags = obj.get('tags', []) - likes = [t for t in tags if Response.get_type(t) == 'like'] - reactions = [t for t in tags if Response.get_type(t) == 'react'] - reposts = [t for t in tags if Response.get_type(t) == 'repost'] - rsvps = Source.get_rsvps_from_event(obj) - - # coalesce responses. drop any without ids - for resp in replies + likes + reactions + reposts + rsvps: - id = resp.get('id') - if not id: - logging.error('Skipping response without id: %s', json_dumps(resp, indent=2)) - continue - - if source.is_blocked(resp): - logging.info('Skipping response by blocked user: %s', - json_dumps(resp.get('author') or resp.get('actor'), indent=2)) - continue - - resp.setdefault('activities', []).append(activity) - - # when we find two responses with the same id, the earlier one may have - # come from a link post or user mention, and this one is probably better - # since it probably came from the user's activity, so prefer this one. - # background: https://github.com/snarfed/bridgy/issues/533 - existing = responses.get(id) - if existing: - if source.gr_source.activity_changed(resp, existing, log=True): - logging.warning('Got two different versions of same response!\n%s\n%s', - existing, resp) - resp['activities'].extend(existing.get('activities', [])) - - responses[id] = resp - - # - # Step 3: filter out responses we've already seen - # - # seen responses (JSON objects) for each source are stored in its entity. 
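The coalescing logic above handles the case where the same response id is found both through a link or mention post and through the user's own activity: the later copy wins, but the earlier copy's activities are kept so no webmention target is lost. A condensed sketch of that merge, with activity_changed() passed in as a comparison callback:

    def coalesce(responses, resp, activity, activity_changed):
        """Insert resp into responses (a dict keyed by id), merging duplicates."""
        id = resp.get("id")
        if not id:
            return  # responses without ids are skipped

        resp.setdefault("activities", []).append(activity)

        existing = responses.get(id)
        if existing:
            if activity_changed(resp, existing):
                print("two different versions of the same response!")
            # keep the activities discovered by the earlier copy
            resp["activities"].extend(existing.get("activities", []))

        responses[id] = resp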
- unchanged_responses = [] - if source.seen_responses_cache_json: - for seen in json_loads(source.seen_responses_cache_json): - id = seen['id'] - resp = responses.get(id) - if resp and not source.gr_source.activity_changed(seen, resp, log=True): - unchanged_responses.append(seen) - del responses[id] - - # - # Step 4: store new responses and enqueue propagate tasks - # - pruned_responses = [] - source.blocked_ids = None - - for id, resp in responses.items(): - resp_type = Response.get_type(resp) - activities = resp.pop('activities', []) - if not activities and resp_type == 'post': - activities = [resp] - too_long = set() - urls_to_activity = {} - for i, activity in enumerate(activities): - # we'll usually have multiple responses for the same activity, and the - # objects in resp['activities'] are shared, so cache each activity's - # discovered webmention targets inside its object. - if 'originals' not in activity or 'mentions' not in activity: - activity['originals'], activity['mentions'] = \ - original_post_discovery.discover( - source, activity, fetch_hfeed=True, - include_redirect_sources=False, - already_fetched_hfeeds=fetched_hfeeds) - - targets = original_post_discovery.targets_for_response( - resp, originals=activity['originals'], mentions=activity['mentions']) - if targets: - logging.info('%s has %d webmention target(s): %s', activity.get('url'), - len(targets), ' '.join(targets)) - # new response to propagate! load block list if we haven't already - if source.blocked_ids is None: - source.load_blocklist() - - for t in targets: - if len(t) <= _MAX_STRING_LENGTH: - urls_to_activity[t] = i - else: - logging.info('Giving up on target URL over %s chars! %s', - _MAX_STRING_LENGTH, t) - too_long.add(t[:_MAX_STRING_LENGTH - 4] + '...') - - # store/update response entity. the prune_*() calls are important to - # remove circular references in link responses, which are their own - # activities. details in the step 2 comment above. - pruned_response = util.prune_response(resp) - pruned_responses.append(pruned_response) - resp_entity = Response( - id=id, - source=source.key, - activities_json=[json_dumps(util.prune_activity(a, source)) - for a in activities], - response_json=json_dumps(pruned_response), - type=resp_type, - unsent=list(urls_to_activity.keys()), - failed=list(too_long), - original_posts=resp.get('originals', [])) - if urls_to_activity and len(activities) > 1: - resp_entity.urls_to_activity=json_dumps(urls_to_activity) - resp_entity.get_or_save(source, restart=self.RESTART_EXISTING_TASKS) - - # update cache - if pruned_responses: - source.updates['seen_responses_cache_json'] = json_dumps( - pruned_responses + unchanged_responses) - - def repropagate_old_responses(self, source, relationships): - """Find old Responses that match a new SyndicatedPost and repropagate them. - - We look through as many responses as we can until the datastore query expires. - - Args: - source: :class:`models.Source` - relationships: refetch result + Inserts a propagate task for each response that hasn't been seen before. + + Steps: + 1: Fetch activities: posts by the user, links to the user's domain(s). + 2: Extract responses, store their activities. + 3: Filter out responses we've already seen, using Responses in the datastore. + 4: Store new responses and enqueue propagate tasks. + 5: Possibly refetch updated syndication urls. + + 1-4 are in backfeed(); 5 is in poll(). 
""" - for response in (Response.query(Response.source == source.key) - .order(-Response.updated)): - new_orig_urls = set() - for activity_json in response.activities_json: - activity = json_loads(activity_json) - activity_url = activity.get('url') or activity.get('object', {}).get('url') - if not activity_url: - logging.warning('activity has no url %s', activity_json) - continue - - activity_url = source.canonicalize_url(activity_url, activity=activity) - if not activity_url: - continue - - # look for activity url in the newly discovered list of relationships - for relationship in relationships.get(activity_url, []): - # won't re-propagate if the discovered link is already among - # these well-known upstream duplicates - if (relationship.original in response.sent or - relationship.original in response.original_posts): - logging.info( - '%s found a new rel=syndication link %s -> %s, but the ' - 'relationship had already been discovered by another method', - response.label(), relationship.original, relationship.syndication) - else: - logging.info( - '%s found a new rel=syndication link %s -> %s, and ' - 'will be repropagated with a new target!', - response.label(), relationship.original, relationship.syndication) - new_orig_urls.add(relationship.original) - if new_orig_urls: - # re-open a previously 'complete' propagate task - response.status = 'new' - response.unsent.extend(list(new_orig_urls)) - response.put() - response.add_task() + RESTART_EXISTING_TASKS = False # overridden in Discover + + def _last_poll_url(self, source): + return util.host_url(logs.url(source.last_poll_attempt, source.key)) + + def dispatch_request(self): + logging.debug("Params: %s", list(request.values.items())) + + key = request.values["source_key"] + source = g.source = ndb.Key(urlsafe=key).get() + if not source or source.status == "disabled" or "listen" not in source.features: + logging.error("Source not found or disabled. Dropping task.") + return "" + logging.info( + "Source: %s %s, %s", source.label(), source.key_id(), source.bridgy_url() + ) + + if source.AUTO_POLL: + last_polled = request.values["last_polled"] + if last_polled != source.last_polled.strftime( + util.POLL_TASK_DATETIME_FORMAT + ): + logging.warning("duplicate poll task! deferring to the other task.") + return "" + + logging.info("Last poll: %s", self._last_poll_url(source)) + + # mark this source as polling + source.updates = { + "poll_status": "polling", + "last_poll_attempt": util.now_fn(), + "rate_limited": False, + } + source = models.Source.put_updates(source) + + source.updates = {} + try: + self.poll(source) + except Exception as e: + source.updates["poll_status"] = "error" + code, _ = util.interpret_http_exception(e) + if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource): + # the user deauthorized the bridgy app, so disable this source. + # let the task complete successfully so that it's not retried. + logging.warning(f"Disabling source due to: {e}", exc_info=True) + source.updates.update( + { + "status": "disabled", + "poll_status": "ok", + } + ) + elif code in source.RATE_LIMIT_HTTP_CODES: + logging.info("Rate limited. Marking as error and finishing. %s", e) + source.updates["rate_limited"] = True + else: + raise + finally: + source = models.Source.put_updates(source) + + if source.AUTO_POLL: + util.add_poll_task(source) + + # feeble attempt to avoid hitting the instance memory limit + source = None + gc.collect() + + return "OK" + + def poll(self, source): + """Actually runs the poll. 
+ + Stores property names and values to update in source.updates. + """ + if source.last_activities_etag or source.last_activity_id: + logging.debug( + "Using ETag %s, last activity id %s", + source.last_activities_etag, + source.last_activity_id, + ) + + # + # Step 1: fetch activities: + # * posts by the user + # * search all posts for the user's domain URLs to find links + # + cache = util.CacheDict() + if source.last_activities_cache_json: + cache.update(json_loads(source.last_activities_cache_json)) + + # search for links first so that the user's activities and responses + # override them if they overlap + links = source.search_for_links() + + # this user's own activities (and user mentions) + resp = source.get_activities_response( + fetch_replies=True, + fetch_likes=True, + fetch_shares=True, + fetch_mentions=True, + count=50, + etag=source.last_activities_etag, + min_id=source.last_activity_id, + cache=cache, + ) + etag = resp.get("etag") # used later + user_activities = resp.get("items", []) + + # these map ids to AS objects. + # backfeed all links as responses, but only include the user's own links as + # activities, since their responses also get backfeed. + responses = {a["id"]: a for a in links} + + user_id = source.user_tag_id() + links_by_user = [ + a + for a in links + if a.get("object", {}).get("author", {}).get("id") == user_id + ] + activities = {a["id"]: a for a in links_by_user + user_activities} + + # extract silo activity ids, update last_activity_id + silo_activity_ids = set() + last_activity_id = source.last_activity_id + for id, activity in activities.items(): + # maybe replace stored last activity id + parsed = util.parse_tag_uri(id) + if parsed: + id = parsed[1] + silo_activity_ids.add(id) + try: + # try numeric comparison first + greater = int(id) > int(last_activity_id) + except (TypeError, ValueError): + greater = str(id) > str(last_activity_id) + if greater: + last_activity_id = id + + if last_activity_id and last_activity_id != source.last_activity_id: + source.updates["last_activity_id"] = last_activity_id + + # trim cache to just the returned activity ids, so that it doesn't grow + # without bound. (WARNING: depends on get_activities_response()'s cache key + # format, e.g. 'PREFIX ACTIVITY_ID'!) + source.updates["last_activities_cache_json"] = json_dumps( + {k: v for k, v in cache.items() if k.split()[-1] in silo_activity_ids} + ) + + self.backfeed(source, responses, activities=activities) + + source.updates.update( + {"last_polled": source.last_poll_attempt, "poll_status": "ok"} + ) + if etag and etag != source.last_activities_etag: + source.updates["last_activities_etag"] = etag + + # + # Possibly refetch updated syndication urls. + # + # if the author has added syndication urls since the first time + # original_post_discovery ran, we'll miss them. this cleanup task will + # periodically check for updated urls. 
only kicks in if the author has + # *ever* published a rel=syndication url + if source.should_refetch(): + logging.info("refetching h-feed for source %s", source.label()) + relationships = original_post_discovery.refetch(source) + + now = util.now_fn() + source.updates["last_hfeed_refetch"] = now + + if relationships: + logging.info( + "refetch h-feed found new rel=syndication relationships: %s", + relationships, + ) + try: + self.repropagate_old_responses(source, relationships) + except BaseException as e: + if ( + "BadRequestError" in str(e.__class__) + or "Timeout" in str(e.__class__) + or util.is_connection_failure(e) + ): + logging.info( + "Timeout while repropagating responses.", exc_info=True + ) + else: + raise + else: + logging.info( + "skipping refetch h-feed. last-syndication-url %s, last-refetch %s", + source.last_syndication_url, + source.last_hfeed_refetch, + ) + + def backfeed(self, source, responses=None, activities=None): + """Processes responses and activities and generates propagate tasks. + + Stores property names and values to update in source.updates. + + Args: + source: Source + responses: dict mapping AS response id to AS object + activities: dict mapping AS activity id to AS object + """ + if responses is None: + responses = {} + if activities is None: + activities = {} + + # Cache to make sure we only fetch the author's h-feed(s) the + # first time we see it + fetched_hfeeds = set() + + # narrow down to just public activities + public = {} + private = {} + for id, activity in activities.items(): + (public if source.is_activity_public(activity) else private)[id] = activity + logging.info("Found %d public activities: %s", len(public), public.keys()) + logging.info("Found %d private activities: %s", len(private), private.keys()) + + last_public_post = (source.last_public_post or util.EPOCH).isoformat() + public_published = util.trim_nulls( + [a.get("object", {}).get("published") for a in public.values()] + ) + if public_published: + max_published = max(public_published) + if max_published > last_public_post: + last_public_post = max_published + source.updates["last_public_post"] = util.as_utc( + util.parse_iso8601(max_published) + ) + + source.updates["recent_private_posts"] = len( + [ + a + for a in private.values() + if a.get("object", {}).get("published", util.EPOCH_ISO) + > last_public_post + ] + ) + + # + # Step 2: extract responses, store their activities in response['activities'] + # + # WARNING: this creates circular references in link posts found by search + # queries in step 1, since they are their own activity. We use + # prune_activity() and prune_response() in step 4 to remove these before + # serializing to JSON. 
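Earlier in backfeed(), before Step 2, two per-source numbers are maintained: the timestamp of the newest public post, and how many private posts are newer than it. With ISO-8601 strings standing in for the parsed AS objects, that bookkeeping is simply:

    def private_post_stats(public_published, private_published, last_public_post):
        """All arguments are ISO-8601 strings; returns (new_last, private_count)."""
        if public_published:
            last_public_post = max([last_public_post] + list(public_published))
        recent_private = sum(1 for p in private_published if p > last_public_post)
        return last_public_post, recent_private

    # example
    last, count = private_post_stats(
        public_published=["2023-01-02T00:00:00", "2023-01-05T00:00:00"],
        private_published=["2023-01-06T00:00:00", "2023-01-01T00:00:00"],
        last_public_post="2023-01-03T00:00:00",
    )
    assert last == "2023-01-05T00:00:00" and count == 1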
+ # + for id, activity in public.items(): + obj = activity.get("object") or activity + + # handle user mentions + user_id = source.user_tag_id() + if ( + obj.get("author", {}).get("id") != user_id + and activity.get("verb") != "share" + ): + for tag in obj.get("tags", []): + urls = tag.get("urls") + if ( + tag.get("objectType") == "person" + and tag.get("id") == user_id + and urls + ): + ( + activity["originals"], + activity["mentions"], + ) = original_post_discovery.discover( + source, + activity, + fetch_hfeed=True, + include_redirect_sources=False, + already_fetched_hfeeds=fetched_hfeeds, + ) + activity["mentions"].update(u.get("value") for u in urls) + responses[id] = activity + break + + # handle quote mentions + for att in obj.get("attachments", []): + if ( + att.get("objectType") in ("note", "article") + and att.get("author", {}).get("id") == source.user_tag_id() + ): + # now that we've confirmed that one exists, OPD will dig + # into the actual attachments + if "originals" not in activity or "mentions" not in activity: + ( + activity["originals"], + activity["mentions"], + ) = original_post_discovery.discover( + source, + activity, + fetch_hfeed=True, + include_redirect_sources=False, + already_fetched_hfeeds=fetched_hfeeds, + ) + responses[id] = activity + break + + # extract replies, likes, reactions, reposts, and rsvps + replies = obj.get("replies", {}).get("items", []) + tags = obj.get("tags", []) + likes = [t for t in tags if Response.get_type(t) == "like"] + reactions = [t for t in tags if Response.get_type(t) == "react"] + reposts = [t for t in tags if Response.get_type(t) == "repost"] + rsvps = Source.get_rsvps_from_event(obj) + + # coalesce responses. drop any without ids + for resp in replies + likes + reactions + reposts + rsvps: + id = resp.get("id") + if not id: + logging.error( + "Skipping response without id: %s", json_dumps(resp, indent=2) + ) + continue + + if source.is_blocked(resp): + logging.info( + "Skipping response by blocked user: %s", + json_dumps(resp.get("author") or resp.get("actor"), indent=2), + ) + continue + + resp.setdefault("activities", []).append(activity) + + # when we find two responses with the same id, the earlier one may have + # come from a link post or user mention, and this one is probably better + # since it probably came from the user's activity, so prefer this one. + # background: https://github.com/snarfed/bridgy/issues/533 + existing = responses.get(id) + if existing: + if source.gr_source.activity_changed(resp, existing, log=True): + logging.warning( + "Got two different versions of same response!\n%s\n%s", + existing, + resp, + ) + resp["activities"].extend(existing.get("activities", [])) + + responses[id] = resp + + # + # Step 3: filter out responses we've already seen + # + # seen responses (JSON objects) for each source are stored in its entity. 
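Step 3 above is essentially a cache lookup: anything already present in seen_responses_cache_json that has not materially changed is dropped before Step 4, so no duplicate propagate tasks get created. A minimal sketch, again with activity_changed() passed in as a callback:

    import json

    def filter_seen(responses, seen_cache_json, activity_changed):
        """Remove responses that match the stored seen cache; return the matches."""
        unchanged = []
        if seen_cache_json:
            for seen in json.loads(seen_cache_json):
                resp = responses.get(seen["id"])
                if resp and not activity_changed(seen, resp):
                    unchanged.append(seen)
                    del responses[seen["id"]]
        return unchanged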
+ unchanged_responses = [] + if source.seen_responses_cache_json: + for seen in json_loads(source.seen_responses_cache_json): + id = seen["id"] + resp = responses.get(id) + if resp and not source.gr_source.activity_changed(seen, resp, log=True): + unchanged_responses.append(seen) + del responses[id] + + # + # Step 4: store new responses and enqueue propagate tasks + # + pruned_responses = [] + source.blocked_ids = None + + for id, resp in responses.items(): + resp_type = Response.get_type(resp) + activities = resp.pop("activities", []) + if not activities and resp_type == "post": + activities = [resp] + too_long = set() + urls_to_activity = {} + for i, activity in enumerate(activities): + # we'll usually have multiple responses for the same activity, and the + # objects in resp['activities'] are shared, so cache each activity's + # discovered webmention targets inside its object. + if "originals" not in activity or "mentions" not in activity: + ( + activity["originals"], + activity["mentions"], + ) = original_post_discovery.discover( + source, + activity, + fetch_hfeed=True, + include_redirect_sources=False, + already_fetched_hfeeds=fetched_hfeeds, + ) + + targets = original_post_discovery.targets_for_response( + resp, originals=activity["originals"], mentions=activity["mentions"] + ) + if targets: + logging.info( + "%s has %d webmention target(s): %s", + activity.get("url"), + len(targets), + " ".join(targets), + ) + # new response to propagate! load block list if we haven't already + if source.blocked_ids is None: + source.load_blocklist() + + for t in targets: + if len(t) <= _MAX_STRING_LENGTH: + urls_to_activity[t] = i + else: + logging.info( + "Giving up on target URL over %s chars! %s", + _MAX_STRING_LENGTH, + t, + ) + too_long.add(t[: _MAX_STRING_LENGTH - 4] + "...") + + # store/update response entity. the prune_*() calls are important to + # remove circular references in link responses, which are their own + # activities. details in the step 2 comment above. + pruned_response = util.prune_response(resp) + pruned_responses.append(pruned_response) + resp_entity = Response( + id=id, + source=source.key, + activities_json=[ + json_dumps(util.prune_activity(a, source)) for a in activities + ], + response_json=json_dumps(pruned_response), + type=resp_type, + unsent=list(urls_to_activity.keys()), + failed=list(too_long), + original_posts=resp.get("originals", []), + ) + if urls_to_activity and len(activities) > 1: + resp_entity.urls_to_activity = json_dumps(urls_to_activity) + resp_entity.get_or_save(source, restart=self.RESTART_EXISTING_TASKS) + + # update cache + if pruned_responses: + source.updates["seen_responses_cache_json"] = json_dumps( + pruned_responses + unchanged_responses + ) + + def repropagate_old_responses(self, source, relationships): + """Find old Responses that match a new SyndicatedPost and repropagate them. + + We look through as many responses as we can until the datastore query expires. 
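In Step 4, each discovered webmention target is mapped to the index of the activity it came from (urls_to_activity), and any target longer than the datastore string limit is shunted into failed with a truncated marker. The per-response bookkeeping, reduced to its essentials; the constant here is a placeholder standing in for the _MAX_STRING_LENGTH used above:

    _MAX_STRING_LENGTH = 500  # placeholder for the module's real limit

    def collect_targets(targets_per_activity):
        """targets_per_activity: list of target-URL lists, one list per activity."""
        urls_to_activity = {}
        too_long = set()
        for i, targets in enumerate(targets_per_activity):
            for t in targets:
                if len(t) <= _MAX_STRING_LENGTH:
                    urls_to_activity[t] = i
                else:
                    too_long.add(t[: _MAX_STRING_LENGTH - 4] + "...")
        return urls_to_activity, too_long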
+ + Args: + source: :class:`models.Source` + relationships: refetch result + """ + for response in Response.query(Response.source == source.key).order( + -Response.updated + ): + new_orig_urls = set() + for activity_json in response.activities_json: + activity = json_loads(activity_json) + activity_url = activity.get("url") or activity.get("object", {}).get( + "url" + ) + if not activity_url: + logging.warning("activity has no url %s", activity_json) + continue + + activity_url = source.canonicalize_url(activity_url, activity=activity) + if not activity_url: + continue + + # look for activity url in the newly discovered list of relationships + for relationship in relationships.get(activity_url, []): + # won't re-propagate if the discovered link is already among + # these well-known upstream duplicates + if ( + relationship.original in response.sent + or relationship.original in response.original_posts + ): + logging.info( + "%s found a new rel=syndication link %s -> %s, but the " + "relationship had already been discovered by another method", + response.label(), + relationship.original, + relationship.syndication, + ) + else: + logging.info( + "%s found a new rel=syndication link %s -> %s, and " + "will be repropagated with a new target!", + response.label(), + relationship.original, + relationship.syndication, + ) + new_orig_urls.add(relationship.original) + + if new_orig_urls: + # re-open a previously 'complete' propagate task + response.status = "new" + response.unsent.extend(list(new_orig_urls)) + response.put() + response.add_task() class Discover(Poll): - """Task handler that fetches and processes new responses to a single post. + """Task handler that fetches and processes new responses to a single post. - Request parameters: + Request parameters: - * source_key: string key of source entity - * post_id: string, silo post id(s) + * source_key: string key of source entity + * post_id: string, silo post id(s) - Inserts a propagate task for each response that hasn't been seen before. + Inserts a propagate task for each response that hasn't been seen before. - Original feature request: https://github.com/snarfed/bridgy/issues/579 - """ - RESTART_EXISTING_TASKS = True + Original feature request: https://github.com/snarfed/bridgy/issues/579 + """ - def dispatch_request(self): - logging.debug('Params: %s', list(request.values.items())) - g.TRANSIENT_ERROR_HTTP_CODES = ('400', '404') + RESTART_EXISTING_TASKS = True - type = request.values.get('type') - if type: - assert type in ('event',) + def dispatch_request(self): + logging.debug("Params: %s", list(request.values.items())) + g.TRANSIENT_ERROR_HTTP_CODES = ("400", "404") - source = g.source = util.load_source() - if not source or source.status == 'disabled' or 'listen' not in source.features: - logging.error('Source not found or disabled. Dropping task.') - return '' - logging.info('Source: %s %s, %s', source.label(), source.key_id(), - source.bridgy_url()) + type = request.values.get("type") + if type: + assert type in ("event",) - post_id = request.values['post_id'] - source.updates = {} + source = g.source = util.load_source() + if not source or source.status == "disabled" or "listen" not in source.features: + logging.error("Source not found or disabled. 
Dropping task.") + return "" + logging.info( + "Source: %s %s, %s", source.label(), source.key_id(), source.bridgy_url() + ) - if type == 'event': - activities = [source.gr_source.get_event(post_id)] - else: - activities = source.get_activities( - fetch_replies=True, fetch_likes=True, fetch_shares=True, - activity_id=post_id, user_id=source.key_id()) + post_id = request.values["post_id"] + source.updates = {} - if not activities or not activities[0]: - logging.info('Post %s not found.', post_id) - return '' - assert len(activities) == 1, activities - activity = activities[0] - activities = {activity['id']: activity} - self.backfeed(source, responses=activities, activities=activities) + if type == "event": + activities = [source.gr_source.get_event(post_id)] + else: + activities = source.get_activities( + fetch_replies=True, + fetch_likes=True, + fetch_shares=True, + activity_id=post_id, + user_id=source.key_id(), + ) + + if not activities or not activities[0]: + logging.info("Post %s not found.", post_id) + return "" + assert len(activities) == 1, activities + activity = activities[0] + activities = {activity["id"]: activity} + self.backfeed(source, responses=activities, activities=activities) + + obj = activity.get("object") or activity + in_reply_to = util.get_first(obj, "inReplyTo") + if in_reply_to: + parsed = util.parse_tag_uri( + in_reply_to.get("id", "") + ) # TODO: fall back to url + if parsed: + util.add_discover_task(source, parsed[1]) + + return "OK" - obj = activity.get('object') or activity - in_reply_to = util.get_first(obj, 'inReplyTo') - if in_reply_to: - parsed = util.parse_tag_uri(in_reply_to.get('id', '')) # TODO: fall back to url - if parsed: - util.add_discover_task(source, parsed[1]) - return 'OK' +class SendWebmentions(View): + """Abstract base task handler that can send webmentions. + Attributes: -class SendWebmentions(View): - """Abstract base task handler that can send webmentions. + * entity: :class:`models.Webmentions` subclass instance (set in :meth:`lease_entity`) + * source: :class:`models.Source` entity (set in :meth:`send_webmentions`) + """ - Attributes: + # request deadline (10m) plus some padding + LEASE_LENGTH = datetime.timedelta(minutes=12) - * entity: :class:`models.Webmentions` subclass instance (set in :meth:`lease_entity`) - * source: :class:`models.Source` entity (set in :meth:`send_webmentions`) - """ - # request deadline (10m) plus some padding - LEASE_LENGTH = datetime.timedelta(minutes=12) + def source_url(self, target_url): + """Return the source URL to use for a given target URL. - def source_url(self, target_url): - """Return the source URL to use for a given target URL. + Subclasses must implement. - Subclasses must implement. + Args: + target_url: string - Args: - target_url: string + Returns: + string + """ + raise NotImplementedError() - Returns: - string - """ - raise NotImplementedError() + def send_webmentions(self): + """Tries to send each unsent webmention in self.entity. - def send_webmentions(self): - """Tries to send each unsent webmention in self.entity. + Uses :meth:`source_url()` to determine the source parameter for each + webmention. - Uses :meth:`source_url()` to determine the source parameter for each - webmention. + :meth:`lease()` *must* be called before this! + """ + logging.info("Starting %s", self.entity.label()) - :meth:`lease()` *must* be called before this! 
- """ - logging.info('Starting %s', self.entity.label()) - - try: - self.do_send_webmentions() - except: - logging.info('Propagate task failed', exc_info=True) - self.release('error') - raise - - def do_send_webmentions(self): - urls = self.entity.unsent + self.entity.error + self.entity.failed - unsent = set() - self.entity.error = [] - self.entity.failed = [] - - for orig_url in urls: - # recheck the url here since the checks may have failed during the poll - # or streaming add. - url, domain, ok = util.get_webmention_target(orig_url) - if ok: - if len(url) <= _MAX_STRING_LENGTH: - unsent.add(url) - else: - logging.info('Giving up on target URL over %s chars! %s', - _MAX_STRING_LENGTH, url) - self.entity.failed.append(orig_url) - self.entity.unsent = sorted(unsent) - - while self.entity.unsent: - target = self.entity.unsent.pop(0) - source_url = self.source_url(target) - logging.info('Webmention from %s to %s', source_url, target) - - # see if we've cached webmention discovery for this domain. the cache - # value is a string URL endpoint if discovery succeeded, NO_ENDPOINT if - # no endpoint was ofund. - cache_key = util.webmention_endpoint_cache_key(target) - endpoint = util.webmention_endpoint_cache.get(cache_key) - if endpoint: - logging.info(f'Webmention discovery: using cached endpoint {cache_key}: {endpoint}') - - # send! and handle response or error - try: - resp = None - headers = util.request_headers(source=g.source) - if not endpoint: - endpoint, resp = webmention.discover(target, headers=headers) - with util.webmention_endpoint_cache_lock: - util.webmention_endpoint_cache[cache_key] = endpoint or NO_ENDPOINT - - if endpoint and endpoint != NO_ENDPOINT: - logging.info('Sending...') - resp = webmention.send(endpoint, source_url, target, timeout=999, - headers=headers) - logging.info('Sent! %s', resp) - self.record_source_webmention(endpoint, target) - self.entity.sent.append(target) + try: + self.do_send_webmentions() + except: + logging.info("Propagate task failed", exc_info=True) + self.release("error") + raise + + def do_send_webmentions(self): + urls = self.entity.unsent + self.entity.error + self.entity.failed + unsent = set() + self.entity.error = [] + self.entity.failed = [] + + for orig_url in urls: + # recheck the url here since the checks may have failed during the poll + # or streaming add. + url, domain, ok = util.get_webmention_target(orig_url) + if ok: + if len(url) <= _MAX_STRING_LENGTH: + unsent.add(url) + else: + logging.info( + "Giving up on target URL over %s chars! %s", + _MAX_STRING_LENGTH, + url, + ) + self.entity.failed.append(orig_url) + self.entity.unsent = sorted(unsent) + + while self.entity.unsent: + target = self.entity.unsent.pop(0) + source_url = self.source_url(target) + logging.info("Webmention from %s to %s", source_url, target) + + # see if we've cached webmention discovery for this domain. the cache + # value is a string URL endpoint if discovery succeeded, NO_ENDPOINT if + # no endpoint was ofund. + cache_key = util.webmention_endpoint_cache_key(target) + endpoint = util.webmention_endpoint_cache.get(cache_key) + if endpoint: + logging.info( + f"Webmention discovery: using cached endpoint {cache_key}: {endpoint}" + ) + + # send! 
and handle response or error + try: + resp = None + headers = util.request_headers(source=g.source) + if not endpoint: + endpoint, resp = webmention.discover(target, headers=headers) + with util.webmention_endpoint_cache_lock: + util.webmention_endpoint_cache[cache_key] = ( + endpoint or NO_ENDPOINT + ) + + if endpoint and endpoint != NO_ENDPOINT: + logging.info("Sending...") + resp = webmention.send( + endpoint, source_url, target, timeout=999, headers=headers + ) + logging.info("Sent! %s", resp) + self.record_source_webmention(endpoint, target) + self.entity.sent.append(target) + else: + logging.info("Giving up this target.") + self.entity.skipped.append(target) + + except ValueError: + logging.info("Bad URL; giving up this target.") + self.entity.skipped.append(target) + + except BaseException as e: + logging.info("", exc_info=True) + # Give up on 4XX and DNS errors; we don't expect retries to succeed. + code, _ = util.interpret_http_exception(e) + if (code and code.startswith("4")) or "DNS lookup failed" in str(e): + logging.info("Giving up this target.") + self.entity.failed.append(target) + else: + self.fail(f"Error sending to endpoint: {resp}") + self.entity.error.append(target) + + if target in self.entity.unsent: + self.entity.unsent.remove(target) + + if self.entity.error: + logging.info("Some targets failed") + self.release("error") else: - logging.info('Giving up this target.') - self.entity.skipped.append(target) - - except ValueError: - logging.info('Bad URL; giving up this target.') - self.entity.skipped.append(target) - - except BaseException as e: - logging.info('', exc_info=True) - # Give up on 4XX and DNS errors; we don't expect retries to succeed. - code, _ = util.interpret_http_exception(e) - if (code and code.startswith('4')) or 'DNS lookup failed' in str(e): - logging.info('Giving up this target.') - self.entity.failed.append(target) + self.complete() + + @ndb.transactional() + def lease(self, key): + """Attempts to acquire and lease the :class:`models.Webmentions` entity. + + Also loads and sets `g.source`, and returns False if the source doesn't + exist or is disabled. + + TODO: unify with :meth:`complete()` + + Args: + key: :class:`ndb.Key` + + Returns: True on success, False or None otherwise + """ + self.entity = key.get() + + if self.entity is None: + return self.fail("no entity!") + elif self.entity.status == "complete": + # let this task return 200 and finish + logging.warning("duplicate task already propagated this") + return + elif ( + self.entity.status == "processing" + and util.now_fn() < self.entity.leased_until + ): + return self.fail("duplicate task is currently processing!") + + g.source = self.entity.source.get() + if not g.source or g.source.status == "disabled": + logging.error("Source not found or disabled. Dropping task.") + return False + logging.info( + "Source: %s %s, %s", + g.source.label(), + g.source.key_id(), + g.source.bridgy_url(), + ) + + assert self.entity.status in ("new", "processing", "error"), self.entity.status + self.entity.status = "processing" + self.entity.leased_until = util.now_fn() + self.LEASE_LENGTH + self.entity.put() + return True + + @ndb.transactional() + def complete(self): + """Attempts to mark the :class:`models.Webmentions` entity completed. + + Returns True on success, False otherwise. 
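The lease()/complete()/release() trio above is a small optimistic-locking protocol on the Webmentions entity: a task may only work on an entity it has moved to "processing" with a fresh leased_until, and only a task whose lease is still valid gets to mark it "complete". Stripped of the ndb transaction machinery, the state transitions look roughly like this, with plain dicts and a now value passed in:

    import datetime

    LEASE_LENGTH = datetime.timedelta(minutes=12)  # request deadline plus padding

    def try_lease(entity, now):
        """Return True if this task may process entity, False/None otherwise."""
        if entity is None:
            return False  # fail: no entity
        if entity["status"] == "complete":
            return None  # already done; finish quietly
        if entity["status"] == "processing" and now < entity["leased_until"]:
            return False  # another task holds a valid lease
        entity["status"] = "processing"
        entity["leased_until"] = now + LEASE_LENGTH
        return True

    def try_complete(entity, stored_status):
        """stored_status is the status re-read inside the transaction."""
        if stored_status == "processing" and entity["status"] == "processing":
            entity["status"] = "complete"
            return True
        return False  # lease expired, or state went backward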
+ """ + existing = self.entity.key.get() + if existing is None: + self.fail("entity disappeared!") + elif existing.status == "complete": + # let this task return 200 and finish + logging.warning( + "another task stole and finished this. did my lease expire?" + ) + elif self.entity.status == "complete": + # let this task return 200 and finish + logging.error( + "i already completed this task myself somehow?! " + "https://github.com/snarfed/bridgy/issues/610" + ) + elif existing.status == "new": + self.fail("went backward from processing to new!") else: - self.fail(f'Error sending to endpoint: {resp}') - self.entity.error.append(target) + assert existing.status == "processing", existing.status + assert self.entity.status == "processing", self.entity.status + self.entity.status = "complete" + self.entity.put() + return True + + return False + + @ndb.transactional() + def release(self, new_status): + """Attempts to unlease the :class:`models.Webmentions` entity. + + Args: + new_status: string + """ + existing = self.entity.key.get() + if existing and existing.status == "processing": + self.entity.status = new_status + self.entity.leased_until = None + self.entity.put() + + def fail(self, message): + """Marks the request failed and logs an error message.""" + logging.warning(message) + g.failed = True + + @ndb.transactional() + def record_source_webmention(self, endpoint, target): + """Sets this source's last_webmention_sent and maybe webmention_endpoint. + + Args: + endpoint: str, URL + target: str, URL + """ + g.source = g.source.key.get() + logging.info("Setting last_webmention_sent") + g.source.last_webmention_sent = util.now_fn() + + if ( + endpoint != g.source.webmention_endpoint + and util.domain_from_link(target) in g.source.domains + ): + logging.info( + "Also setting webmention_endpoint to %s (discovered in %s; was %s)", + endpoint, + target, + g.source.webmention_endpoint, + ) + g.source.webmention_endpoint = endpoint - if target in self.entity.unsent: - self.entity.unsent.remove(target) + g.source.put() - if self.entity.error: - logging.info('Some targets failed') - self.release('error') - else: - self.complete() - @ndb.transactional() - def lease(self, key): - """Attempts to acquire and lease the :class:`models.Webmentions` entity. +class PropagateResponse(SendWebmentions): + """Task handler that sends webmentions for a :class:`models.Response`. - Also loads and sets `g.source`, and returns False if the source doesn't - exist or is disabled. + Attributes: - TODO: unify with :meth:`complete()` + * activities: parsed :attr:`models.Response.activities_json` list - Args: - key: :class:`ndb.Key` + Request parameters: - Returns: True on success, False or None otherwise - """ - self.entity = key.get() - - if self.entity is None: - return self.fail('no entity!') - elif self.entity.status == 'complete': - # let this task return 200 and finish - logging.warning('duplicate task already propagated this') - return - elif (self.entity.status == 'processing' and - util.now_fn() < self.entity.leased_until): - return self.fail('duplicate task is currently processing!') - - g.source = self.entity.source.get() - if not g.source or g.source.status == 'disabled': - logging.error('Source not found or disabled. 
Dropping task.') - return False - logging.info('Source: %s %s, %s', g.source.label(), g.source.key_id(), - g.source.bridgy_url()) - - assert self.entity.status in ('new', 'processing', 'error'), self.entity.status - self.entity.status = 'processing' - self.entity.leased_until = util.now_fn() + self.LEASE_LENGTH - self.entity.put() - return True - - @ndb.transactional() - def complete(self): - """Attempts to mark the :class:`models.Webmentions` entity completed. - - Returns True on success, False otherwise. - """ - existing = self.entity.key.get() - if existing is None: - self.fail('entity disappeared!') - elif existing.status == 'complete': - # let this task return 200 and finish - logging.warning('another task stole and finished this. did my lease expire?') - elif self.entity.status == 'complete': - # let this task return 200 and finish - logging.error('i already completed this task myself somehow?! ' - 'https://github.com/snarfed/bridgy/issues/610') - elif existing.status == 'new': - self.fail('went backward from processing to new!') - else: - assert existing.status == 'processing', existing.status - assert self.entity.status == 'processing', self.entity.status - self.entity.status = 'complete' - self.entity.put() - return True - - return False - - @ndb.transactional() - def release(self, new_status): - """Attempts to unlease the :class:`models.Webmentions` entity. - - Args: - new_status: string - """ - existing = self.entity.key.get() - if existing and existing.status == 'processing': - self.entity.status = new_status - self.entity.leased_until = None - self.entity.put() - - def fail(self, message): - """Marks the request failed and logs an error message.""" - logging.warning(message) - g.failed = True - - @ndb.transactional() - def record_source_webmention(self, endpoint, target): - """Sets this source's last_webmention_sent and maybe webmention_endpoint. - - Args: - endpoint: str, URL - target: str, URL + * response_key: string key of :class:`models.Response` entity """ - g.source = g.source.key.get() - logging.info('Setting last_webmention_sent') - g.source.last_webmention_sent = util.now_fn() - if (endpoint != g.source.webmention_endpoint and - util.domain_from_link(target) in g.source.domains): - logging.info('Also setting webmention_endpoint to %s (discovered in %s; was %s)', - endpoint, target, g.source.webmention_endpoint) - g.source.webmention_endpoint = endpoint - - g.source.put() - - -class PropagateResponse(SendWebmentions): - """Task handler that sends webmentions for a :class:`models.Response`. - - Attributes: - - * activities: parsed :attr:`models.Response.activities_json` list - - Request parameters: - - * response_key: string key of :class:`models.Response` entity - """ - - def dispatch_request(self): - logging.debug('Params: %s', list(request.values.items())) - if not self.lease(ndb.Key(urlsafe=request.values['response_key'])): - return ('', ERROR_HTTP_RETURN_CODE) if getattr(g, 'failed', None) else 'OK' - - source = g.source - poll_estimate = self.entity.created - datetime.timedelta(seconds=61) - poll_url = util.host_url(logs.url(poll_estimate, source.key)) - logging.info(f'Created by this poll: {poll_url}') - - self.activities = [json_loads(a) for a in self.entity.activities_json] - response_obj = json_loads(self.entity.response_json) - if (not source.is_activity_public(response_obj) or - not all(source.is_activity_public(a) for a in self.activities)): - logging.info('Response or activity is non-public. 
Dropping.') - self.complete() - return '' - - self.send_webmentions() - return ('', ERROR_HTTP_RETURN_CODE) if getattr(g, 'failed', None) else 'OK' - - def source_url(self, target_url): - # determine which activity to use - try: - activity = self.activities[0] - if self.entity.urls_to_activity: - urls_to_activity = json_loads(self.entity.urls_to_activity) - if urls_to_activity: - activity = self.activities[urls_to_activity[target_url]] - except (KeyError, IndexError): - error("""Hit https://github.com/snarfed/bridgy/issues/237 KeyError! + def dispatch_request(self): + logging.debug("Params: %s", list(request.values.items())) + if not self.lease(ndb.Key(urlsafe=request.values["response_key"])): + return ("", ERROR_HTTP_RETURN_CODE) if getattr(g, "failed", None) else "OK" + + source = g.source + poll_estimate = self.entity.created - datetime.timedelta(seconds=61) + poll_url = util.host_url(logs.url(poll_estimate, source.key)) + logging.info(f"Created by this poll: {poll_url}") + + self.activities = [json_loads(a) for a in self.entity.activities_json] + response_obj = json_loads(self.entity.response_json) + if not source.is_activity_public(response_obj) or not all( + source.is_activity_public(a) for a in self.activities + ): + logging.info("Response or activity is non-public. Dropping.") + self.complete() + return "" + + self.send_webmentions() + return ("", ERROR_HTTP_RETURN_CODE) if getattr(g, "failed", None) else "OK" + + def source_url(self, target_url): + # determine which activity to use + try: + activity = self.activities[0] + if self.entity.urls_to_activity: + urls_to_activity = json_loads(self.entity.urls_to_activity) + if urls_to_activity: + activity = self.activities[urls_to_activity[target_url]] + except (KeyError, IndexError): + error( + """Hit https://github.com/snarfed/bridgy/issues/237 KeyError! target url {target_url} not in urls_to_activity: {self.entity.urls_to_activity} -activities: {self.activities}""", status=ERROR_HTTP_RETURN_CODE) - - # generate source URL - id = activity['id'] - parsed = util.parse_tag_uri(id) - post_id = parsed[1] if parsed else id - parts = [self.entity.type, g.source.SHORT_NAME, g.source.key.string_id(), post_id] - - if self.entity.type != 'post': - # parse and add response id. (we know Response key ids are always tag URIs) - _, response_id = util.parse_tag_uri(self.entity.key.string_id()) - reaction_id = response_id - if self.entity.type in ('like', 'react', 'repost', 'rsvp'): - response_id = response_id.split('_')[-1] # extract responder user id - parts.append(response_id) - if self.entity.type == 'react': - parts.append(reaction_id) - - return util.host_url('/'.join(parts)) +activities: {self.activities}""", + status=ERROR_HTTP_RETURN_CODE, + ) + + # generate source URL + id = activity["id"] + parsed = util.parse_tag_uri(id) + post_id = parsed[1] if parsed else id + parts = [ + self.entity.type, + g.source.SHORT_NAME, + g.source.key.string_id(), + post_id, + ] + + if self.entity.type != "post": + # parse and add response id. (we know Response key ids are always tag URIs) + _, response_id = util.parse_tag_uri(self.entity.key.string_id()) + reaction_id = response_id + if self.entity.type in ("like", "react", "repost", "rsvp"): + response_id = response_id.split("_")[-1] # extract responder user id + parts.append(response_id) + if self.entity.type == "react": + parts.append(reaction_id) + + return util.host_url("/".join(parts)) class PropagateBlogPost(SendWebmentions): - """Task handler that sends webmentions for a :class:`models.BlogPost`. 
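source_url() in PropagateResponse builds the Bridgy-hosted source page for the webmention out of path segments: response type, silo short name, the source's key id, the post id, then for non-posts the response id, plus the reaction id for reacts. A worked example of just that joining, with made-up ids and the host prefix left out:

    def source_path(type, short_name, source_id, post_id,
                    response_id=None, reaction_id=None):
        parts = [type, short_name, source_id, post_id]
        if type != "post" and response_id:
            parts.append(response_id)
            if type == "react" and reaction_id:
                parts.append(reaction_id)
        return "/" + "/".join(parts)

    # e.g. a like of post 123 by user 456 on a hypothetical "fake" silo:
    assert source_path("like", "fake", "alice", "123", "456") == "/like/fake/alice/123/456"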
- - Request parameters: - - * key: string key of :class:`models.BlogPost` entity - """ + """Task handler that sends webmentions for a :class:`models.BlogPost`. - def dispatch_request(self): - logging.debug('Params: %s', list(request.values.items())) - - if not self.lease(ndb.Key(urlsafe=request.values['key'])): - return ('', ERROR_HTTP_RETURN_CODE) if getattr(g, 'failed', None) else 'OK' - - to_send = set() - for url in self.entity.unsent: - url, domain, ok = util.get_webmention_target(url) - # skip "self" links to this blog's domain - if ok and domain not in g.source.domains: - to_send.add(url) - - self.entity.unsent = list(to_send) - self.send_webmentions() - return ('', ERROR_HTTP_RETURN_CODE) if getattr(g, 'failed', None) else 'OK' - - def source_url(self, target_url): - return self.entity.key.id() + Request parameters: + * key: string key of :class:`models.BlogPost` entity + """ -app.add_url_rule('/_ah/queue/poll', view_func=Poll.as_view('poll'), methods=['POST']) -app.add_url_rule('/_ah/queue/poll-now', view_func=Poll.as_view('poll-now'), methods=['POST']) -app.add_url_rule('/_ah/queue/discover', view_func=Discover.as_view('discover'), methods=['POST']) -app.add_url_rule('/_ah/queue/propagate', view_func=PropagateResponse.as_view('propagate'), methods=['POST']) -app.add_url_rule('/_ah/queue/propagate-blogpost', view_func=PropagateBlogPost.as_view('propagate_blogpost'), methods=['POST']) + def dispatch_request(self): + logging.debug("Params: %s", list(request.values.items())) + + if not self.lease(ndb.Key(urlsafe=request.values["key"])): + return ("", ERROR_HTTP_RETURN_CODE) if getattr(g, "failed", None) else "OK" + + to_send = set() + for url in self.entity.unsent: + url, domain, ok = util.get_webmention_target(url) + # skip "self" links to this blog's domain + if ok and domain not in g.source.domains: + to_send.add(url) + + self.entity.unsent = list(to_send) + self.send_webmentions() + return ("", ERROR_HTTP_RETURN_CODE) if getattr(g, "failed", None) else "OK" + + def source_url(self, target_url): + return self.entity.key.id() + + +app.add_url_rule("/_ah/queue/poll", view_func=Poll.as_view("poll"), methods=["POST"]) +app.add_url_rule( + "/_ah/queue/poll-now", view_func=Poll.as_view("poll-now"), methods=["POST"] +) +app.add_url_rule( + "/_ah/queue/discover", view_func=Discover.as_view("discover"), methods=["POST"] +) +app.add_url_rule( + "/_ah/queue/propagate", + view_func=PropagateResponse.as_view("propagate"), + methods=["POST"], +) +app.add_url_rule( + "/_ah/queue/propagate-blogpost", + view_func=PropagateBlogPost.as_view("propagate_blogpost"), + methods=["POST"], +) diff --git a/tests/test_blog_webmention.py b/tests/test_blog_webmention.py index 58f7a823..1e95ae48 100644 --- a/tests/test_blog_webmention.py +++ b/tests/test_blog_webmention.py @@ -14,57 +14,64 @@ class BlogWebmentionTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.source = testutil.FakeSource(id='foo.com', - domains=['x.com', 'foo.com', 'y.com'], - features=['webmention']) - self.source.put() - - self.mox.StubOutWithMock(testutil.FakeSource, 'create_comment') - self.mention_html = """\ + def setUp(self): + super().setUp() + self.source = testutil.FakeSource( + id="foo.com", domains=["x.com", "foo.com", "y.com"], features=["webmention"] + ) + self.source.put() + + self.mox.StubOutWithMock(testutil.FakeSource, "create_comment") + self.mention_html = """\

my post http://foo.com/post/1

""" - def post(self, source=None, target=None): - if source is None: - source = 'http://bar.com/reply' - if target is None: - target = 'http://foo.com/post/1' - return self.client.post('/webmention/fake', data={ - 'source': source, - 'target': target, - }) - - def assert_error(self, expected_error, status=400, **kwargs): - resp = self.post(**kwargs) - self.assertEqual(status, resp.status_code) - self.assertIn(expected_error, resp.json['error']) - - def expect_mention(self): - self.expect_requests_get('http://bar.com/reply', self.mention_html) - mock = testutil.FakeSource.create_comment( - 'http://foo.com/post/1', 'foo.com', 'http://foo.com/', - 'mentioned this in my post.
via bar.com' - ) - mock.AndReturn({'id': 'fake id'}) - return mock - - def test_success(self): - self._test_success(""" + def post(self, source=None, target=None): + if source is None: + source = "http://bar.com/reply" + if target is None: + target = "http://foo.com/post/1" + return self.client.post( + "/webmention/fake", + data={ + "source": source, + "target": target, + }, + ) + + def assert_error(self, expected_error, status=400, **kwargs): + resp = self.post(**kwargs) + self.assertEqual(status, resp.status_code) + self.assertIn(expected_error, resp.json["error"]) + + def expect_mention(self): + self.expect_requests_get("http://bar.com/reply", self.mention_html) + mock = testutil.FakeSource.create_comment( + "http://foo.com/post/1", + "foo.com", + "http://foo.com/", + 'mentioned this in my post.
via bar.com', + ) + mock.AndReturn({"id": "fake id"}) + return mock + + def test_success(self): + self._test_success( + """

my name

i hereby reply -

""") +

""" + ) - def test_nested_item_in_hfeed(self): - """https://chat.indieweb.org/dev/2019-01-23#t1548242942538900""" - self._test_success(""" + def test_nested_item_in_hfeed(self): + """https://chat.indieweb.org/dev/2019-01-23#t1548242942538900""" + self._test_success( + """

my name

@@ -73,256 +80,284 @@ def test_nested_item_in_hfeed(self):

-
""") - - def _test_success(self, html): - self.expect_requests_get('http://bar.com/reply', html) - - testutil.FakeSource.create_comment( - 'http://foo.com/post/1', 'my name', 'http://foo.com/', - 'i hereby reply\n' - '
via bar.com' - ).AndReturn({'id': 'fake id'}) - self.mox.ReplayAll() - - resp = self.post() - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual({'id': 'fake id'}, resp.json) - - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual(self.source.key, bw.source) - self.assertEqual('complete', bw.status) - self.assertEqual('comment', bw.type) - self.assertEqual(html, bw.html) - self.assertEqual({'id': 'fake id'}, bw.published) - - def test_reply_outside_e_content(self): - html = """ +""" + ) + + def _test_success(self, html): + self.expect_requests_get("http://bar.com/reply", html) + + testutil.FakeSource.create_comment( + "http://foo.com/post/1", + "my name", + "http://foo.com/", + 'i hereby reply\n' + '
via bar.com', + ).AndReturn({"id": "fake id"}) + self.mox.ReplayAll() + + resp = self.post() + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual({"id": "fake id"}, resp.json) + + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual(self.source.key, bw.source) + self.assertEqual("complete", bw.status) + self.assertEqual("comment", bw.type) + self.assertEqual(html, bw.html) + self.assertEqual({"id": "fake id"}, bw.published) + + def test_reply_outside_e_content(self): + html = """

my name

i hereby reply
""" - self.expect_requests_get('http://bar.com/reply', html) - - testutil.FakeSource.create_comment( - 'http://foo.com/post/1', 'my name', 'http://foo.com/', - 'i hereby reply
via bar.com' - ).AndReturn({'id': 'fake id'}) - self.mox.ReplayAll() - - resp = self.post() - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('complete', bw.status) - self.assertEqual({'id': 'fake id'}, bw.published) - self.assertEqual(html, bw.html) - - def test_domain_not_found(self): - self.expect_requests_get('http://foo.com/post/1', status_code=404) - for i in range(4): - self.expect_requests_get('http://foo.com/post/1', '') - self.mox.ReplayAll() - - # couldn't fetch source URL - self.source.key.delete() - self.assert_error('Could not fetch source URL http://foo.com/post/1') - self.assertEqual(0, BlogWebmention.query().count()) - - # no source - msg = 'Could not find FakeSource account for foo.com.' - self.assert_error(msg) - self.assertEqual(0, BlogWebmention.query().count()) - - # source without webmention feature - self.source.features = ['listen'] - self.source.put() - self.assert_error(msg) - self.assertEqual(0, BlogWebmention.query().count()) - - # source without domain - self.source.features = ['webmention'] - self.source.domains = ['asdfoo.com', 'foo.comedy'] - self.source.put() - self.assert_error(msg) - self.assertEqual(0, BlogWebmention.query().count()) - - # source is disabled - self.source.domains = ['foo.com'] - self.source.status = 'disabled' - self.source.put() - self.assert_error(msg) - self.assertEqual(0, BlogWebmention.query().count()) - - def test_rel_canonical_different_domain(self): - self.expect_requests_get('http://foo.zz/post/1', """ + self.expect_requests_get("http://bar.com/reply", html) + + testutil.FakeSource.create_comment( + "http://foo.com/post/1", + "my name", + "http://foo.com/", + 'i hereby reply
via bar.com', + ).AndReturn({"id": "fake id"}) + self.mox.ReplayAll() + + resp = self.post() + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("complete", bw.status) + self.assertEqual({"id": "fake id"}, bw.published) + self.assertEqual(html, bw.html) + + def test_domain_not_found(self): + self.expect_requests_get("http://foo.com/post/1", status_code=404) + for i in range(4): + self.expect_requests_get("http://foo.com/post/1", "") + self.mox.ReplayAll() + + # couldn't fetch source URL + self.source.key.delete() + self.assert_error("Could not fetch source URL http://foo.com/post/1") + self.assertEqual(0, BlogWebmention.query().count()) + + # no source + msg = "Could not find FakeSource account for foo.com." + self.assert_error(msg) + self.assertEqual(0, BlogWebmention.query().count()) + + # source without webmention feature + self.source.features = ["listen"] + self.source.put() + self.assert_error(msg) + self.assertEqual(0, BlogWebmention.query().count()) + + # source without domain + self.source.features = ["webmention"] + self.source.domains = ["asdfoo.com", "foo.comedy"] + self.source.put() + self.assert_error(msg) + self.assertEqual(0, BlogWebmention.query().count()) + + # source is disabled + self.source.domains = ["foo.com"] + self.source.status = "disabled" + self.source.put() + self.assert_error(msg) + self.assertEqual(0, BlogWebmention.query().count()) + + def test_rel_canonical_different_domain(self): + self.expect_requests_get( + "http://foo.zz/post/1", + """ -foo bar""") +foo bar""", + ) - html = """ + html = """ """ - self.expect_requests_get('http://bar.com/mention', html) - - testutil.FakeSource.create_comment( - 'http://foo.zz/post/1', 'foo.zz', 'http://foo.zz/', - 'mentioned this in bar.com/mention.
via bar.com' - ).AndReturn({'id': 'fake id'}) - self.mox.ReplayAll() - - resp = self.post('http://bar.com/mention', 'http://foo.zz/post/1') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - - bw = BlogWebmention.get_by_id('http://bar.com/mention http://foo.zz/post/1') - self.assertEqual('complete', bw.status) - self.assertEqual(html, bw.html) - - def test_target_is_home_page(self): - self.assert_error('Home page webmentions are not currently supported.', - target='http://foo.com/', status=202) - self.assertEqual(0, BlogWebmention.query().count()) - - def test_mention(self): - html = """\ + self.expect_requests_get("http://bar.com/mention", html) + + testutil.FakeSource.create_comment( + "http://foo.zz/post/1", + "foo.zz", + "http://foo.zz/", + 'mentioned this in bar.com/mention.
via bar.com', + ).AndReturn({"id": "fake id"}) + self.mox.ReplayAll() + + resp = self.post("http://bar.com/mention", "http://foo.zz/post/1") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + + bw = BlogWebmention.get_by_id("http://bar.com/mention http://foo.zz/post/1") + self.assertEqual("complete", bw.status) + self.assertEqual(html, bw.html) + + def test_target_is_home_page(self): + self.assert_error( + "Home page webmentions are not currently supported.", + target="http://foo.com/", + status=202, + ) + self.assertEqual(0, BlogWebmention.query().count()) + + def test_mention(self): + html = """\ """ - self.expect_requests_get('http://bar.com/mention', html) - testutil.FakeSource.create_comment( - 'http://foo.com/post/1', 'foo.com', 'http://foo.com/', - 'mentioned this in my post.
via bar.com' - ).AndReturn({'id': 'fake id'}) - self.mox.ReplayAll() - - resp = self.post('http://bar.com/mention') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - - def test_domain_translates_to_lowercase(self): - html = """\ + self.expect_requests_get("http://bar.com/mention", html) + testutil.FakeSource.create_comment( + "http://foo.com/post/1", + "foo.com", + "http://foo.com/", + 'mentioned this in my post.
via bar.com',
+        ).AndReturn({"id": "fake id"})
+        self.mox.ReplayAll()
+
+        resp = self.post("http://bar.com/mention")
+        self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))
+
+    def test_domain_translates_to_lowercase(self):
+        html = """\

my post X http://FoO.cOm/post/1

"""
-    self.expect_requests_get('http://bar.com/reply', html)
-
-    testutil.FakeSource.create_comment(
-      'http://FoO.cOm/post/1', 'foo.com', 'http://foo.com/',
-      'mentioned this in my post.
via bar.com'
-      ).AndReturn({'id': 'fake id'})
-    self.mox.ReplayAll()
-
-    resp = self.post(target='http://FoO.cOm/post/1')
-    self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))
-    bw = BlogWebmention.get_by_id('http://bar.com/reply http://FoO.cOm/post/1')
-    self.assertEqual('complete', bw.status)
-
-  def test_source_link_not_found(self):
-    html = '
'
-    self.expect_requests_get('http://bar.com/reply', html)
-    self.mox.ReplayAll()
-    self.assert_error('Could not find target URL')
-    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
-    self.assertEqual('failed', bw.status)
-    self.assertEqual(html, bw.html)
-
-  def test_target_path_blocklisted(self):
-    bad = 'http://foo.com/blocklisted/1'
-    self.assert_error(
-      'FakeSource webmentions are not supported for URL path: /blocklisted/1',
-      target=bad, status=202)
-    self.assertEqual(0, BlogWebmention.query().count())
-
-  def test_strip_utm_query_params(self):
-    """utm_* query params should be stripped from target URLs."""
-    self.expect_mention()
-    self.mox.ReplayAll()
-
-    resp = self.post(target='http://foo.com/post/1?utm_source=x&utm_medium=y')
-    self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))
-    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
-    self.assertEqual('complete', bw.status)
-
-  def test_unicode_in_target_and_source_urls(self):
-    """Unicode chars in target and source URLs should work."""
-    # note the … and ✁ chars
-    target = 'http://foo.com/2014/11/23/england-german…iendly-wembley'
-    source = 'http://bar.com/✁/1'
-
-    html = u"""\
via bar.com',
+        ).AndReturn({"id": "fake id"})
+        self.mox.ReplayAll()
+
+        resp = self.post(target="http://FoO.cOm/post/1")
+        self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))
+        bw = BlogWebmention.get_by_id("http://bar.com/reply http://FoO.cOm/post/1")
+        self.assertEqual("complete", bw.status)
+
+    def test_source_link_not_found(self):
+        html = '
'
+        self.expect_requests_get("http://bar.com/reply", html)
+        self.mox.ReplayAll()
+        self.assert_error("Could not find target URL")
+        bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1")
+        self.assertEqual("failed", bw.status)
+        self.assertEqual(html, bw.html)
+
+    def test_target_path_blocklisted(self):
+        bad = "http://foo.com/blocklisted/1"
+        self.assert_error(
+            "FakeSource webmentions are not supported for URL path: /blocklisted/1",
+            target=bad,
+            status=202,
+        )
+        self.assertEqual(0, BlogWebmention.query().count())
+
+    def test_strip_utm_query_params(self):
+        """utm_* query params should be stripped from target URLs."""
+        self.expect_mention()
+        self.mox.ReplayAll()
+
+        resp = self.post(target="http://foo.com/post/1?utm_source=x&utm_medium=y")
+        self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))
+        bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1")
+        self.assertEqual("complete", bw.status)
+
+    def test_unicode_in_target_and_source_urls(self):
+        """Unicode chars in target and source URLs should work."""
+        # note the … and ✁ chars
+        target = "http://foo.com/2014/11/23/england-german…iendly-wembley"
+        source = "http://bar.com/✁/1"
+
+        html = (
+            u"""\

my post %s -

""" % target - self.expect_requests_get(source, html) - - comment = 'mentioned this in my post.
via bar.com' % (source, source) - testutil.FakeSource.create_comment(target, 'foo.com', 'http://foo.com/', comment - ).AndReturn({'id': 'fake id'}) - self.mox.ReplayAll() - - resp = self.post(source=source, target=target) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - bw = BlogWebmention.get_by_id(' '.join((source, target))) - self.assertEqual('complete', bw.status) - - def test_target_redirects(self): - html = """\ +

""" + % target + ) + self.expect_requests_get(source, html) + + comment = ( + 'mentioned this in my post.
via bar.com' + % (source, source) + ) + testutil.FakeSource.create_comment( + target, "foo.com", "http://foo.com/", comment + ).AndReturn({"id": "fake id"}) + self.mox.ReplayAll() + + resp = self.post(source=source, target=target) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + bw = BlogWebmention.get_by_id(" ".join((source, target))) + self.assertEqual("complete", bw.status) + + def test_target_redirects(self): + html = """\

http://second/

""" - redirects = ['http://second/', 'http://foo.com/final'] - self.expect_requests_head('http://first/', redirected_url=redirects) - self.expect_requests_get('http://bar.com/reply', html) - testutil.FakeSource.create_comment( - 'http://foo.com/final', 'foo.com', 'http://foo.com/', mox.IgnoreArg() - ).AndReturn({'id': 'fake id'}) - self.mox.ReplayAll() - - resp = self.post(target='http://first/') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/final') - self.assertEqual('complete', bw.status) - self.assertEqual(['http://first/', 'http://second/'], bw.redirected_target_urls) - - def test_source_link_check_ignores_fragment(self): - html = """\ + redirects = ["http://second/", "http://foo.com/final"] + self.expect_requests_head("http://first/", redirected_url=redirects) + self.expect_requests_get("http://bar.com/reply", html) + testutil.FakeSource.create_comment( + "http://foo.com/final", "foo.com", "http://foo.com/", mox.IgnoreArg() + ).AndReturn({"id": "fake id"}) + self.mox.ReplayAll() + + resp = self.post(target="http://first/") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/final") + self.assertEqual("complete", bw.status) + self.assertEqual(["http://first/", "http://second/"], bw.redirected_target_urls) + + def test_source_link_check_ignores_fragment(self): + html = """\ """ - self.expect_requests_get('http://bar.com/reply', html) - testutil.FakeSource.create_comment( - 'http://foo.com/post/1', 'foo.com', 'http://foo.com/', - 'mentioned this in my post.
via bar.com' - ).AndReturn({'id': 'fake id'}) - self.mox.ReplayAll() - - resp = self.post() - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('complete', bw.status) - - def test_source_missing_mf2(self): - html = 'no microformats here, run along' - self.expect_requests_get('http://bar.com/reply', html) - self.mox.ReplayAll() - self.assert_error('No microformats2 data found') - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('failed', bw.status) - self.assertEqual(html, bw.html) - - def test_u_url(self): - html = """ + self.expect_requests_get("http://bar.com/reply", html) + testutil.FakeSource.create_comment( + "http://foo.com/post/1", + "foo.com", + "http://foo.com/", + 'mentioned this in my post.
via bar.com', + ).AndReturn({"id": "fake id"}) + self.mox.ReplayAll() + + resp = self.post() + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("complete", bw.status) + + def test_source_missing_mf2(self): + html = "no microformats here, run along" + self.expect_requests_get("http://bar.com/reply", html) + self.mox.ReplayAll() + self.assert_error("No microformats2 data found") + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("failed", bw.status) + self.assertEqual(html, bw.html) + + def test_u_url(self): + html = """

my name

@@ -331,102 +366,110 @@ def test_u_url(self):

""" - self.expect_requests_get('http://bar.com/reply', html) - - testutil.FakeSource.create_comment( - 'http://foo.com/post/1', 'my name', 'http://foo.com/', """mentioned this in barzz.com/u/url.
via barzz.com""" - ).AndReturn({'id': 'fake id'}) - self.mox.ReplayAll() - - resp = self.post() - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('complete', bw.status) - self.assertEqual('post', bw.type) - self.assertEqual('http://barzz.com/u/url', bw.u_url) - self.assertEqual('http://barzz.com/u/url', bw.source_url()) - - def test_repeated(self): - # 1) first a failure - self.expect_requests_get('http://bar.com/reply', '') - - # 2) should allow retrying, this one will succeed - self.expect_requests_get('http://bar.com/reply', """ + self.expect_requests_get("http://bar.com/reply", html) + + testutil.FakeSource.create_comment( + "http://foo.com/post/1", + "my name", + "http://foo.com/", + """mentioned this in barzz.com/u/url.
via barzz.com""", + ).AndReturn({"id": "fake id"}) + self.mox.ReplayAll() + + resp = self.post() + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("complete", bw.status) + self.assertEqual("post", bw.type) + self.assertEqual("http://barzz.com/u/url", bw.u_url) + self.assertEqual("http://barzz.com/u/url", bw.source_url()) + + def test_repeated(self): + # 1) first a failure + self.expect_requests_get("http://bar.com/reply", "") + + # 2) should allow retrying, this one will succeed + self.expect_requests_get( + "http://bar.com/reply", + """
-
""") - testutil.FakeSource.create_comment( - 'http://foo.com/post/1', 'foo.com', 'http://foo.com/', - 'reposted this.
via bar.com' - ).AndReturn({'id': 'fake id'}) - - # 3) after success, another is a noop and returns 200 - # TODO: check for "updates not supported" message - self.mox.ReplayAll() - - # now the webmention requests. 1) failure - self.assert_error('No microformats2 data found') - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('failed', bw.status) - - # 2) success - resp = self.post() - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('complete', bw.status) - self.assertEqual('repost', bw.type) - - # 3) noop repeated success - # source without webmention feature - resp = self.post() - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('complete', bw.status) - - def test_create_comment_exception(self): - self.expect_mention().AndRaise(exceptions.NotAcceptable()) - self.mox.ReplayAll() - - resp = self.post() - self.assertEqual(406, resp.status_code, resp.get_data(as_text=True)) - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('failed', bw.status) - self.assertEqual(self.mention_html, bw.html) - - def test_create_comment_401_disables_source(self): - self.expect_mention().AndRaise(exceptions.Unauthorized('no way')) - self.mox.ReplayAll() - - self.assert_error('no way', status=401) - source = self.source.key.get() - self.assertEqual('disabled', source.status) - - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('failed', bw.status) - self.assertEqual(self.mention_html, bw.html) - - def test_create_comment_404s(self): - self.expect_mention().AndRaise(exceptions.NotFound('gone baby gone')) - self.mox.ReplayAll() - - self.assert_error('gone baby gone', status=404) - - bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1') - self.assertEqual('failed', bw.status) - self.assertEqual(self.mention_html, bw.html) - - def test_create_comment_500s(self): - self.expect_mention().AndRaise(exceptions.InternalServerError('oops')) - self.mox.ReplayAll() - self.assert_error('oops', status=502) - - def test_create_comment_raises_connection_error(self): - self.expect_mention().AndRaise(requests.ConnectionError('oops')) - self.mox.ReplayAll() - self.assert_error('oops', status=502) - - def test_sources_global(self): - self.assertIsNotNone(models.sources['blogger']) - self.assertIsNotNone(models.sources['tumblr']) - self.assertIsNotNone(models.sources['wordpress']) +""", + ) + testutil.FakeSource.create_comment( + "http://foo.com/post/1", + "foo.com", + "http://foo.com/", + 'reposted this.
via bar.com', + ).AndReturn({"id": "fake id"}) + + # 3) after success, another is a noop and returns 200 + # TODO: check for "updates not supported" message + self.mox.ReplayAll() + + # now the webmention requests. 1) failure + self.assert_error("No microformats2 data found") + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("failed", bw.status) + + # 2) success + resp = self.post() + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("complete", bw.status) + self.assertEqual("repost", bw.type) + + # 3) noop repeated success + # source without webmention feature + resp = self.post() + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("complete", bw.status) + + def test_create_comment_exception(self): + self.expect_mention().AndRaise(exceptions.NotAcceptable()) + self.mox.ReplayAll() + + resp = self.post() + self.assertEqual(406, resp.status_code, resp.get_data(as_text=True)) + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("failed", bw.status) + self.assertEqual(self.mention_html, bw.html) + + def test_create_comment_401_disables_source(self): + self.expect_mention().AndRaise(exceptions.Unauthorized("no way")) + self.mox.ReplayAll() + + self.assert_error("no way", status=401) + source = self.source.key.get() + self.assertEqual("disabled", source.status) + + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("failed", bw.status) + self.assertEqual(self.mention_html, bw.html) + + def test_create_comment_404s(self): + self.expect_mention().AndRaise(exceptions.NotFound("gone baby gone")) + self.mox.ReplayAll() + + self.assert_error("gone baby gone", status=404) + + bw = BlogWebmention.get_by_id("http://bar.com/reply http://foo.com/post/1") + self.assertEqual("failed", bw.status) + self.assertEqual(self.mention_html, bw.html) + + def test_create_comment_500s(self): + self.expect_mention().AndRaise(exceptions.InternalServerError("oops")) + self.mox.ReplayAll() + self.assert_error("oops", status=502) + + def test_create_comment_raises_connection_error(self): + self.expect_mention().AndRaise(requests.ConnectionError("oops")) + self.mox.ReplayAll() + self.assert_error("oops", status=502) + + def test_sources_global(self): + self.assertIsNotNone(models.sources["blogger"]) + self.assertIsNotNone(models.sources["tumblr"]) + self.assertIsNotNone(models.sources["wordpress"]) diff --git a/tests/test_blogger.py b/tests/test_blogger.py index 0b651ad1..c41aead3 100644 --- a/tests/test_blogger.py +++ b/tests/test_blogger.py @@ -18,126 +18,157 @@ class BloggerTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.auth_entity = BloggerV2Auth(name='name', - blog_ids=['111'], - blog_hostnames=['my.blawg'], - picture_url='http://pic') - self.blogger_client = self.mox.CreateMock(BloggerClient) - - self.comment = data.Comment() - self.comment.id = util.Struct( - text='tag:blogger.com,1999:blog-111.post-222.comment-333') - self.comment.to_string = lambda: '' - - def expect_get_posts(self): - post = data.BlogPost() - post.id = util.Struct(text='tag:blogger.com,1999:blog-111.post-222') - feed = data.BlogFeed() - feed.entry = [post] - - def check_path(query): - return query.custom_parameters['path'] == '/path/to/post' - - 
self.blogger_client.get_posts('111', query=mox.Func(check_path)).AndReturn(feed) - - def test_new(self): - b = Blogger.new(auth_entity=self.auth_entity) - self.assertEqual(self.auth_entity.key, b.auth_entity) - self.assertEqual('name', b.name) - self.assertEqual(['http://my.blawg/'], b.domain_urls) - self.assertEqual(['my.blawg'], b.domains) - self.assertEqual('http://pic', b.picture) - - def test_new_oauth_dropins_error(self): - """Blogger is special cased in oauth-dropins: when login succeeds but then - an authenticated API call fails, it returns an empty auth entity key, which - we can differentiate from a user decline because oauth-dropins can't - currently intercept Blogger declines. - """ - resp = self.client.get('/blogger/oauth_handler') - self.assertEqual(302, resp.status_code) - location = urllib.parse.urlparse(resp.headers['Location']) - self.assertEqual('/', location.path) - self.assertIn("Couldn't fetch your blogs", get_flashed_messages()[0]) - self.assertEqual(0, BloggerV2Auth.query().count()) - self.assertEqual(0, Blogger.query().count()) - - def test_oauth_view_no_blogs(self): - self.auth_entity = BloggerV2Auth(id='123', name='name', picture_url='pic', - blogs_atom='x', user_atom='y', creds_json='z') - self.auth_entity.put() - - resp = self.client.get('/blogger/oauth_handler?auth_entity=%s' % - self.auth_entity.key.urlsafe().decode()) - self.assertEqual(302, resp.status_code) - location = urllib.parse.urlparse(resp.headers['Location']) - self.assertEqual('/', location.path) - self.assertIn("Couldn't fetch your blogs", get_flashed_messages()[0]) - - def test_new_no_blogs(self): - self.auth_entity.blog_hostnames = [] - with app.test_request_context(): - self.assertIsNone(Blogger.new(auth_entity=self.auth_entity)) - self.assertIn('Blogger blog not found', get_flashed_messages()[0]) - - def test_create_comment(self): - self.expect_get_posts() - self.blogger_client.add_comment( - '111', '222', 'who: foo bar').AndReturn(self.comment) - self.mox.ReplayAll() - - b = Blogger.new(auth_entity=self.auth_entity) - resp = b.create_comment('http://blawg/path/to/post', 'who', 'http://who', - 'foo bar', client=self.blogger_client) - self.assert_equals({'id': '333', 'response': ''}, resp) - - def test_create_comment_with_unicode_chars(self): - # TODO: this just checks the arguments passed to client.add_comment(). we - # should test that the blogger client itself encodes as UTF-8. - self.expect_get_posts() - - prefix = 'Degenève: ' - content = prefix + 'x' * (blogger.MAX_COMMENT_LENGTH - len(prefix) - 3) + '...' 
- self.blogger_client.add_comment('111', '222', content).AndReturn(self.comment) - self.mox.ReplayAll() - - b = Blogger.new(auth_entity=self.auth_entity) - resp = b.create_comment('http://blawg/path/to/post', 'Degenève', 'http://who', - 'x' * blogger.MAX_COMMENT_LENGTH, - client=self.blogger_client) - self.assert_equals({'id': '333', 'response': ''}, resp) - - def test_create_too_long_comment(self): - """Blogger caps HTML comment length at 4096 chars.""" - self.expect_get_posts() - self.blogger_client.add_comment( - '111', '222', 'Degenève: foo Degenève bar' - ).AndReturn(self.comment) - self.mox.ReplayAll() - - b = Blogger.new(auth_entity=self.auth_entity) - resp = b.create_comment('http://blawg/path/to/post', 'Degenève', 'http://who', - 'foo Degenève bar', client=self.blogger_client) - self.assert_equals({'id': '333', 'response': ''}, resp) - - def test_create_comment_gives_up_on_internal_error_bX2i87au(self): - # see https://github.com/snarfed/bridgy/issues/175 - self.expect_get_posts() - self.blogger_client.add_comment( - '111', '222', 'who: foo bar' - ).AndRaise(RequestError('500, Internal error: bX-2i87au')) - self.mox.ReplayAll() - - b = Blogger.new(auth_entity=self.auth_entity) - resp = b.create_comment('http://blawg/path/to/post', 'who', 'http://who', - 'foo bar', client=self.blogger_client) - # the key point is that create_comment doesn't raise an exception - self.assert_equals({'error': '500, Internal error: bX-2i87au'}, resp) - - def test_feed_url(self): - self.assertEqual( - 'http://my.blawg/feeds/posts/default', - Blogger.new(auth_entity=self.auth_entity).feed_url()) + def setUp(self): + super().setUp() + self.auth_entity = BloggerV2Auth( + name="name", + blog_ids=["111"], + blog_hostnames=["my.blawg"], + picture_url="http://pic", + ) + self.blogger_client = self.mox.CreateMock(BloggerClient) + + self.comment = data.Comment() + self.comment.id = util.Struct( + text="tag:blogger.com,1999:blog-111.post-222.comment-333" + ) + self.comment.to_string = lambda: "" + + def expect_get_posts(self): + post = data.BlogPost() + post.id = util.Struct(text="tag:blogger.com,1999:blog-111.post-222") + feed = data.BlogFeed() + feed.entry = [post] + + def check_path(query): + return query.custom_parameters["path"] == "/path/to/post" + + self.blogger_client.get_posts("111", query=mox.Func(check_path)).AndReturn(feed) + + def test_new(self): + b = Blogger.new(auth_entity=self.auth_entity) + self.assertEqual(self.auth_entity.key, b.auth_entity) + self.assertEqual("name", b.name) + self.assertEqual(["http://my.blawg/"], b.domain_urls) + self.assertEqual(["my.blawg"], b.domains) + self.assertEqual("http://pic", b.picture) + + def test_new_oauth_dropins_error(self): + """Blogger is special cased in oauth-dropins: when login succeeds but then + an authenticated API call fails, it returns an empty auth entity key, which + we can differentiate from a user decline because oauth-dropins can't + currently intercept Blogger declines. 
+ """ + resp = self.client.get("/blogger/oauth_handler") + self.assertEqual(302, resp.status_code) + location = urllib.parse.urlparse(resp.headers["Location"]) + self.assertEqual("/", location.path) + self.assertIn("Couldn't fetch your blogs", get_flashed_messages()[0]) + self.assertEqual(0, BloggerV2Auth.query().count()) + self.assertEqual(0, Blogger.query().count()) + + def test_oauth_view_no_blogs(self): + self.auth_entity = BloggerV2Auth( + id="123", + name="name", + picture_url="pic", + blogs_atom="x", + user_atom="y", + creds_json="z", + ) + self.auth_entity.put() + + resp = self.client.get( + "/blogger/oauth_handler?auth_entity=%s" + % self.auth_entity.key.urlsafe().decode() + ) + self.assertEqual(302, resp.status_code) + location = urllib.parse.urlparse(resp.headers["Location"]) + self.assertEqual("/", location.path) + self.assertIn("Couldn't fetch your blogs", get_flashed_messages()[0]) + + def test_new_no_blogs(self): + self.auth_entity.blog_hostnames = [] + with app.test_request_context(): + self.assertIsNone(Blogger.new(auth_entity=self.auth_entity)) + self.assertIn("Blogger blog not found", get_flashed_messages()[0]) + + def test_create_comment(self): + self.expect_get_posts() + self.blogger_client.add_comment( + "111", "222", 'who: foo bar' + ).AndReturn(self.comment) + self.mox.ReplayAll() + + b = Blogger.new(auth_entity=self.auth_entity) + resp = b.create_comment( + "http://blawg/path/to/post", + "who", + "http://who", + "foo bar", + client=self.blogger_client, + ) + self.assert_equals({"id": "333", "response": ""}, resp) + + def test_create_comment_with_unicode_chars(self): + # TODO: this just checks the arguments passed to client.add_comment(). we + # should test that the blogger client itself encodes as UTF-8. + self.expect_get_posts() + + prefix = 'Degenève: ' + content = prefix + "x" * (blogger.MAX_COMMENT_LENGTH - len(prefix) - 3) + "..." 
+ self.blogger_client.add_comment("111", "222", content).AndReturn(self.comment) + self.mox.ReplayAll() + + b = Blogger.new(auth_entity=self.auth_entity) + resp = b.create_comment( + "http://blawg/path/to/post", + "Degenève", + "http://who", + "x" * blogger.MAX_COMMENT_LENGTH, + client=self.blogger_client, + ) + self.assert_equals({"id": "333", "response": ""}, resp) + + def test_create_too_long_comment(self): + """Blogger caps HTML comment length at 4096 chars.""" + self.expect_get_posts() + self.blogger_client.add_comment( + "111", "222", 'Degenève: foo Degenève bar' + ).AndReturn(self.comment) + self.mox.ReplayAll() + + b = Blogger.new(auth_entity=self.auth_entity) + resp = b.create_comment( + "http://blawg/path/to/post", + "Degenève", + "http://who", + "foo Degenève bar", + client=self.blogger_client, + ) + self.assert_equals({"id": "333", "response": ""}, resp) + + def test_create_comment_gives_up_on_internal_error_bX2i87au(self): + # see https://github.com/snarfed/bridgy/issues/175 + self.expect_get_posts() + self.blogger_client.add_comment( + "111", "222", 'who: foo bar' + ).AndRaise(RequestError("500, Internal error: bX-2i87au")) + self.mox.ReplayAll() + + b = Blogger.new(auth_entity=self.auth_entity) + resp = b.create_comment( + "http://blawg/path/to/post", + "who", + "http://who", + "foo bar", + client=self.blogger_client, + ) + # the key point is that create_comment doesn't raise an exception + self.assert_equals({"error": "500, Internal error: bX-2i87au"}, resp) + + def test_feed_url(self): + self.assertEqual( + "http://my.blawg/feeds/posts/default", + Blogger.new(auth_entity=self.auth_entity).feed_url(), + ) diff --git a/tests/test_browser.py b/tests/test_browser.py index ce077925..02ed2488 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -18,432 +18,491 @@ class FakeBrowserSource(browser.BrowserSource): - GR_CLASS = FakeGrSource - SHORT_NAME = 'fbs' - gr_source = FakeGrSource() + GR_CLASS = FakeGrSource + SHORT_NAME = "fbs" + gr_source = FakeGrSource() - @classmethod - def key_id_from_actor(cls, actor): - return actor['fbs_id'] + @classmethod + def key_id_from_actor(cls, actor): + return actor["fbs_id"] class BrowserSourceTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.actor['fbs_id'] = '222yyy' - self.source = FakeBrowserSource.new(actor=self.actor) - FakeBrowserSource.gr_source.actor = {} - - def test_new(self): - self.assertIsNone(self.source.auth_entity) - self.assertEqual('222yyy', self.source.key.id()) - self.assertEqual('Ryan B', self.source.name) - self.assertEqual('Ryan B (FakeSource)', self.source.label()) - - def test_get_activities_response_activity_id(self): - Activity(id='tag:fa.ke,2013:123', - activity_json=json_dumps({'foo': 'bar'})).put() - - resp = self.source.get_activities_response(activity_id='123') - self.assertEqual([{'foo': 'bar'}], resp['items']) - - def test_get_activities_response_no_activity_id(self): - Activity(id='tag:fa.ke,2013:123', source=self.source.key, - activity_json=json_dumps({'foo': 'bar'})).put() - Activity(id='tag:fa.ke,2013:456', source=self.source.key, - activity_json=json_dumps({'baz': 'biff'})).put() - - other = FakeBrowserSource.new(actor={'fbs_id': 'other'}).put() - Activity(id='tag:fa.ke,2013:789', source=other, - activity_json=json_dumps({'boo': 'bah'})).put() - - - resp = self.source.get_activities_response() - self.assert_equals([{'foo': 'bar'}, {'baz': 'biff'}], resp['items']) - - def test_get_activities_response_no_stored_activity(self): - resp = 
self.source.get_activities_response(activity_id='123') - self.assertEqual([], resp['items']) - - def test_get_comment(self): - expected = copy.deepcopy(self.activities[0]['object']['replies']['items'][0]) - microformats2.prefix_image_urls(expected, browser.IMAGE_PROXY_URL_BASE) - - got = self.source.get_comment('1_2_a', activity=self.activities[0]) - self.assert_equals(expected, got) - - def test_get_comment_no_matching_id(self): - self.assertIsNone(self.source.get_comment('333', activity=self.activities[0])) - - def test_get_comment_no_activity_kwarg(self): - self.assertIsNone(self.source.get_comment('020')) - - def test_get_like(self): - expected = copy.deepcopy(self.activities[0]['object']['tags'][0]) - microformats2.prefix_image_urls(expected, browser.IMAGE_PROXY_URL_BASE) - - got = self.source.get_like('unused', 'unused', 'alice', - activity=self.activities[0]) - self.assert_equals(expected, got) - - def test_get_like_no_matching_user(self): - self.assertIsNone(self.source.get_like( - 'unused', 'unused', 'eve', activity=self.activities[0])) - - def test_get_like_no_activity_kwarg(self): - self.assertIsNone(self.source.get_like('unused', 'unused', 'alice')) + def setUp(self): + super().setUp() + self.actor["fbs_id"] = "222yyy" + self.source = FakeBrowserSource.new(actor=self.actor) + FakeBrowserSource.gr_source.actor = {} + + def test_new(self): + self.assertIsNone(self.source.auth_entity) + self.assertEqual("222yyy", self.source.key.id()) + self.assertEqual("Ryan B", self.source.name) + self.assertEqual("Ryan B (FakeSource)", self.source.label()) + + def test_get_activities_response_activity_id(self): + Activity( + id="tag:fa.ke,2013:123", activity_json=json_dumps({"foo": "bar"}) + ).put() + + resp = self.source.get_activities_response(activity_id="123") + self.assertEqual([{"foo": "bar"}], resp["items"]) + + def test_get_activities_response_no_activity_id(self): + Activity( + id="tag:fa.ke,2013:123", + source=self.source.key, + activity_json=json_dumps({"foo": "bar"}), + ).put() + Activity( + id="tag:fa.ke,2013:456", + source=self.source.key, + activity_json=json_dumps({"baz": "biff"}), + ).put() + + other = FakeBrowserSource.new(actor={"fbs_id": "other"}).put() + Activity( + id="tag:fa.ke,2013:789", + source=other, + activity_json=json_dumps({"boo": "bah"}), + ).put() + + resp = self.source.get_activities_response() + self.assert_equals([{"foo": "bar"}, {"baz": "biff"}], resp["items"]) + + def test_get_activities_response_no_stored_activity(self): + resp = self.source.get_activities_response(activity_id="123") + self.assertEqual([], resp["items"]) + + def test_get_comment(self): + expected = copy.deepcopy(self.activities[0]["object"]["replies"]["items"][0]) + microformats2.prefix_image_urls(expected, browser.IMAGE_PROXY_URL_BASE) + + got = self.source.get_comment("1_2_a", activity=self.activities[0]) + self.assert_equals(expected, got) + + def test_get_comment_no_matching_id(self): + self.assertIsNone(self.source.get_comment("333", activity=self.activities[0])) + + def test_get_comment_no_activity_kwarg(self): + self.assertIsNone(self.source.get_comment("020")) + + def test_get_like(self): + expected = copy.deepcopy(self.activities[0]["object"]["tags"][0]) + microformats2.prefix_image_urls(expected, browser.IMAGE_PROXY_URL_BASE) + + got = self.source.get_like( + "unused", "unused", "alice", activity=self.activities[0] + ) + self.assert_equals(expected, got) + + def test_get_like_no_matching_user(self): + self.assertIsNone( + self.source.get_like("unused", "unused", "eve", 
activity=self.activities[0]) + ) + + def test_get_like_no_activity_kwarg(self): + self.assertIsNone(self.source.get_like("unused", "unused", "alice")) browser.route(FakeBrowserSource) + class BrowserViewTest(testutil.AppTest): - def setUp(self): - super().setUp() - - self.domain = Domain(id='snarfed.org', tokens=['towkin']).put() - FakeBrowserSource.gr_source = FakeGrSource() - self.actor['fbs_id'] = '222yyy' - self.source = FakeBrowserSource.new(actor=self.actor).put() - self.auth = f'token=towkin&key={self.source.urlsafe().decode()}' - self.other_source = FakeBrowserSource(id='333zzz', domains=['foo.com']).put() - - for a in self.activities: - a['object']['author'] = self.actor - - self.activities_no_extras = copy.deepcopy(self.activities) - for a in self.activities_no_extras: - del a['object']['tags'] - - self.activities_no_replies = copy.deepcopy(self.activities_no_extras) - for a in self.activities_no_replies: - del a['object']['replies'] - - def post(self, path_query, auth=True, **kwargs): - if auth and '?' not in path_query: - path_query += f'?{self.auth}' - return self.client.post(f'/fbs/browser/{path_query}', **kwargs) - - def test_status(self): - resp = self.client.get(f'/fbs/browser/status?{self.auth}') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - - self.assertEqual({ - 'status': 'enabled', - 'poll-seconds': FakeBrowserSource.FAST_POLL.total_seconds(), - }, resp.json) - - def test_homepage(self): - resp = self.post('homepage', data='homepage html', auth=False) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual('snarfed', resp.json) - - def test_homepage_no_logged_in_user(self): - FakeBrowserSource.gr_source.actor = {} - resp = self.post('homepage', data='not logged in', auth=False) - self.assertEqual(400, resp.status_code) - self.assertIn("Couldn't determine logged in FakeSource user", - html.unescape(resp.get_data(as_text=True))) - - def test_profile_new_user(self): - self.source.delete() - - self.expect_requests_get('https://snarfed.org/', '') - self.mox.ReplayAll() - - resp = self.post('profile?token=towkin') - - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals(self.source.urlsafe().decode(), resp.json) - - src = self.source.get() - self.assertEqual('Ryan B', src.name) - self.assertEqual(['https://snarfed.org/'], src.domain_urls) - self.assertEqual(['snarfed.org'], src.domains) - - def test_profile_existing_user_update(self): - self.assertIsNotNone(self.source.get()) - FakeBrowserSource.gr_source.actor.update({ - 'displayName': 'Mrs. Foo', - 'image': {'url': 'http://foo/img'}, - }) - - # for webmention discovery - self.mox.ReplayAll() - - resp = self.post('profile?token=towkin') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals(self.source.urlsafe().decode(), resp.json) - - src = self.source.get() - self.assertEqual('Mrs. 
Foo', src.name) - self.assertEqual('http://foo/img', src.picture) - - def test_profile_fall_back_to_scraped_to_actor(self): - self.source.delete() - - self.mox.StubOutWithMock(FakeGrSource, 'scraped_to_activities') - FakeGrSource.scraped_to_activities('').AndReturn(([], None)) - - self.expect_requests_get('https://snarfed.org/', '') - self.mox.ReplayAll() - - resp = self.post('profile?token=towkin') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals(self.source.urlsafe().decode(), resp.json) - - src = self.source.get() - self.assertEqual('Ryan B', src.name) - self.assertEqual(['https://snarfed.org/'], src.domain_urls) - self.assertEqual(['snarfed.org'], src.domains) - - def test_profile_no_scraped_actor(self): - self.source.delete() - FakeGrSource.actor = None - resp = self.post('profile?token=towkin') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - self.assertIn('Missing actor', html.unescape(resp.get_data(as_text=True))) - - def test_profile_private_account(self): - FakeBrowserSource.gr_source.actor['to'] = \ - [{'objectType':'group', 'alias':'@private'}] - resp = self.post('profile?token=towkin') - self.assertEqual(400, resp.status_code) - self.assertIn('Your FakeSource account is private.', resp.get_data(as_text=True)) - - def test_profile_missing_token(self): - resp = self.post('profile', auth=False) - self.assertEqual(400, resp.status_code) - self.assertIn('Missing required parameter: token', resp.get_data(as_text=True)) - - def test_profile_no_stored_token(self): - self.domain.delete() - resp = self.post('profile?token=towkin') - self.assertEqual(403, resp.status_code) - self.assertIn("towkin is not authorized for any of: {'snarfed.org'}", - html.unescape(resp.get_data(as_text=True))) - - def test_profile_bad_token(self): - resp = self.post('profile?token=nope') - self.assertEqual(403, resp.status_code) - self.assertIn("nope is not authorized for any of: {'snarfed.org'}", - html.unescape(resp.get_data(as_text=True))) - - def test_feed(self): - resp = self.post('feed') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual(self.activities_no_replies, util.trim_nulls(resp.json)) - - def test_feed_empty(self): - FakeGrSource.activities = [] - resp = self.post('feed') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual([], resp.json) - - def test_feed_missing_token(self): - resp = self.post('feed?key={self.source.urlsafe().decode()}') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_feed_bad_token(self): - resp = self.post(f'feed?token=nope&key={self.source.urlsafe().decode()}') - self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) - self.assertIn("nope is not authorized for any of: ['snarfed.org']", - html.unescape(resp.get_data(as_text=True))) - - def test_feed_missing_key(self): - resp = self.post('feed?token=towkin') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_feed_bad_key(self): - resp = self.post('feed?token=towkin&key=asdf') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - # this comes from util.load_source() since the urlsafe key is malformed - self.assertIn('Bad value for key', resp.get_data(as_text=True)) - - def test_feed_token_domain_not_in_source(self): - resp = self.post( - f'feed?token=towkin&key={self.other_source.urlsafe().decode()}') - self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) - - def 
test_post(self): - resp = self.post('post', data='silowe html') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals(self.activities_no_extras[0], util.trim_nulls(resp.json)) - - activities = Activity.query().fetch() - self.assertEqual(1, len(activities)) - self.assertEqual(self.source, activities[0].source) - self.assert_equals(self.activities_no_extras[0], - util.trim_nulls(json_loads(activities[0].activity_json))) - self.assertEqual('silowe html', activities[0].html) - - def test_post_empty(self): - FakeGrSource.activities = [] - resp = self.post('post') - self.assertEqual(400, resp.status_code) - self.assertIn('No FakeSource post found in HTML', resp.get_data(as_text=True)) - - def test_post_merge_comments(self): - # existing activity with two comments - activity = self.activities_no_extras[0] - reply = self.activities[0]['object']['replies']['items'][0] - activity['object']['replies'] = { - 'items': [reply, copy.deepcopy(reply)], - 'totalItems': 2, - } - activity['object']['replies']['items'][1]['id'] = 'abc' - key = Activity(id=activity['id'], activity_json=json_dumps(activity)).put() - - # scraped activity has different second comment - activity['object']['replies']['items'][1]['id'] = 'xyz' - FakeGrSource.activities = [activity] - - resp = self.post('post') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals(activity, util.trim_nulls(resp.json)) - - merged = json_loads(key.get().activity_json) - replies = merged['object']['replies'] - self.assert_equals(3, replies['totalItems'], replies) - self.assert_equals([reply['id'], 'abc', 'xyz'], - [r['id'] for r in replies['items']]) - - def test_post_missing_key(self): - resp = self.post('post?token=towkin') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_post_bad_key(self): - resp = self.post('post?token=towkin&key=asdf') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - # this comes from util.load_source() since the urlsafe key is malformed - self.assertIn('Bad value for key', resp.get_data(as_text=True)) - - def test_post_missing_token(self): - resp = self.post(f'post?key={self.source.urlsafe().decode()}') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - self.assertIn('Missing required parameter: token', resp.get_data(as_text=True)) - - def test_post_bad_token(self): - resp = self.post(f'post?token=nope&key={self.source.urlsafe().decode()}') - self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) - self.assertIn("nope is not authorized for any of: ['snarfed.org']", - html.unescape(resp.get_data(as_text=True))) - - def test_post_token_domain_not_in_source(self): - resp = self.post( - f'post?token=towkin&key={self.other_source.urlsafe().decode()}') - self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) - - def test_reactions(self): - key = Activity(id='tag:fa.ke,2013:123_456', source=self.source, - activity_json=json_dumps(self.activities[0])).put() - like = FakeBrowserSource.gr_source.like = { - 'objectType': 'activity', - 'verb': 'like', - 'id': 'new', - } - - resp = self.post(f'reactions?id=tag:fa.ke,2013:123_456&{self.auth}') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals([like], resp.json) - - stored = json_loads(key.get().activity_json) - self.assert_equals(self.activities[0]['object']['tags'] + [like], - stored['object']['tags']) - - def test_reactions_bad_id(self): - resp = 
self.post(f'reactions?id=789&{self.auth}') - self.assertEqual(400, resp.status_code) - self.assertIn('Expected id to be tag URI', resp.get_data(as_text=True)) - - def test_reactions_bad_scraped_data(self): - Activity(id='tag:fa.ke,2013:123_456', source=self.source, - activity_json=json_dumps(self.activities[0])).put() - - bad_json = '' - self.mox.StubOutWithMock(FakeGrSource, 'merge_scraped_reactions') - FakeGrSource.merge_scraped_reactions(bad_json, mox.IgnoreArg() - ).AndRaise((ValueError('fooey'))) - self.mox.ReplayAll() - - resp = self.post(f'reactions?id=tag:fa.ke,2013:123_456&{self.auth}', - data=bad_json) - self.assertEqual(400, resp.status_code) - self.assertIn("Couldn't parse scraped reactions: fooey", - html.unescape(resp.get_data(as_text=True))) - - def test_reactions_no_activity(self): - resp = self.post(f'reactions?id=tag:fa.ke,2013:789&{self.auth}') - self.assertEqual(404, resp.status_code) - self.assertIn('No FakeSource post found for id tag:fa.ke,2013:789', resp.get_data(as_text=True)) - - def test_reactions_missing_token(self): - resp = self.post(f'reactions?key={self.source.urlsafe().decode()}') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_reactions_bad_token(self): - resp = self.post(f'reactions?token=nope&key={self.source.urlsafe().decode()}') - self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) - self.assertIn("nope is not authorized for any of: ['snarfed.org']", - html.unescape(resp.get_data(as_text=True))) - - def test_reactions_missing_key(self): - resp = self.post('reactions?token=towkin') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_reactions_bad_key(self): - resp = self.post('reactions?token=towkin&key=asdf') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_reactions_token_domain_not_in_source(self): - resp = self.post( - f'reactions?token=towkin&key={self.other_source.urlsafe().decode()}') - self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) - - def test_reactions_wrong_activity_source(self): - Activity(id='tag:fa.ke,2013:123_456', source=self.other_source).put() - resp = self.post(f'reactions?id=tag:fa.ke,2013:123_456&{self.auth}') - self.assertEqual(403, resp.status_code) - self.assertIn( - "tag:fa.ke,2013:123_456 is owned by Key('FakeBrowserSource', '333zzz')", - html.unescape(resp.get_data(as_text=True))) - - def test_poll(self): - self.expect_task('poll', eta_seconds=0, source_key=self.source, - last_polled='1970-01-01-00-00-00') - self.mox.ReplayAll() - resp = self.post('poll') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual('OK', resp.json) - - def test_poll_missing_token(self): - resp = self.post('poll?key={self.source.urlsafe().decode()}') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_poll_bad_token(self): - resp = self.post(f'poll?token=nope&key={self.source.urlsafe().decode()}') - self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) - self.assertIn("nope is not authorized for any of: ['snarfed.org']", - html.unescape(resp.get_data(as_text=True))) - - def test_poll_missing_key(self): - resp = self.post('poll?token=towkin') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_poll_bad_key(self): - resp = self.post('poll?token=towkin&key=asdf') - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - - def test_poll_token_domain_not_in_source(self): - resp = 
self.post( - f'poll?token=towkin&key={self.other_source.urlsafe().decode()}') - self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) - - def test_token_domains(self): - resp = self.post('token-domains?token=towkin') - self.assertEqual(200, resp.status_code) - self.assertEqual(['snarfed.org'], resp.json) - - def test_token_domains_missing(self): - resp = self.post('token-domains?token=unknown') - self.assertEqual(404, resp.status_code) + def setUp(self): + super().setUp() + + self.domain = Domain(id="snarfed.org", tokens=["towkin"]).put() + FakeBrowserSource.gr_source = FakeGrSource() + self.actor["fbs_id"] = "222yyy" + self.source = FakeBrowserSource.new(actor=self.actor).put() + self.auth = f"token=towkin&key={self.source.urlsafe().decode()}" + self.other_source = FakeBrowserSource(id="333zzz", domains=["foo.com"]).put() + + for a in self.activities: + a["object"]["author"] = self.actor + + self.activities_no_extras = copy.deepcopy(self.activities) + for a in self.activities_no_extras: + del a["object"]["tags"] + + self.activities_no_replies = copy.deepcopy(self.activities_no_extras) + for a in self.activities_no_replies: + del a["object"]["replies"] + + def post(self, path_query, auth=True, **kwargs): + if auth and "?" not in path_query: + path_query += f"?{self.auth}" + return self.client.post(f"/fbs/browser/{path_query}", **kwargs) + + def test_status(self): + resp = self.client.get(f"/fbs/browser/status?{self.auth}") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + + self.assertEqual( + { + "status": "enabled", + "poll-seconds": FakeBrowserSource.FAST_POLL.total_seconds(), + }, + resp.json, + ) + + def test_homepage(self): + resp = self.post("homepage", data="homepage html", auth=False) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual("snarfed", resp.json) + + def test_homepage_no_logged_in_user(self): + FakeBrowserSource.gr_source.actor = {} + resp = self.post("homepage", data="not logged in", auth=False) + self.assertEqual(400, resp.status_code) + self.assertIn( + "Couldn't determine logged in FakeSource user", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_profile_new_user(self): + self.source.delete() + + self.expect_requests_get("https://snarfed.org/", "") + self.mox.ReplayAll() + + resp = self.post("profile?token=towkin") + + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals(self.source.urlsafe().decode(), resp.json) + + src = self.source.get() + self.assertEqual("Ryan B", src.name) + self.assertEqual(["https://snarfed.org/"], src.domain_urls) + self.assertEqual(["snarfed.org"], src.domains) + + def test_profile_existing_user_update(self): + self.assertIsNotNone(self.source.get()) + FakeBrowserSource.gr_source.actor.update( + { + "displayName": "Mrs. Foo", + "image": {"url": "http://foo/img"}, + } + ) + + # for webmention discovery + self.mox.ReplayAll() + + resp = self.post("profile?token=towkin") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals(self.source.urlsafe().decode(), resp.json) + + src = self.source.get() + self.assertEqual("Mrs. 
Foo", src.name) + self.assertEqual("http://foo/img", src.picture) + + def test_profile_fall_back_to_scraped_to_actor(self): + self.source.delete() + + self.mox.StubOutWithMock(FakeGrSource, "scraped_to_activities") + FakeGrSource.scraped_to_activities("").AndReturn(([], None)) + + self.expect_requests_get("https://snarfed.org/", "") + self.mox.ReplayAll() + + resp = self.post("profile?token=towkin") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals(self.source.urlsafe().decode(), resp.json) + + src = self.source.get() + self.assertEqual("Ryan B", src.name) + self.assertEqual(["https://snarfed.org/"], src.domain_urls) + self.assertEqual(["snarfed.org"], src.domains) + + def test_profile_no_scraped_actor(self): + self.source.delete() + FakeGrSource.actor = None + resp = self.post("profile?token=towkin") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + self.assertIn("Missing actor", html.unescape(resp.get_data(as_text=True))) + + def test_profile_private_account(self): + FakeBrowserSource.gr_source.actor["to"] = [ + {"objectType": "group", "alias": "@private"} + ] + resp = self.post("profile?token=towkin") + self.assertEqual(400, resp.status_code) + self.assertIn( + "Your FakeSource account is private.", resp.get_data(as_text=True) + ) + + def test_profile_missing_token(self): + resp = self.post("profile", auth=False) + self.assertEqual(400, resp.status_code) + self.assertIn("Missing required parameter: token", resp.get_data(as_text=True)) + + def test_profile_no_stored_token(self): + self.domain.delete() + resp = self.post("profile?token=towkin") + self.assertEqual(403, resp.status_code) + self.assertIn( + "towkin is not authorized for any of: {'snarfed.org'}", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_profile_bad_token(self): + resp = self.post("profile?token=nope") + self.assertEqual(403, resp.status_code) + self.assertIn( + "nope is not authorized for any of: {'snarfed.org'}", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_feed(self): + resp = self.post("feed") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual(self.activities_no_replies, util.trim_nulls(resp.json)) + + def test_feed_empty(self): + FakeGrSource.activities = [] + resp = self.post("feed") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual([], resp.json) + + def test_feed_missing_token(self): + resp = self.post("feed?key={self.source.urlsafe().decode()}") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + + def test_feed_bad_token(self): + resp = self.post(f"feed?token=nope&key={self.source.urlsafe().decode()}") + self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) + self.assertIn( + "nope is not authorized for any of: ['snarfed.org']", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_feed_missing_key(self): + resp = self.post("feed?token=towkin") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + + def test_feed_bad_key(self): + resp = self.post("feed?token=towkin&key=asdf") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + # this comes from util.load_source() since the urlsafe key is malformed + self.assertIn("Bad value for key", resp.get_data(as_text=True)) + + def test_feed_token_domain_not_in_source(self): + resp = self.post( + f"feed?token=towkin&key={self.other_source.urlsafe().decode()}" + ) + self.assertEqual(403, resp.status_code, 
resp.get_data(as_text=True)) + + def test_post(self): + resp = self.post("post", data="silowe html") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals(self.activities_no_extras[0], util.trim_nulls(resp.json)) + + activities = Activity.query().fetch() + self.assertEqual(1, len(activities)) + self.assertEqual(self.source, activities[0].source) + self.assert_equals( + self.activities_no_extras[0], + util.trim_nulls(json_loads(activities[0].activity_json)), + ) + self.assertEqual("silowe html", activities[0].html) + + def test_post_empty(self): + FakeGrSource.activities = [] + resp = self.post("post") + self.assertEqual(400, resp.status_code) + self.assertIn("No FakeSource post found in HTML", resp.get_data(as_text=True)) + + def test_post_merge_comments(self): + # existing activity with two comments + activity = self.activities_no_extras[0] + reply = self.activities[0]["object"]["replies"]["items"][0] + activity["object"]["replies"] = { + "items": [reply, copy.deepcopy(reply)], + "totalItems": 2, + } + activity["object"]["replies"]["items"][1]["id"] = "abc" + key = Activity(id=activity["id"], activity_json=json_dumps(activity)).put() + + # scraped activity has different second comment + activity["object"]["replies"]["items"][1]["id"] = "xyz" + FakeGrSource.activities = [activity] + + resp = self.post("post") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals(activity, util.trim_nulls(resp.json)) + + merged = json_loads(key.get().activity_json) + replies = merged["object"]["replies"] + self.assert_equals(3, replies["totalItems"], replies) + self.assert_equals( + [reply["id"], "abc", "xyz"], [r["id"] for r in replies["items"]] + ) + + def test_post_missing_key(self): + resp = self.post("post?token=towkin") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + + def test_post_bad_key(self): + resp = self.post("post?token=towkin&key=asdf") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + # this comes from util.load_source() since the urlsafe key is malformed + self.assertIn("Bad value for key", resp.get_data(as_text=True)) + + def test_post_missing_token(self): + resp = self.post(f"post?key={self.source.urlsafe().decode()}") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + self.assertIn("Missing required parameter: token", resp.get_data(as_text=True)) + + def test_post_bad_token(self): + resp = self.post(f"post?token=nope&key={self.source.urlsafe().decode()}") + self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) + self.assertIn( + "nope is not authorized for any of: ['snarfed.org']", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_post_token_domain_not_in_source(self): + resp = self.post( + f"post?token=towkin&key={self.other_source.urlsafe().decode()}" + ) + self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) + + def test_reactions(self): + key = Activity( + id="tag:fa.ke,2013:123_456", + source=self.source, + activity_json=json_dumps(self.activities[0]), + ).put() + like = FakeBrowserSource.gr_source.like = { + "objectType": "activity", + "verb": "like", + "id": "new", + } + + resp = self.post(f"reactions?id=tag:fa.ke,2013:123_456&{self.auth}") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals([like], resp.json) + + stored = json_loads(key.get().activity_json) + self.assert_equals( + self.activities[0]["object"]["tags"] + [like], stored["object"]["tags"] + 
) + + def test_reactions_bad_id(self): + resp = self.post(f"reactions?id=789&{self.auth}") + self.assertEqual(400, resp.status_code) + self.assertIn("Expected id to be tag URI", resp.get_data(as_text=True)) + + def test_reactions_bad_scraped_data(self): + Activity( + id="tag:fa.ke,2013:123_456", + source=self.source, + activity_json=json_dumps(self.activities[0]), + ).put() + + bad_json = "" + self.mox.StubOutWithMock(FakeGrSource, "merge_scraped_reactions") + FakeGrSource.merge_scraped_reactions(bad_json, mox.IgnoreArg()).AndRaise( + (ValueError("fooey")) + ) + self.mox.ReplayAll() + + resp = self.post( + f"reactions?id=tag:fa.ke,2013:123_456&{self.auth}", data=bad_json + ) + self.assertEqual(400, resp.status_code) + self.assertIn( + "Couldn't parse scraped reactions: fooey", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_reactions_no_activity(self): + resp = self.post(f"reactions?id=tag:fa.ke,2013:789&{self.auth}") + self.assertEqual(404, resp.status_code) + self.assertIn( + "No FakeSource post found for id tag:fa.ke,2013:789", + resp.get_data(as_text=True), + ) + + def test_reactions_missing_token(self): + resp = self.post(f"reactions?key={self.source.urlsafe().decode()}") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + + def test_reactions_bad_token(self): + resp = self.post(f"reactions?token=nope&key={self.source.urlsafe().decode()}") + self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) + self.assertIn( + "nope is not authorized for any of: ['snarfed.org']", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_reactions_missing_key(self): + resp = self.post("reactions?token=towkin") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + + def test_reactions_bad_key(self): + resp = self.post("reactions?token=towkin&key=asdf") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + + def test_reactions_token_domain_not_in_source(self): + resp = self.post( + f"reactions?token=towkin&key={self.other_source.urlsafe().decode()}" + ) + self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) + + def test_reactions_wrong_activity_source(self): + Activity(id="tag:fa.ke,2013:123_456", source=self.other_source).put() + resp = self.post(f"reactions?id=tag:fa.ke,2013:123_456&{self.auth}") + self.assertEqual(403, resp.status_code) + self.assertIn( + "tag:fa.ke,2013:123_456 is owned by Key('FakeBrowserSource', '333zzz')", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_poll(self): + self.expect_task( + "poll", + eta_seconds=0, + source_key=self.source, + last_polled="1970-01-01-00-00-00", + ) + self.mox.ReplayAll() + resp = self.post("poll") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual("OK", resp.json) + + def test_poll_missing_token(self): + resp = self.post("poll?key={self.source.urlsafe().decode()}") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + + def test_poll_bad_token(self): + resp = self.post(f"poll?token=nope&key={self.source.urlsafe().decode()}") + self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) + self.assertIn( + "nope is not authorized for any of: ['snarfed.org']", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_poll_missing_key(self): + resp = self.post("poll?token=towkin") + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + + def test_poll_bad_key(self): + resp = self.post("poll?token=towkin&key=asdf") + self.assertEqual(400, 
resp.status_code, resp.get_data(as_text=True)) + + def test_poll_token_domain_not_in_source(self): + resp = self.post( + f"poll?token=towkin&key={self.other_source.urlsafe().decode()}" + ) + self.assertEqual(403, resp.status_code, resp.get_data(as_text=True)) + + def test_token_domains(self): + resp = self.post("token-domains?token=towkin") + self.assertEqual(200, resp.status_code) + self.assertEqual(["snarfed.org"], resp.json) + + def test_token_domains_missing(self): + resp = self.post("token-domains?token=unknown") + self.assertEqual(404, resp.status_code) diff --git a/tests/test_cron.py b/tests/test_cron.py index 167eea3e..de8bf59c 100644 --- a/tests/test_cron.py +++ b/tests/test_cron.py @@ -29,209 +29,246 @@ class CronTest(testutil.BackgroundTest): - def setUp(self): - super().setUp() - oauth_dropins.flickr_auth.FLICKR_APP_KEY = 'my_app_key' - oauth_dropins.flickr_auth.FLICKR_APP_SECRET = 'my_app_secret' - oauth_dropins.twitter_auth.TWITTER_APP_KEY = 'my_app_key' - oauth_dropins.twitter_auth.TWITTER_APP_SECRET = 'my_app_secret' - - def test_replace_poll_tasks(self): - now = datetime.datetime.now() - - # a bunch of sources, one needs a new poll task - five_min_ago = now - datetime.timedelta(minutes=5) - day_and_half_ago = now - datetime.timedelta(hours=36) - month_ago = now - datetime.timedelta(days=30) - defaults = { - 'features': ['listen'], - 'last_webmention_sent': day_and_half_ago, - } - - self.clear_datastore() - sources = [ - # doesn't need a new poll task - FakeSource.new(last_poll_attempt=now, **defaults).put(), - FakeSource.new(last_poll_attempt=five_min_ago, **defaults).put(), - FakeSource.new(status='disabled', **defaults).put(), - FakeSource.new(status='disabled', **defaults).put(), - # need a new poll task - FakeSource.new(status='enabled', **defaults).put(), - # not signed up for listen - FakeSource.new(last_webmention_sent=day_and_half_ago).put(), - # never sent a webmention, past grace period. last polled is older than 2x - # fast poll, but within 2x slow poll. 
- FakeSource.new(features=['listen'], created=month_ago, - last_poll_attempt=day_and_half_ago).put(), - ] - - self.expect_task('poll', source_key=sources[4], last_polled='1970-01-01-00-00-00') - self.mox.ReplayAll() - - resp = self.client.get('/cron/replace_poll_tasks') - self.assertEqual(200, resp.status_code) - - def test_update_twitter_pictures(self): - sources = [] - for screen_name in ('a', 'b', 'c'): - auth_entity = oauth_dropins.twitter.TwitterAuth( - id='id', token_key='key', token_secret='secret', - user_json=json_dumps({'name': 'Ryan', - 'screen_name': screen_name, - 'profile_image_url': 'http://pi.ct/ure', - })) - auth_entity.put() - sources.append(Twitter.new(auth_entity=auth_entity, features=['listen']).put()) - - user_obj = { - 'screen_name': sources[1].id(), - 'profile_image_url_https': 'http://new/pic_normal.jpg', - 'profile_image_url': 'http://bad/http', - } - - lookup_url = gr_twitter.API_BASE + gr_twitter.API_USER - self.expect_urlopen(lookup_url % 'a', json_dumps(user_obj)) - self.expect_urlopen(lookup_url % 'b', json_dumps(user_obj)) - self.expect_urlopen(lookup_url % 'c', json_dumps(user_obj)) - self.mox.ReplayAll() - - resp = self.client.get('/cron/update_twitter_pictures') - self.assertEqual(200, resp.status_code) - - for source in sources: - self.assertEqual('http://new/pic.jpg', source.get().picture) - - def test_update_twitter_picture_user_lookup_404s(self): - auth_entity = oauth_dropins.twitter.TwitterAuth( - id='id', token_key='key', token_secret='secret', - user_json=json_dumps({'name': 'Bad', - 'screen_name': 'bad', - 'profile_image_url': 'http://pi.ct/ure', - })) - auth_entity.put() - source = Twitter.new(auth_entity=auth_entity, features=['publish']).put() - - lookup_url = gr_twitter.API_BASE + gr_twitter.API_USER - self.expect_urlopen(lookup_url % 'bad', status=404) - self.mox.ReplayAll() - - resp = self.client.get('/cron/update_twitter_pictures') - self.assertEqual(200, resp.status_code) - - self.assertEqual('http://pi.ct/ure', source.get().picture) - - def test_update_flickr_pictures(self): - flickrs = self._setup_flickr() - - self.mox.StubOutWithMock(cron, 'PAGE_SIZE') - cron.PAGE_SIZE = 1 - - # first - self.expect_urlopen( - 'https://api.flickr.com/services/rest?nojsoncallback=1&format=json&method=flickr.people.getInfo&user_id=123%40N00', - json_dumps({ - 'person': { - 'id': '789@N99', - 'nsid': '789@N99', - 'iconfarm': 9, - 'iconserver': '9876', - }})) - # second has no features, gets skipped - self.mox.ReplayAll() - - # first - self.assertEqual( - 'https://farm5.staticflickr.com/4068/buddyicons/123@N00.jpg', - flickrs[0].picture) - - resp = self.client.get('/cron/update_flickr_pictures') - self.assertEqual(200, resp.status_code) - self.assertEqual( - 'https://farm9.staticflickr.com/9876/buddyicons/789@N99.jpg', - flickrs[0].key.get().picture) - - cursor = cron.LastUpdatedPicture.get_by_id('flickr') - self.assertEqual(flickrs[0].key, cursor.last) - - # second - resp = self.client.get('/cron/update_flickr_pictures') - self.assertEqual(200, resp.status_code) - # unchanged - self.assertEqual(flickrs[1].picture, flickrs[1].key.get().picture) - - cursor = cron.LastUpdatedPicture.get_by_id('flickr') - # this would be None on prod, but the datastore emulator always returns - # more=True even when there aren't more results. 
:( - # https://github.com/googleapis/python-ndb/issues/241 - self.assertEqual(flickrs[1].key, cursor.last) - - def test_update_mastodon_pictures(self): - self.expect_requests_get( - 'https://foo.com' + test_mastodon.API_ACCOUNT % 123, - test_mastodon.ACCOUNT, headers={'Authorization': 'Bearer towkin'}) - self.mox.ReplayAll() - - mastodon = self._setup_mastodon() - resp = self.client.get('/cron/update_mastodon_pictures') - self.assertEqual(200, resp.status_code) - self.assertEqual(test_mastodon.ACCOUNT['avatar'], mastodon.key.get().picture) - - def test_update_mastodon_pictures_get_actor_404(self): - self.expect_requests_get( - 'https://foo.com' + test_mastodon.API_ACCOUNT % 123, - headers={'Authorization': 'Bearer towkin'}, - ).AndRaise( - requests.exceptions.HTTPError( - response=util.Struct(status_code='404', text='foo'))) - self.mox.ReplayAll() - - mastodon = self._setup_mastodon() - resp = self.client.get('/cron/update_mastodon_pictures') - self.assertEqual(200, resp.status_code) - self.assertEqual('http://before', mastodon.key.get().picture) - - def test_update_mastodon_pictures_get_actor_connection_failure(self): - self.expect_requests_get( - 'https://foo.com' + test_mastodon.API_ACCOUNT % 123, - headers={'Authorization': 'Bearer towkin'}, - ).AndRaise(NewConnectionError(None, None)) - self.mox.ReplayAll() - - mastodon = self._setup_mastodon() - resp = self.client.get('/cron/update_mastodon_pictures') - self.assertEqual(200, resp.status_code) - self.assertEqual('http://before', mastodon.key.get().picture) - - def _setup_flickr(self): - """Creates and test :class:`Flickr` entities.""" - flickrs = [] - - for id, features in (('123@N00', ['listen']), ('456@N11', [])): - info = copy.deepcopy(test_flickr.PERSON_INFO) - info['person']['nsid'] = id - flickr_auth = oauth_dropins.flickr.FlickrAuth( - id=id, user_json=json_dumps(info), - token_key='my_key', token_secret='my_secret') - flickr_auth.put() - flickr = Flickr.new(auth_entity=flickr_auth, features=features) - flickr.put() - flickrs.append(flickr) - - return flickrs - - def _setup_mastodon(self): - """Creates and returns a test :class:`Mastodon`.""" - app = oauth_dropins.mastodon.MastodonApp(instance='https://foo.com', data='') - app.put() - auth = oauth_dropins.mastodon.MastodonAuth( - id='@me@foo.com', access_token_str='towkin', app=app.key, - user_json=json_dumps({ - 'id': 123, - 'username': 'me', - 'acct': 'me', - 'avatar': 'http://before', - })) - auth.put() - mastodon = Mastodon.new(auth_entity=auth, features=['listen']) - mastodon.put() - return mastodon + def setUp(self): + super().setUp() + oauth_dropins.flickr_auth.FLICKR_APP_KEY = "my_app_key" + oauth_dropins.flickr_auth.FLICKR_APP_SECRET = "my_app_secret" + oauth_dropins.twitter_auth.TWITTER_APP_KEY = "my_app_key" + oauth_dropins.twitter_auth.TWITTER_APP_SECRET = "my_app_secret" + + def test_replace_poll_tasks(self): + now = datetime.datetime.now() + + # a bunch of sources, one needs a new poll task + five_min_ago = now - datetime.timedelta(minutes=5) + day_and_half_ago = now - datetime.timedelta(hours=36) + month_ago = now - datetime.timedelta(days=30) + defaults = { + "features": ["listen"], + "last_webmention_sent": day_and_half_ago, + } + + self.clear_datastore() + sources = [ + # doesn't need a new poll task + FakeSource.new(last_poll_attempt=now, **defaults).put(), + FakeSource.new(last_poll_attempt=five_min_ago, **defaults).put(), + FakeSource.new(status="disabled", **defaults).put(), + FakeSource.new(status="disabled", **defaults).put(), + # need a new poll task + 
FakeSource.new(status="enabled", **defaults).put(), + # not signed up for listen + FakeSource.new(last_webmention_sent=day_and_half_ago).put(), + # never sent a webmention, past grace period. last polled is older than 2x + # fast poll, but within 2x slow poll. + FakeSource.new( + features=["listen"], + created=month_ago, + last_poll_attempt=day_and_half_ago, + ).put(), + ] + + self.expect_task( + "poll", source_key=sources[4], last_polled="1970-01-01-00-00-00" + ) + self.mox.ReplayAll() + + resp = self.client.get("/cron/replace_poll_tasks") + self.assertEqual(200, resp.status_code) + + def test_update_twitter_pictures(self): + sources = [] + for screen_name in ("a", "b", "c"): + auth_entity = oauth_dropins.twitter.TwitterAuth( + id="id", + token_key="key", + token_secret="secret", + user_json=json_dumps( + { + "name": "Ryan", + "screen_name": screen_name, + "profile_image_url": "http://pi.ct/ure", + } + ), + ) + auth_entity.put() + sources.append( + Twitter.new(auth_entity=auth_entity, features=["listen"]).put() + ) + + user_obj = { + "screen_name": sources[1].id(), + "profile_image_url_https": "http://new/pic_normal.jpg", + "profile_image_url": "http://bad/http", + } + + lookup_url = gr_twitter.API_BASE + gr_twitter.API_USER + self.expect_urlopen(lookup_url % "a", json_dumps(user_obj)) + self.expect_urlopen(lookup_url % "b", json_dumps(user_obj)) + self.expect_urlopen(lookup_url % "c", json_dumps(user_obj)) + self.mox.ReplayAll() + + resp = self.client.get("/cron/update_twitter_pictures") + self.assertEqual(200, resp.status_code) + + for source in sources: + self.assertEqual("http://new/pic.jpg", source.get().picture) + + def test_update_twitter_picture_user_lookup_404s(self): + auth_entity = oauth_dropins.twitter.TwitterAuth( + id="id", + token_key="key", + token_secret="secret", + user_json=json_dumps( + { + "name": "Bad", + "screen_name": "bad", + "profile_image_url": "http://pi.ct/ure", + } + ), + ) + auth_entity.put() + source = Twitter.new(auth_entity=auth_entity, features=["publish"]).put() + + lookup_url = gr_twitter.API_BASE + gr_twitter.API_USER + self.expect_urlopen(lookup_url % "bad", status=404) + self.mox.ReplayAll() + + resp = self.client.get("/cron/update_twitter_pictures") + self.assertEqual(200, resp.status_code) + + self.assertEqual("http://pi.ct/ure", source.get().picture) + + def test_update_flickr_pictures(self): + flickrs = self._setup_flickr() + + self.mox.StubOutWithMock(cron, "PAGE_SIZE") + cron.PAGE_SIZE = 1 + + # first + self.expect_urlopen( + "https://api.flickr.com/services/rest?nojsoncallback=1&format=json&method=flickr.people.getInfo&user_id=123%40N00", + json_dumps( + { + "person": { + "id": "789@N99", + "nsid": "789@N99", + "iconfarm": 9, + "iconserver": "9876", + } + } + ), + ) + # second has no features, gets skipped + self.mox.ReplayAll() + + # first + self.assertEqual( + "https://farm5.staticflickr.com/4068/buddyicons/123@N00.jpg", + flickrs[0].picture, + ) + + resp = self.client.get("/cron/update_flickr_pictures") + self.assertEqual(200, resp.status_code) + self.assertEqual( + "https://farm9.staticflickr.com/9876/buddyicons/789@N99.jpg", + flickrs[0].key.get().picture, + ) + + cursor = cron.LastUpdatedPicture.get_by_id("flickr") + self.assertEqual(flickrs[0].key, cursor.last) + + # second + resp = self.client.get("/cron/update_flickr_pictures") + self.assertEqual(200, resp.status_code) + # unchanged + self.assertEqual(flickrs[1].picture, flickrs[1].key.get().picture) + + cursor = cron.LastUpdatedPicture.get_by_id("flickr") + # this would be None on 
prod, but the datastore emulator always returns + # more=True even when there aren't more results. :( + # https://github.com/googleapis/python-ndb/issues/241 + self.assertEqual(flickrs[1].key, cursor.last) + + def test_update_mastodon_pictures(self): + self.expect_requests_get( + "https://foo.com" + test_mastodon.API_ACCOUNT % 123, + test_mastodon.ACCOUNT, + headers={"Authorization": "Bearer towkin"}, + ) + self.mox.ReplayAll() + + mastodon = self._setup_mastodon() + resp = self.client.get("/cron/update_mastodon_pictures") + self.assertEqual(200, resp.status_code) + self.assertEqual(test_mastodon.ACCOUNT["avatar"], mastodon.key.get().picture) + + def test_update_mastodon_pictures_get_actor_404(self): + self.expect_requests_get( + "https://foo.com" + test_mastodon.API_ACCOUNT % 123, + headers={"Authorization": "Bearer towkin"}, + ).AndRaise( + requests.exceptions.HTTPError( + response=util.Struct(status_code="404", text="foo") + ) + ) + self.mox.ReplayAll() + + mastodon = self._setup_mastodon() + resp = self.client.get("/cron/update_mastodon_pictures") + self.assertEqual(200, resp.status_code) + self.assertEqual("http://before", mastodon.key.get().picture) + + def test_update_mastodon_pictures_get_actor_connection_failure(self): + self.expect_requests_get( + "https://foo.com" + test_mastodon.API_ACCOUNT % 123, + headers={"Authorization": "Bearer towkin"}, + ).AndRaise(NewConnectionError(None, None)) + self.mox.ReplayAll() + + mastodon = self._setup_mastodon() + resp = self.client.get("/cron/update_mastodon_pictures") + self.assertEqual(200, resp.status_code) + self.assertEqual("http://before", mastodon.key.get().picture) + + def _setup_flickr(self): + """Creates and test :class:`Flickr` entities.""" + flickrs = [] + + for id, features in (("123@N00", ["listen"]), ("456@N11", [])): + info = copy.deepcopy(test_flickr.PERSON_INFO) + info["person"]["nsid"] = id + flickr_auth = oauth_dropins.flickr.FlickrAuth( + id=id, + user_json=json_dumps(info), + token_key="my_key", + token_secret="my_secret", + ) + flickr_auth.put() + flickr = Flickr.new(auth_entity=flickr_auth, features=features) + flickr.put() + flickrs.append(flickr) + + return flickrs + + def _setup_mastodon(self): + """Creates and returns a test :class:`Mastodon`.""" + app = oauth_dropins.mastodon.MastodonApp(instance="https://foo.com", data="") + app.put() + auth = oauth_dropins.mastodon.MastodonAuth( + id="@me@foo.com", + access_token_str="towkin", + app=app.key, + user_json=json_dumps( + { + "id": 123, + "username": "me", + "acct": "me", + "avatar": "http://before", + } + ), + ) + auth.put() + mastodon = Mastodon.new(auth_entity=auth, features=["listen"]) + mastodon.put() + return mastodon diff --git a/tests/test_facebook.py b/tests/test_facebook.py index 6d2acbe5..85247229 100644 --- a/tests/test_facebook.py +++ b/tests/test_facebook.py @@ -5,17 +5,17 @@ from granary import facebook as gr_facebook from granary.tests.test_facebook import ( - MBASIC_HTML_TIMELINE, - MBASIC_HTML_POST, - MBASIC_HTML_REACTIONS, - MBASIC_HTML_ABOUT, - MBASIC_ACTOR, - MBASIC_ABOUT_ACTOR, - MBASIC_ACTIVITIES, - MBASIC_ACTIVITIES_REPLIES, - MBASIC_ACTIVITIES_REPLIES_REACTIONS, - MBASIC_ACTIVITY, - MBASIC_REACTION_TAGS, + MBASIC_HTML_TIMELINE, + MBASIC_HTML_POST, + MBASIC_HTML_REACTIONS, + MBASIC_HTML_ABOUT, + MBASIC_ACTOR, + MBASIC_ABOUT_ACTOR, + MBASIC_ACTIVITIES, + MBASIC_ACTIVITIES_REPLIES, + MBASIC_ACTIVITIES_REPLIES_REACTIONS, + MBASIC_ACTIVITY, + MBASIC_REACTION_TAGS, ) from oauth_dropins.webutil.util import json_dumps, json_loads @@ -26,153 +26,180 
@@ class FacebookTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.actor['numeric_id'] = '212038' - self.source = Facebook.new(actor=self.actor) - self.domain = Domain(id='snarfed.org', tokens=['towkin']).put() - self.auth = f'token=towkin&key={self.source.key.urlsafe().decode()}' - self.mox.StubOutWithMock(gr_facebook, 'now_fn') - - def get_response(self, path_query, auth=True, **kwargs): - if auth and '?' not in path_query: - path_query += f'?{self.auth}' - return self.client.post(f'/facebook/browser/{path_query}', **kwargs) - - def store_activity(self): - activity = copy.deepcopy(MBASIC_ACTIVITIES[0]) - activity['actor']['url'] = 'http://snarfed.org/' - return Activity(id='tag:facebook.com,2013:123', source=self.source.key, - activity_json=json_dumps(activity)).put() - - def test_canonicalize_url_basic(self): - for expected, input in ( - ('https://www.facebook.com/212038/posts/314159', - 'https://facebook.com/snarfed/photos.php?fbid=314159'), - # note. https://github.com/snarfed/bridgy/issues/429 - ('https://www.facebook.com/212038/posts/314159', - 'https://www.facebook.com/notes/ryan-b/title/314159'), - ('https://www.facebook.com/212038/posts/314159', - 'https://www.facebook.com/photo.php?fbid=314159&set=a.456.2393090.212038&type=1&theater'), - ('https://www.facebook.com/212038/posts/314159', - 'https://facebook.com/permalink.php?story_fbid=314159&id=212038'), - ('https://www.facebook.com/212038/posts/314159', - 'https://facebook.com/permalink.php?story_fbid=314159&id=212038'), - ('https://www.facebook.com/212038/posts/314159', - 'https://m.facebook.com/story.php?id=212038&story_fbid=314159'), - ): - self.assertEqual(expected, self.source.canonicalize_url(input), input) - - def test_canonicalize_url_username(self): - # we shouldn't touch username when it appears elsewhere in the url - self.source.username = 'snarfed' - self.assertEqual('https://www.facebook.com/25624/posts/snarfed', - self.source.canonicalize_url( - 'http://www.facebook.com/25624/posts/snarfed')) - - # if no username, fall through - self.source.username = None - self.assertEqual('https://www.facebook.com/212038/posts/444', - self.source.canonicalize_url( - 'https://www.facebook.com/mr-disguise/posts/444')) - - def test_canonicalize_url_not_facebook(self): - """Shouldn't try to extract id and fetch post for non-facebook.com URLs.""" - url = 'https://twitter.com/foo/status/123' - self.assertIsNone(self.source.canonicalize_url(url)) - - def test_profile_new_user(self): - self.assertIsNone(Facebook.get_by_id('212038')) - - # webmention discovery - self.expect_requests_get('https://snarfed.org/', '') - self.mox.ReplayAll() - - resp = self.get_response('profile?token=towkin', data=MBASIC_HTML_ABOUT) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual(self.source.key.urlsafe().decode(), resp.json) - - fb = Facebook.get_by_id('212038') - self.assertEqual('Ryan Barrett', fb.name) - self.assertEqual('https://scontent-sjc3-1.xx.fbcdn.net/v/t1.0-1/cp0/e15/q65/p74x74/39610935_10104076860151373_4179282966062563328_o.jpg?...', fb.picture) - self.assertEqual(['https://snarfed.org/', 'https://foo.bar/'], fb.domain_urls) - self.assertEqual(['snarfed.org', 'foo.bar'], fb.domains) - - def test_feed(self): - self.source.put() - gr_facebook.now_fn().MultipleTimes().AndReturn(datetime(1999, 1, 1)) - self.mox.ReplayAll() - - resp = self.get_response('feed', data=MBASIC_HTML_TIMELINE) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - 
self.assertEqual(MBASIC_ACTIVITIES, resp.json) - - def test_post(self): - self.source.put() - gr_facebook.now_fn().MultipleTimes().AndReturn(datetime(1999, 1, 1)) - self.mox.ReplayAll() - - resp = self.get_response(f'post', data=MBASIC_HTML_POST) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual(MBASIC_ACTIVITY, resp.json) - - activities = Activity.query().fetch() - self.assertEqual(1, len(activities)) - self.assertEqual(self.source.key, activities[0].source) - self.assertEqual(MBASIC_ACTIVITY, json_loads(activities[0].activity_json)) - - def test_post_empty(self): - key = self.source.put() - - resp = self.get_response(f'post?token=towkin&key={key.urlsafe().decode()}', - data="""\ + def setUp(self): + super().setUp() + self.actor["numeric_id"] = "212038" + self.source = Facebook.new(actor=self.actor) + self.domain = Domain(id="snarfed.org", tokens=["towkin"]).put() + self.auth = f"token=towkin&key={self.source.key.urlsafe().decode()}" + self.mox.StubOutWithMock(gr_facebook, "now_fn") + + def get_response(self, path_query, auth=True, **kwargs): + if auth and "?" not in path_query: + path_query += f"?{self.auth}" + return self.client.post(f"/facebook/browser/{path_query}", **kwargs) + + def store_activity(self): + activity = copy.deepcopy(MBASIC_ACTIVITIES[0]) + activity["actor"]["url"] = "http://snarfed.org/" + return Activity( + id="tag:facebook.com,2013:123", + source=self.source.key, + activity_json=json_dumps(activity), + ).put() + + def test_canonicalize_url_basic(self): + for expected, input in ( + ( + "https://www.facebook.com/212038/posts/314159", + "https://facebook.com/snarfed/photos.php?fbid=314159", + ), + # note. https://github.com/snarfed/bridgy/issues/429 + ( + "https://www.facebook.com/212038/posts/314159", + "https://www.facebook.com/notes/ryan-b/title/314159", + ), + ( + "https://www.facebook.com/212038/posts/314159", + "https://www.facebook.com/photo.php?fbid=314159&set=a.456.2393090.212038&type=1&theater", + ), + ( + "https://www.facebook.com/212038/posts/314159", + "https://facebook.com/permalink.php?story_fbid=314159&id=212038", + ), + ( + "https://www.facebook.com/212038/posts/314159", + "https://facebook.com/permalink.php?story_fbid=314159&id=212038", + ), + ( + "https://www.facebook.com/212038/posts/314159", + "https://m.facebook.com/story.php?id=212038&story_fbid=314159", + ), + ): + self.assertEqual(expected, self.source.canonicalize_url(input), input) + + def test_canonicalize_url_username(self): + # we shouldn't touch username when it appears elsewhere in the url + self.source.username = "snarfed" + self.assertEqual( + "https://www.facebook.com/25624/posts/snarfed", + self.source.canonicalize_url("http://www.facebook.com/25624/posts/snarfed"), + ) + + # if no username, fall through + self.source.username = None + self.assertEqual( + "https://www.facebook.com/212038/posts/444", + self.source.canonicalize_url( + "https://www.facebook.com/mr-disguise/posts/444" + ), + ) + + def test_canonicalize_url_not_facebook(self): + """Shouldn't try to extract id and fetch post for non-facebook.com URLs.""" + url = "https://twitter.com/foo/status/123" + self.assertIsNone(self.source.canonicalize_url(url)) + + def test_profile_new_user(self): + self.assertIsNone(Facebook.get_by_id("212038")) + + # webmention discovery + self.expect_requests_get("https://snarfed.org/", "") + self.mox.ReplayAll() + + resp = self.get_response("profile?token=towkin", data=MBASIC_HTML_ABOUT) + self.assertEqual(200, resp.status_code, 
resp.get_data(as_text=True)) + self.assertEqual(self.source.key.urlsafe().decode(), resp.json) + + fb = Facebook.get_by_id("212038") + self.assertEqual("Ryan Barrett", fb.name) + self.assertEqual( + "https://scontent-sjc3-1.xx.fbcdn.net/v/t1.0-1/cp0/e15/q65/p74x74/39610935_10104076860151373_4179282966062563328_o.jpg?...", + fb.picture, + ) + self.assertEqual(["https://snarfed.org/", "https://foo.bar/"], fb.domain_urls) + self.assertEqual(["snarfed.org", "foo.bar"], fb.domains) + + def test_feed(self): + self.source.put() + gr_facebook.now_fn().MultipleTimes().AndReturn(datetime(1999, 1, 1)) + self.mox.ReplayAll() + + resp = self.get_response("feed", data=MBASIC_HTML_TIMELINE) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual(MBASIC_ACTIVITIES, resp.json) + + def test_post(self): + self.source.put() + gr_facebook.now_fn().MultipleTimes().AndReturn(datetime(1999, 1, 1)) + self.mox.ReplayAll() + + resp = self.get_response(f"post", data=MBASIC_HTML_POST) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual(MBASIC_ACTIVITY, resp.json) + + activities = Activity.query().fetch() + self.assertEqual(1, len(activities)) + self.assertEqual(self.source.key, activities[0].source) + self.assertEqual(MBASIC_ACTIVITY, json_loads(activities[0].activity_json)) + + def test_post_empty(self): + key = self.source.put() + + resp = self.get_response( + f"post?token=towkin&key={key.urlsafe().decode()}", + data="""\ - """) - self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) - self.assertIn('No Facebook post found in HTML', resp.get_data(as_text=True)) - - def test_post_merge_comments(self): - key = self.source.put() - gr_facebook.now_fn().MultipleTimes().AndReturn(datetime(1999, 1, 1)) - self.mox.ReplayAll() - - # existing activity with one of the two comments in MBASIC_ACTIVITIES - existing_activity = copy.deepcopy(MBASIC_ACTIVITIES[1]) - existing_activity['object']['replies'] = { - 'totalItems': 1, - 'items': [MBASIC_ACTIVITIES_REPLIES[1]['object']['replies']['items'][0]], - } - activity_key = Activity(id='tag:facebook.com,2013:456', - activity_json=json_dumps(existing_activity)).put() - - # send MBASIC_HTML_POST to /post, check that the response and stored - # activity have both of its comments - resp = self.get_response('post', data=MBASIC_HTML_POST) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals(MBASIC_ACTIVITY, resp.json) - - activity = activity_key.get() - self.assert_equals(MBASIC_ACTIVITY, json_loads(activity.activity_json)) - - def test_likes(self): - self.source.put() - key = self.store_activity() - resp = self.get_response(f'likes?id=tag:facebook.com,2013:123&{self.auth}', - data=MBASIC_HTML_REACTIONS) - - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals(MBASIC_REACTION_TAGS('123'), resp.json) - - activity = json_loads(key.get().activity_json) - self.assert_equals(MBASIC_REACTION_TAGS('123'), activity['object']['tags']) - - def test_poll(self): - key = self.source.put() - self.expect_task('poll', eta_seconds=0, source_key=key, - last_polled='1970-01-01-00-00-00') - self.mox.ReplayAll() - - resp = self.get_response(f'poll') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual('OK', resp.json) + """, + ) + self.assertEqual(400, resp.status_code, resp.get_data(as_text=True)) + self.assertIn("No Facebook post found in HTML", resp.get_data(as_text=True)) + + def test_post_merge_comments(self): 
+ key = self.source.put() + gr_facebook.now_fn().MultipleTimes().AndReturn(datetime(1999, 1, 1)) + self.mox.ReplayAll() + + # existing activity with one of the two comments in MBASIC_ACTIVITIES + existing_activity = copy.deepcopy(MBASIC_ACTIVITIES[1]) + existing_activity["object"]["replies"] = { + "totalItems": 1, + "items": [MBASIC_ACTIVITIES_REPLIES[1]["object"]["replies"]["items"][0]], + } + activity_key = Activity( + id="tag:facebook.com,2013:456", activity_json=json_dumps(existing_activity) + ).put() + + # send MBASIC_HTML_POST to /post, check that the response and stored + # activity have both of its comments + resp = self.get_response("post", data=MBASIC_HTML_POST) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals(MBASIC_ACTIVITY, resp.json) + + activity = activity_key.get() + self.assert_equals(MBASIC_ACTIVITY, json_loads(activity.activity_json)) + + def test_likes(self): + self.source.put() + key = self.store_activity() + resp = self.get_response( + f"likes?id=tag:facebook.com,2013:123&{self.auth}", + data=MBASIC_HTML_REACTIONS, + ) + + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals(MBASIC_REACTION_TAGS("123"), resp.json) + + activity = json_loads(key.get().activity_json) + self.assert_equals(MBASIC_REACTION_TAGS("123"), activity["object"]["tags"]) + + def test_poll(self): + key = self.source.put() + self.expect_task( + "poll", eta_seconds=0, source_key=key, last_polled="1970-01-01-00-00-00" + ) + self.mox.ReplayAll() + + resp = self.get_response(f"poll") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual("OK", resp.json) diff --git a/tests/test_flickr.py b/tests/test_flickr.py index 8f1c46de..ad699bb0 100644 --- a/tests/test_flickr.py +++ b/tests/test_flickr.py @@ -12,132 +12,153 @@ from . import testutil -class FlickrBaseTest(): - def setUp(self): - super().setUp() - oauth_dropins.flickr_auth.FLICKR_APP_KEY = 'my_app_key' - oauth_dropins.flickr_auth.FLICKR_APP_SECRET = 'my_app_secret' - - self.auth_entity = oauth_dropins.flickr.FlickrAuth( - id='my_string_id', - token_key='my_key', token_secret='my_secret', - user_json=json_dumps(gr_test_flickr.PERSON_INFO)) - - self.auth_entity.put() - self.flickr = flickr.Flickr.new(self.auth_entity) - - def expect_call_api_method(self, method, params, result): - full_params = { - 'nojsoncallback': 1, - 'format': 'json', - 'method': method, - } - full_params.update(params) - self.expect_urlopen('https://api.flickr.com/services/rest?' - + urllib.parse.urlencode(full_params), result) +class FlickrBaseTest: + def setUp(self): + super().setUp() + oauth_dropins.flickr_auth.FLICKR_APP_KEY = "my_app_key" + oauth_dropins.flickr_auth.FLICKR_APP_SECRET = "my_app_secret" + + self.auth_entity = oauth_dropins.flickr.FlickrAuth( + id="my_string_id", + token_key="my_key", + token_secret="my_secret", + user_json=json_dumps(gr_test_flickr.PERSON_INFO), + ) + + self.auth_entity.put() + self.flickr = flickr.Flickr.new(self.auth_entity) + + def expect_call_api_method(self, method, params, result): + full_params = { + "nojsoncallback": 1, + "format": "json", + "method": method, + } + full_params.update(params) + self.expect_urlopen( + "https://api.flickr.com/services/rest?" 
+ + urllib.parse.urlencode(full_params), + result, + ) class FlickrTest(FlickrBaseTest, testutil.AppTest): - - def test_new(self): - self.assertEqual(self.auth_entity, self.flickr.auth_entity.get()) - self.assertEqual('39216764@N00', self.flickr.key.id()) - self.assertEqual('Kyle Mahan', self.flickr.name) - self.assertEqual('kindofblue115', self.flickr.username) - self.assertEqual('https://www.flickr.com/people/kindofblue115/', - self.flickr.silo_url()) - self.assertEqual('tag:flickr.com,2013:kindofblue115', self.flickr.user_tag_id()) - - @staticmethod - def prepare_person_tags(): - flickr.Flickr(id='555', username='username').put() - flickr.Flickr(id='666', domains=['my.domain']).put() - input_urls = ( - 'https://unknown/', - 'https://www.flickr.com/photos/444/', - 'https://flickr.com/people/444/', - 'https://flickr.com/photos/username/', - 'https://www.flickr.com/people/username/', - 'https://my.domain/', - ) - expected_urls = ( - 'https://unknown/', - 'https://www.flickr.com/photos/444/', - 'https://flickr.com/people/444/', - 'https://flickr.com/photos/username/', - 'https://www.flickr.com/people/username/', - 'https://www.flickr.com/people/666/', - ) - return input_urls, expected_urls - - def test_preprocess_for_publish(self): - input_urls, expected_urls = self.prepare_person_tags() - activity = { - 'object': { - 'objectType': 'note', - 'content': 'a msg', - 'tags': [{'objectType': 'person', 'url': url} for url in input_urls], - }, - } - self.flickr.preprocess_for_publish(activity) - self.assert_equals(expected_urls, [t['url'] for t in activity['object']['tags']]) - - def test_canonicalize_url(self): - def check(expected, url): - for input in expected, url: - self.assertEqual(expected, self.flickr.canonicalize_url(input)) - - check('https://www.flickr.com/photos/xyz/123/', - 'http://flickr.com/photos/xyz/123') - check('https://www.flickr.com/photos/xyz/123/', - 'https://www.flickr.com/photos/xyz/123') - check('https://www.flickr.com/people/xyz/', - 'http://flickr.com/people/xyz') - - self.flickr.username = 'mee' - check('https://www.flickr.com/photos/39216764@N00/123/', - 'http://flickr.com/photos/mee/123') - check('https://www.flickr.com/people/39216764@N00/', - 'http://flickr.com/people/mee') - - self.assertIsNone(self.flickr.canonicalize_url( - 'https://login.yahoo.com/config/login?...')) - - def test_label_name(self): - # default to name - self.assertEqual('Kyle Mahan', self.flickr.label_name()) - # fall back to username - self.flickr.name = None - self.assertEqual('kindofblue115', self.flickr.label_name()) - # final fallback to key id - self.flickr.username = None - self.assertEqual('39216764@N00', self.flickr.label_name()) + def test_new(self): + self.assertEqual(self.auth_entity, self.flickr.auth_entity.get()) + self.assertEqual("39216764@N00", self.flickr.key.id()) + self.assertEqual("Kyle Mahan", self.flickr.name) + self.assertEqual("kindofblue115", self.flickr.username) + self.assertEqual( + "https://www.flickr.com/people/kindofblue115/", self.flickr.silo_url() + ) + self.assertEqual("tag:flickr.com,2013:kindofblue115", self.flickr.user_tag_id()) + + @staticmethod + def prepare_person_tags(): + flickr.Flickr(id="555", username="username").put() + flickr.Flickr(id="666", domains=["my.domain"]).put() + input_urls = ( + "https://unknown/", + "https://www.flickr.com/photos/444/", + "https://flickr.com/people/444/", + "https://flickr.com/photos/username/", + "https://www.flickr.com/people/username/", + "https://my.domain/", + ) + expected_urls = ( + "https://unknown/", + 
"https://www.flickr.com/photos/444/", + "https://flickr.com/people/444/", + "https://flickr.com/photos/username/", + "https://www.flickr.com/people/username/", + "https://www.flickr.com/people/666/", + ) + return input_urls, expected_urls + + def test_preprocess_for_publish(self): + input_urls, expected_urls = self.prepare_person_tags() + activity = { + "object": { + "objectType": "note", + "content": "a msg", + "tags": [{"objectType": "person", "url": url} for url in input_urls], + }, + } + self.flickr.preprocess_for_publish(activity) + self.assert_equals( + expected_urls, [t["url"] for t in activity["object"]["tags"]] + ) + + def test_canonicalize_url(self): + def check(expected, url): + for input in expected, url: + self.assertEqual(expected, self.flickr.canonicalize_url(input)) + + check( + "https://www.flickr.com/photos/xyz/123/", "http://flickr.com/photos/xyz/123" + ) + check( + "https://www.flickr.com/photos/xyz/123/", + "https://www.flickr.com/photos/xyz/123", + ) + check("https://www.flickr.com/people/xyz/", "http://flickr.com/people/xyz") + + self.flickr.username = "mee" + check( + "https://www.flickr.com/photos/39216764@N00/123/", + "http://flickr.com/photos/mee/123", + ) + check( + "https://www.flickr.com/people/39216764@N00/", + "http://flickr.com/people/mee", + ) + + self.assertIsNone( + self.flickr.canonicalize_url("https://login.yahoo.com/config/login?...") + ) + + def test_label_name(self): + # default to name + self.assertEqual("Kyle Mahan", self.flickr.label_name()) + # fall back to username + self.flickr.name = None + self.assertEqual("kindofblue115", self.flickr.label_name()) + # final fallback to key id + self.flickr.username = None + self.assertEqual("39216764@N00", self.flickr.label_name()) class FlickrPollTest(FlickrBaseTest, testutil.BackgroundTest): - - def test_revoked_disables_source(self): - """ Make sure polling Flickr with a revoked token will - disable it as a source. - """ - self.expect_call_api_method('flickr.people.getPhotos', { - 'extras': granary.flickr.Flickr.API_EXTRAS, - 'per_page': 50, - 'user_id': 'me', - }, json_dumps({ - 'stat': 'fail', - 'code': 98, - 'message': 'Invalid auth token', - })) - self.mox.ReplayAll() - - self.flickr.features = ['listen'] - self.flickr.put() - self.assertEqual('enabled', self.flickr.status) - - self.client.post('/_ah/queue/poll', data={ - 'source_key': self.flickr.key.urlsafe().decode(), - 'last_polled': '1970-01-01-00-00-00', - }) - self.assertEqual('disabled', self.flickr.key.get().status) - + def test_revoked_disables_source(self): + """Make sure polling Flickr with a revoked token will + disable it as a source. 
+ """ + self.expect_call_api_method( + "flickr.people.getPhotos", + { + "extras": granary.flickr.Flickr.API_EXTRAS, + "per_page": 50, + "user_id": "me", + }, + json_dumps( + { + "stat": "fail", + "code": 98, + "message": "Invalid auth token", + } + ), + ) + self.mox.ReplayAll() + + self.flickr.features = ["listen"] + self.flickr.put() + self.assertEqual("enabled", self.flickr.status) + + self.client.post( + "/_ah/queue/poll", + data={ + "source_key": self.flickr.key.urlsafe().decode(), + "last_polled": "1970-01-01-00-00-00", + }, + ) + self.assertEqual("disabled", self.flickr.key.get().status) diff --git a/tests/test_github.py b/tests/test_github.py index 7e1b17b4..f4baa62a 100644 --- a/tests/test_github.py +++ b/tests/test_github.py @@ -9,22 +9,26 @@ class GitHubTest(testutil.AppTest): + def setUp(self): + super().setUp() + self.auth_entity = oauth_dropins.github.GitHubAuth( + id="snarfed", + access_token_str="towkin", + user_json=json_dumps(gr_test_github.USER_GRAPHQL), + ) - def setUp(self): - super().setUp() - self.auth_entity = oauth_dropins.github.GitHubAuth( - id='snarfed', access_token_str='towkin', - user_json=json_dumps(gr_test_github.USER_GRAPHQL)) + self.auth_entity.put() + self.gh = github.GitHub.new(self.auth_entity) - self.auth_entity.put() - self.gh = github.GitHub.new(self.auth_entity) - - def test_new(self): - self.assertEqual(self.auth_entity, self.gh.auth_entity.get()) - self.assertEqual('snarfed', self.gh.key.id()) - self.assertEqual('snarfed', self.gh.label_name()) - self.assertEqual('Ryan Barrett', self.gh.name) - self.assertEqual('https://github.com/snarfed', self.gh.silo_url()) - self.assertEqual('https://avatars2.githubusercontent.com/u/778068?v=4', - self.gh.picture) - self.assertEqual('tag:github.com,2013:MDQ6VXNlcjc3ODA2OA==', self.gh.user_tag_id()) + def test_new(self): + self.assertEqual(self.auth_entity, self.gh.auth_entity.get()) + self.assertEqual("snarfed", self.gh.key.id()) + self.assertEqual("snarfed", self.gh.label_name()) + self.assertEqual("Ryan Barrett", self.gh.name) + self.assertEqual("https://github.com/snarfed", self.gh.silo_url()) + self.assertEqual( + "https://avatars2.githubusercontent.com/u/778068?v=4", self.gh.picture + ) + self.assertEqual( + "tag:github.com,2013:MDQ6VXNlcjc3ODA2OA==", self.gh.user_tag_id() + ) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 58f0dd18..ee847628 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -17,64 +17,77 @@ class HandlersTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.source = testutil.FakeSource.new( - features=['listen'], domains=['or.ig', 'fa.ke'], - domain_urls=['http://or.ig', 'https://fa.ke']) - self.source.put() - self.activities = [{ - 'object': { - 'id': 'tag:fa.ke,2013:000', - 'url': 'http://fa.ke/000', - 'content': 'asdf http://other/link qwert', - 'author': { - 'id': self.source.user_tag_id(), - 'image': {'url': 'http://example.com/ryan/image'}, - }, - 'tags': [{ - 'id': 'tag:fa.ke,2013:nobody', - }, { - 'id': self.source.user_tag_id(), - 'objectType': 'person', - }], - 'upstreamDuplicates': ['http://or.ig/post'], - }}] - FakeGrSource.activities = self.activities - FakeGrSource.comment = { - 'id': 'tag:fa.ke,2013:a1-b2.c3', # test alphanumeric id (like G+) - 'content': 'qwert', - 'inReplyTo': [{'url': 'http://fa.ke/000'}], - 'author': {'image': {'url': 'http://example.com/ryan/image'}}, - 'tags': self.activities[0]['object']['tags'], - } - FakeGrSource.event = { - 'object': { - 'id': 'tag:fa.ke,2013:123', - 'url': 
'http://fa.ke/events/123', - 'content': 'Come to the next #Bridgy meetup http://other/link', - 'upstreamDuplicates': ['http://or.ig/event'], - }, - 'id': '123', - 'url': 'http://fa.ke/events/123', - } - - def check_response(self, url_template, expected_body=None, expected_status=200): - resp = self.client.get(url_template % self.source.key.string_id(), - # use an HTTPS request so that URL schemes are converted - base_url='https://localhost/') - self.assertEqual(expected_status, resp.status_code) - - if expected_body: - header_lines = len(handlers.TEMPLATE.template.splitlines()) - 2 - actual = '\n'.join(resp.get_data(as_text=True).splitlines()[header_lines:-1]) - self.assert_multiline_equals(expected_body, actual, ignore_blanks=True) - - return resp - - def test_post_html(self): - self.check_response('/post/fake/%s/000', """\ + def setUp(self): + super().setUp() + self.source = testutil.FakeSource.new( + features=["listen"], + domains=["or.ig", "fa.ke"], + domain_urls=["http://or.ig", "https://fa.ke"], + ) + self.source.put() + self.activities = [ + { + "object": { + "id": "tag:fa.ke,2013:000", + "url": "http://fa.ke/000", + "content": "asdf http://other/link qwert", + "author": { + "id": self.source.user_tag_id(), + "image": {"url": "http://example.com/ryan/image"}, + }, + "tags": [ + { + "id": "tag:fa.ke,2013:nobody", + }, + { + "id": self.source.user_tag_id(), + "objectType": "person", + }, + ], + "upstreamDuplicates": ["http://or.ig/post"], + } + } + ] + FakeGrSource.activities = self.activities + FakeGrSource.comment = { + "id": "tag:fa.ke,2013:a1-b2.c3", # test alphanumeric id (like G+) + "content": "qwert", + "inReplyTo": [{"url": "http://fa.ke/000"}], + "author": {"image": {"url": "http://example.com/ryan/image"}}, + "tags": self.activities[0]["object"]["tags"], + } + FakeGrSource.event = { + "object": { + "id": "tag:fa.ke,2013:123", + "url": "http://fa.ke/events/123", + "content": "Come to the next #Bridgy meetup http://other/link", + "upstreamDuplicates": ["http://or.ig/event"], + }, + "id": "123", + "url": "http://fa.ke/events/123", + } + + def check_response(self, url_template, expected_body=None, expected_status=200): + resp = self.client.get( + url_template % self.source.key.string_id(), + # use an HTTPS request so that URL schemes are converted + base_url="https://localhost/", + ) + self.assertEqual(expected_status, resp.status_code) + + if expected_body: + header_lines = len(handlers.TEMPLATE.template.splitlines()) - 2 + actual = "\n".join( + resp.get_data(as_text=True).splitlines()[header_lines:-1] + ) + self.assert_multiline_equals(expected_body, actual, ignore_blanks=True) + + return resp + + def test_post_html(self): + self.check_response( + "/post/fake/%s/000", + """\
tag:fa.ke,2013:000
@@ -94,127 +107,150 @@ def test_post_html(self):
-""" % {'key': self.source.key.id(), 'id': self.source.user_tag_id()}) - - def test_post_json(self): - resp = self.client.get( - f'/post/fake/{self.source.key.string_id()}/000?format=json', - base_url='https://localhost/') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals({ - 'type': ['h-entry'], - 'properties': { - 'uid': ['tag:fa.ke,2013:000'], - 'url': ['http://fa.ke/000', 'http://or.ig/post'], - 'content': [{ 'html': """\ +""" + % {"key": self.source.key.id(), "id": self.source.user_tag_id()}, + ) + + def test_post_json(self): + resp = self.client.get( + f"/post/fake/{self.source.key.string_id()}/000?format=json", + base_url="https://localhost/", + ) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals( + { + "type": ["h-entry"], + "properties": { + "uid": ["tag:fa.ke,2013:000"], + "url": ["http://fa.ke/000", "http://or.ig/post"], + "content": [ + { + "html": """\ asdf http://other/link qwert """, - 'value': 'asdf http://other/link qwert', - }], - 'author': [{ - 'type': ['h-card'], - 'properties': { - 'uid': [self.source.user_tag_id()], - 'url': ['http://fa.ke/%s' % self.source.key.id()], - 'photo': ['https://example.com/ryan/image'], + "value": "asdf http://other/link qwert", + } + ], + "author": [ + { + "type": ["h-card"], + "properties": { + "uid": [self.source.user_tag_id()], + "url": ["http://fa.ke/%s" % self.source.key.id()], + "photo": ["https://example.com/ryan/image"], + }, + } + ], + "category": [ + { + "type": ["h-card"], + "properties": { + "uid": [self.source.user_tag_id()], + "url": ["http://or.ig", "https://fa.ke"], + }, + } + ], + }, }, - }], - 'category': [{ - 'type': ['h-card'], - 'properties': { - 'uid': [self.source.user_tag_id()], - 'url': ['http://or.ig', 'https://fa.ke'], - }, - }], - }, - }, resp.json) - - def test_post_missing(self): - FakeGrSource.activities = [] - self.check_response('/post/fake/%s/000', expected_status=404) - - def test_bad_source_type(self): - self.check_response('/post/not_a_type/%s/000', expected_status=400) - - def test_bad_user(self): - self.check_response('/post/fake/not_a_user_%s/000', expected_status=400) - - def test_disabled_user(self): - self.source.status = 'disabled' - self.source.put() - self.check_response('/post/fake/%s/000', expected_status=400) - - def test_user_without_listen_feature(self): - self.source.features = [] - self.source.put() - self.check_response('/post/fake/%s/000', expected_status=400) - - def test_bad_format(self): - self.check_response('/post/fake/%s/000?format=asdf', expected_status=400) - - def test_bad_id(self): - for url in ('/post/fake/%s/x"1', '/comment/fake/%s/123/y(2', - '/like/fake/%s/abc/z$3'): - with self.subTest(url): - resp = self.check_response(url, expected_status=404) - self.assertIn('Invalid id', resp.get_data(as_text=True)) - - def test_author_uid_not_tag_uri(self): - self.activities[0]['object']['author']['id'] = 'not a tag uri' - resp = self.check_response('/post/fake/%s/000?format=json', expected_status=200) - props = resp.json['properties']['author'][0]['properties'] - self.assert_equals(['not a tag uri'], props['uid']) - self.assertNotIn('url', props) - - def test_ignore_unknown_query_params(self): - self.check_response('/post/fake/%s/000?target=x/y/z') - - def test_pass_through_source_errors(self): - user_id = self.source.key.string_id() - err = urllib.error.HTTPError('url', 410, 'Gone', {}, - io.StringIO('Gone baby gone')) - self.mox.StubOutWithMock(testutil.FakeSource, 'get_activities') - 
testutil.FakeSource.get_activities(activity_id='000', user_id=user_id - ).AndRaise(err) - self.mox.ReplayAll() - - resp = self.check_response('/post/fake/%s/000', expected_status=410) - self.assertEqual('text/plain; charset=utf-8', resp.headers['Content-Type']) - self.assertIn('Gone baby gone', resp.get_data(as_text=True)) - - def test_connection_failures_504(self): - user_id = self.source.key.string_id() - self.mox.StubOutWithMock(testutil.FakeSource, 'get_activities') - testutil.FakeSource.get_activities(activity_id='000', user_id=user_id - ).AndRaise(Exception('Connection closed unexpectedly')) - self.mox.ReplayAll() - resp = self.check_response('/post/fake/%s/000', expected_status=504) - self.assertIn('Connection closed unexpectedly', resp.get_data(as_text=True)) - - def test_handle_disable_source(self): - self.mox.StubOutWithMock(testutil.FakeSource, 'get_activities') - testutil.FakeSource.get_activities( - activity_id='000', user_id=self.source.key.string_id() - ).AndRaise(models.DisableSource()) - self.mox.ReplayAll() - - resp = self.check_response('/post/fake/%s/000', expected_status=401) - self.assertIn("Bridgy's access to your account has expired", - html.unescape(resp.get_data(as_text=True))) - - def test_handle_value_error(self): - self.mox.StubOutWithMock(testutil.FakeSource, 'get_activities') - testutil.FakeSource.get_activities( - activity_id='000', user_id=self.source.key.string_id() - ).AndRaise(ValueError('foo bar')) - self.mox.ReplayAll() - - resp = self.check_response('/post/fake/%s/000', expected_status=400) - self.assertIn('FakeSource error: foo bar', resp.get_data(as_text=True)) - - def test_comment(self): - self.check_response('/comment/fake/%s/000/a1-b2.c3', """\ + resp.json, + ) + + def test_post_missing(self): + FakeGrSource.activities = [] + self.check_response("/post/fake/%s/000", expected_status=404) + + def test_bad_source_type(self): + self.check_response("/post/not_a_type/%s/000", expected_status=400) + + def test_bad_user(self): + self.check_response("/post/fake/not_a_user_%s/000", expected_status=400) + + def test_disabled_user(self): + self.source.status = "disabled" + self.source.put() + self.check_response("/post/fake/%s/000", expected_status=400) + + def test_user_without_listen_feature(self): + self.source.features = [] + self.source.put() + self.check_response("/post/fake/%s/000", expected_status=400) + + def test_bad_format(self): + self.check_response("/post/fake/%s/000?format=asdf", expected_status=400) + + def test_bad_id(self): + for url in ( + '/post/fake/%s/x"1', + "/comment/fake/%s/123/y(2", + "/like/fake/%s/abc/z$3", + ): + with self.subTest(url): + resp = self.check_response(url, expected_status=404) + self.assertIn("Invalid id", resp.get_data(as_text=True)) + + def test_author_uid_not_tag_uri(self): + self.activities[0]["object"]["author"]["id"] = "not a tag uri" + resp = self.check_response("/post/fake/%s/000?format=json", expected_status=200) + props = resp.json["properties"]["author"][0]["properties"] + self.assert_equals(["not a tag uri"], props["uid"]) + self.assertNotIn("url", props) + + def test_ignore_unknown_query_params(self): + self.check_response("/post/fake/%s/000?target=x/y/z") + + def test_pass_through_source_errors(self): + user_id = self.source.key.string_id() + err = urllib.error.HTTPError( + "url", 410, "Gone", {}, io.StringIO("Gone baby gone") + ) + self.mox.StubOutWithMock(testutil.FakeSource, "get_activities") + testutil.FakeSource.get_activities(activity_id="000", user_id=user_id).AndRaise( + err + ) + 
self.mox.ReplayAll() + + resp = self.check_response("/post/fake/%s/000", expected_status=410) + self.assertEqual("text/plain; charset=utf-8", resp.headers["Content-Type"]) + self.assertIn("Gone baby gone", resp.get_data(as_text=True)) + + def test_connection_failures_504(self): + user_id = self.source.key.string_id() + self.mox.StubOutWithMock(testutil.FakeSource, "get_activities") + testutil.FakeSource.get_activities(activity_id="000", user_id=user_id).AndRaise( + Exception("Connection closed unexpectedly") + ) + self.mox.ReplayAll() + resp = self.check_response("/post/fake/%s/000", expected_status=504) + self.assertIn("Connection closed unexpectedly", resp.get_data(as_text=True)) + + def test_handle_disable_source(self): + self.mox.StubOutWithMock(testutil.FakeSource, "get_activities") + testutil.FakeSource.get_activities( + activity_id="000", user_id=self.source.key.string_id() + ).AndRaise(models.DisableSource()) + self.mox.ReplayAll() + + resp = self.check_response("/post/fake/%s/000", expected_status=401) + self.assertIn( + "Bridgy's access to your account has expired", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_handle_value_error(self): + self.mox.StubOutWithMock(testutil.FakeSource, "get_activities") + testutil.FakeSource.get_activities( + activity_id="000", user_id=self.source.key.string_id() + ).AndRaise(ValueError("foo bar")) + self.mox.ReplayAll() + + resp = self.check_response("/post/fake/%s/000", expected_status=400) + self.assertIn("FakeSource error: foo bar", resp.get_data(as_text=True)) + + def test_comment(self): + self.check_response( + "/comment/fake/%s/000/a1-b2.c3", + """\
tag:fa.ke,2013:a1-b2.c3
@@ -232,60 +268,72 @@ def test_comment(self):
-""" % self.source.user_tag_id()) - - def test_comment_optimized_comments(self): - self.mox.StubOutWithMock(self.source.gr_source, 'OPTIMIZED_COMMENTS') - self.source.gr_source.OPTIMIZED_COMMENTS = True - - self.mox.StubOutWithMock(testutil.FakeSource, 'get_activities') - testutil.FakeSource.get_activities( - activity_id='000', user_id=self.source.key.string_id(), fetch_replies=False, - ).AndReturn(self.activities[0]) - self.mox.ReplayAll() - - self.check_response('/comment/fake/%s/000/a1') - - def test_comment_optimized_comments_activity_has_replies(self): - self.mox.StubOutWithMock(self.source.gr_source, 'OPTIMIZED_COMMENTS') - self.source.gr_source.OPTIMIZED_COMMENTS = True - - replies = self.activities[0]['object']['replies'] = { - 'items': [{ - 'objectType': 'comment', - 'id': 'tag:source.com,2013:1_2_%s' % id, - 'url': 'http://fa.ke/comment/url', - 'content': 'foo bar', - }], - } - - self.mox.StubOutWithMock(testutil.FakeSource, 'get_activities') - testutil.FakeSource.get_activities( - activity_id='000', user_id=self.source.key.string_id(), fetch_replies=False, - ).AndReturn([self.activities[0]]) - - self.mox.StubOutWithMock(FakeSource, 'get_comment') - FakeSource.get_comment('a1', activity_id='000', - activity_author_id=self.source.key_id(), - activity=self.activities[0]).AndReturn( - replies['items'][0]) - self.mox.ReplayAll() - - self.check_response('/comment/fake/%s/000/a1') - - def test_like(self): - FakeGrSource.like = { - 'objectType': 'activity', - 'verb': 'like', - 'id': 'tag:fa.ke,2013:111', - 'object': {'url': 'http://example.com/original/post'}, - 'author': { - 'displayName': 'Alice', - 'image': {'url': 'http://example.com/ryan/image'}, - }, - } - - resp = self.check_response('/like/fake/%s/000/111', """\ +""" + % self.source.user_tag_id(), + ) + + def test_comment_optimized_comments(self): + self.mox.StubOutWithMock(self.source.gr_source, "OPTIMIZED_COMMENTS") + self.source.gr_source.OPTIMIZED_COMMENTS = True + + self.mox.StubOutWithMock(testutil.FakeSource, "get_activities") + testutil.FakeSource.get_activities( + activity_id="000", + user_id=self.source.key.string_id(), + fetch_replies=False, + ).AndReturn(self.activities[0]) + self.mox.ReplayAll() + + self.check_response("/comment/fake/%s/000/a1") + + def test_comment_optimized_comments_activity_has_replies(self): + self.mox.StubOutWithMock(self.source.gr_source, "OPTIMIZED_COMMENTS") + self.source.gr_source.OPTIMIZED_COMMENTS = True + + replies = self.activities[0]["object"]["replies"] = { + "items": [ + { + "objectType": "comment", + "id": "tag:source.com,2013:1_2_%s" % id, + "url": "http://fa.ke/comment/url", + "content": "foo bar", + } + ], + } + + self.mox.StubOutWithMock(testutil.FakeSource, "get_activities") + testutil.FakeSource.get_activities( + activity_id="000", + user_id=self.source.key.string_id(), + fetch_replies=False, + ).AndReturn([self.activities[0]]) + + self.mox.StubOutWithMock(FakeSource, "get_comment") + FakeSource.get_comment( + "a1", + activity_id="000", + activity_author_id=self.source.key_id(), + activity=self.activities[0], + ).AndReturn(replies["items"][0]) + self.mox.ReplayAll() + + self.check_response("/comment/fake/%s/000/a1") + + def test_like(self): + FakeGrSource.like = { + "objectType": "activity", + "verb": "like", + "id": "tag:fa.ke,2013:111", + "object": {"url": "http://example.com/original/post"}, + "author": { + "displayName": "Alice", + "image": {"url": "http://example.com/ryan/image"}, + }, + } + + resp = self.check_response( + "/like/fake/%s/000/111", + """\
tag:fa.ke,2013:111
@@ -298,22 +346,25 @@ def test_like(self):
-""") - - def test_reaction(self): - FakeGrSource.reaction = { - 'objectType': 'activity', - 'verb': 'react', - 'id': 'tag:fa.ke,2013:000_scissors_by_111', - 'content': '✁', - 'object': {'url': 'http://example.com/original/post'}, - 'author': { - 'displayName': 'Alice', - 'image': {'url': 'http://example.com/ryan/image'}, - }, - } - - self.check_response('/react/fake/%s/000/111/scissors', """\ +""", + ) + + def test_reaction(self): + FakeGrSource.reaction = { + "objectType": "activity", + "verb": "react", + "id": "tag:fa.ke,2013:000_scissors_by_111", + "content": "✁", + "object": {"url": "http://example.com/original/post"}, + "author": { + "displayName": "Alice", + "image": {"url": "http://example.com/ryan/image"}, + }, + } + + self.check_response( + "/react/fake/%s/000/111/scissors", + """\
tag:fa.ke,2013:000_scissors_by_111
@@ -326,29 +377,33 @@ def test_reaction(self):
-""") - - def test_repost_with_syndicated_post_and_mentions(self): - self.activities[0]['object']['content'] += ' http://another/mention' - models.SyndicatedPost( - parent=self.source.key, - original='http://or.ig/post', - syndication='http://example.com/original/post').put() - - FakeGrSource.share = { - 'objectType': 'activity', - 'verb': 'share', - 'id': 'tag:fa.ke,2013:111', - 'object': {'url': 'http://example.com/original/post'}, - 'content': 'message from sharer', - 'author': { - 'id': 'tag:fa.ke,2013:reposter_id', - 'url': 'http://personal.domain/', - 'image': {'url': 'http://example.com/ryan/image'}, - }, - } - - self.check_response('/repost/fake/%s/000/111', """\ +""", + ) + + def test_repost_with_syndicated_post_and_mentions(self): + self.activities[0]["object"]["content"] += " http://another/mention" + models.SyndicatedPost( + parent=self.source.key, + original="http://or.ig/post", + syndication="http://example.com/original/post", + ).put() + + FakeGrSource.share = { + "objectType": "activity", + "verb": "share", + "id": "tag:fa.ke,2013:111", + "object": {"url": "http://example.com/original/post"}, + "content": "message from sharer", + "author": { + "id": "tag:fa.ke,2013:reposter_id", + "url": "http://personal.domain/", + "image": {"url": "http://example.com/ryan/image"}, + }, + } + + self.check_response( + "/repost/fake/%s/000/111", + """\
 tag:fa.ke,2013:111
@@ -363,26 +418,29 @@ def test_repost_with_syndicated_post_and_mentions(self):
-""") - - def test_repost_not_found(self): - FakeGrSource.share = None - self.check_response('/repost/fake/%s/000/111', expected_status=404) - - def test_rsvp(self): - FakeGrSource.rsvp = { - 'objectType': 'activity', - 'verb': 'rsvp-no', - 'id': 'tag:fa.ke,2013:111', - 'object': {'url': 'http://example.com/event'}, - 'author': { - 'id': 'tag:fa.ke,2013:rsvper_id', - 'url': 'http://fa.ke/rsvper_id', # same URL as FakeSource.user_url() - 'image': {'url': 'http://example.com/ryan/image'}, - }, - } - - self.check_response('/rsvp/fake/%s/000/111', """\ +""", + ) + + def test_repost_not_found(self): + FakeGrSource.share = None + self.check_response("/repost/fake/%s/000/111", expected_status=404) + + def test_rsvp(self): + FakeGrSource.rsvp = { + "objectType": "activity", + "verb": "rsvp-no", + "id": "tag:fa.ke,2013:111", + "object": {"url": "http://example.com/event"}, + "author": { + "id": "tag:fa.ke,2013:rsvper_id", + "url": "http://fa.ke/rsvper_id", # same URL as FakeSource.user_url() + "image": {"url": "http://example.com/ryan/image"}, + }, + } + + self.check_response( + "/rsvp/fake/%s/000/111", + """\
 tag:fa.ke,2013:111
@@ -396,26 +454,29 @@ def test_rsvp(self):
-""") - - def test_invite(self): - FakeGrSource.rsvp = { - 'id': 'tag:fa.ke,2013:111', - 'objectType': 'activity', - 'verb': 'invite', - 'url': 'http://fa.ke/event', - 'actor': { - 'displayName': 'Mrs. Host', - 'url': 'http://fa.ke/host', - }, - 'object': { - 'objectType': 'person', - 'displayName': 'Ms. Guest', - 'url': 'http://fa.ke/guest', - }, - } - - self.check_response('/rsvp/fake/%s/000/111', """\ +""", + ) + + def test_invite(self): + FakeGrSource.rsvp = { + "id": "tag:fa.ke,2013:111", + "objectType": "activity", + "verb": "invite", + "url": "http://fa.ke/event", + "actor": { + "displayName": "Mrs. Host", + "url": "http://fa.ke/host", + }, + "object": { + "objectType": "person", + "displayName": "Ms. Guest", + "url": "http://fa.ke/guest", + }, + } + + self.check_response( + "/rsvp/fake/%s/000/111", + """\
 tag:fa.ke,2013:111
@@ -429,37 +490,45 @@ def test_invite(self):
-""") - - def test_granary_source_user_url_not_implemented(self): - self.mox.StubOutWithMock(FakeGrSource, 'user_url') - FakeGrSource.user_url('reposter_id').AndRaise(NotImplementedError()) - self.mox.ReplayAll() - - FakeGrSource.share = { - 'objectType': 'activity', - 'verb': 'share', - 'object': {'url': 'http://example.com/original/post'}, - 'author': {'id': 'tag:fa.ke,2013:reposter_id'}, - } - resp = self.check_response('/repost/fake/%s/000/111') - self.assertIn('', resp.get_data(as_text=True)) - self.assertNotIn('u-url', resp.get_data(as_text=True)) - - def test_original_post_urls_follow_redirects(self): - FakeGrSource.comment = { - 'content': 'qwert', - 'inReplyTo': [{'url': 'http://fa.ke/000'}], - } - - self.expect_requests_head('https://fa.ke/000').InAnyOrder() - self.expect_requests_head( - 'http://or.ig/post', redirected_url='http://or.ig/post/redirect').InAnyOrder() - self.expect_requests_head( - 'http://other/link', redirected_url='http://other/link/redirect').InAnyOrder() - self.mox.ReplayAll() - - self.check_response('/comment/fake/%s/000/111', """\ +""", + ) + + def test_granary_source_user_url_not_implemented(self): + self.mox.StubOutWithMock(FakeGrSource, "user_url") + FakeGrSource.user_url("reposter_id").AndRaise(NotImplementedError()) + self.mox.ReplayAll() + + FakeGrSource.share = { + "objectType": "activity", + "verb": "share", + "object": {"url": "http://example.com/original/post"}, + "author": {"id": "tag:fa.ke,2013:reposter_id"}, + } + resp = self.check_response("/repost/fake/%s/000/111") + self.assertIn( + '', + resp.get_data(as_text=True), + ) + self.assertNotIn("u-url", resp.get_data(as_text=True)) + + def test_original_post_urls_follow_redirects(self): + FakeGrSource.comment = { + "content": "qwert", + "inReplyTo": [{"url": "http://fa.ke/000"}], + } + + self.expect_requests_head("https://fa.ke/000").InAnyOrder() + self.expect_requests_head( + "http://or.ig/post", redirected_url="http://or.ig/post/redirect" + ).InAnyOrder() + self.expect_requests_head( + "http://other/link", redirected_url="http://other/link/redirect" + ).InAnyOrder() + self.mox.ReplayAll() + + self.check_response( + "/comment/fake/%s/000/111", + """\
@@ -471,15 +540,20 @@ def test_original_post_urls_follow_redirects(self):
-""") - - def test_strip_utm_query_params(self): - self.activities[0]['object'].update({ - 'content': 'asdf http://other/link?utm_source=x&utm_medium=y&a=b qwert', - 'upstreamDuplicates': ['http://or.ig/post?utm_campaign=123'], - }) - FakeGrSource.comment = {'content': 'qwert'} - self.check_response('/comment/fake/%s/000/111', """\ +""", + ) + + def test_strip_utm_query_params(self): + self.activities[0]["object"].update( + { + "content": "asdf http://other/link?utm_source=x&utm_medium=y&a=b qwert", + "upstreamDuplicates": ["http://or.ig/post?utm_campaign=123"], + } + ) + FakeGrSource.comment = {"content": "qwert"} + self.check_response( + "/comment/fake/%s/000/111", + """\
@@ -488,25 +562,32 @@ def test_strip_utm_query_params(self):
-""") - - def test_dedupe_http_and_https(self): - self.activities[0]['object'].update({ - 'content': 'X http://mention/only Y https://reply Z https://upstream ' - 'W http://all', - 'upstreamDuplicates': ['http://upstream/only', - 'http://upstream', - 'http://all', - ], - }) - - FakeGrSource.comment = { - 'inReplyTo': [{'url': 'https://reply/only'}, - {'url': 'http://reply'}, - {'url': 'https://all'}, - ], - } - self.check_response('/comment/fake/%s/000/111', """\ +""", + ) + + def test_dedupe_http_and_https(self): + self.activities[0]["object"].update( + { + "content": "X http://mention/only Y https://reply Z https://upstream " + "W http://all", + "upstreamDuplicates": [ + "http://upstream/only", + "http://upstream", + "http://all", + ], + } + ) + + FakeGrSource.comment = { + "inReplyTo": [ + {"url": "https://reply/only"}, + {"url": "http://reply"}, + {"url": "https://all"}, + ], + } + self.check_response( + "/comment/fake/%s/000/111", + """\
@@ -520,42 +601,48 @@ def test_dedupe_http_and_https(self):
-""") - - def test_tag_without_url(self): - self.activities[0]['object'] = { - 'id': 'tag:fa.ke,2013:000', - 'tags': [{'foo': 'bar'}], - } - self.check_response('/post/fake/%s/000', """\ +""", + ) + + def test_tag_without_url(self): + self.activities[0]["object"] = { + "id": "tag:fa.ke,2013:000", + "tags": [{"foo": "bar"}], + } + self.check_response( + "/post/fake/%s/000", + """\
tag:fa.ke,2013:000
-""") +""", + ) - @enable_flask_caching(app, cache) - def test_cache(self): - orig = self.check_response('/post/fake/%s/000') + @enable_flask_caching(app, cache) + def test_cache(self): + orig = self.check_response("/post/fake/%s/000") - # should serve the cached response and not refetch - self.mox.StubOutWithMock(FakeGrSource, 'get_activities_response') - self.mox.ReplayAll() + # should serve the cached response and not refetch + self.mox.StubOutWithMock(FakeGrSource, "get_activities_response") + self.mox.ReplayAll() - cached = self.check_response('/post/fake/%s/000') - self.assert_multiline_equals(orig.get_data(as_text=True), - cached.get_data(as_text=True)) + cached = self.check_response("/post/fake/%s/000") + self.assert_multiline_equals( + orig.get_data(as_text=True), cached.get_data(as_text=True) + ) - def test_in_blocklist(self): - self.mox.StubOutWithMock(FakeSource, 'is_blocked') - FakeSource.is_blocked(mox.IgnoreArg()).AndReturn(True) - self.mox.ReplayAll() + def test_in_blocklist(self): + self.mox.StubOutWithMock(FakeSource, "is_blocked") + FakeSource.is_blocked(mox.IgnoreArg()).AndReturn(True) + self.mox.ReplayAll() - self.check_response('/comment/fake/%s/000/111', expected_status=410) + self.check_response("/comment/fake/%s/000/111", expected_status=410) - def test_head(self): - resp = self.client.get( - f'/post/fake/{self.source.key.string_id()}/000', method='HEAD') - self.assertEqual(200, resp.status_code) + def test_head(self): + resp = self.client.get( + f"/post/fake/{self.source.key.string_id()}/000", method="HEAD" + ) + self.assertEqual(200, resp.status_code) diff --git a/tests/test_indieauth.py b/tests/test_indieauth.py index b86e8049..509436a4 100644 --- a/tests/test_indieauth.py +++ b/tests/test_indieauth.py @@ -16,65 +16,81 @@ class IndieAuthTest(testutil.AppTest): + def setUp(self): + super().setUp() + self.auth_entity = indieauth.IndieAuth(id="http://snarfed.org") - def setUp(self): - super().setUp() - self.auth_entity = indieauth.IndieAuth(id='http://snarfed.org') + def expect_indieauth_check(self): + return TestCase.expect_requests_post( + self, + indieauth.INDIEAUTH_URL, + "me=http://snarfed.org", + data={ + "me": "http://snarfed.org", + "state": "towkin", + "code": "my_code", + "client_id": indieauth.INDIEAUTH_CLIENT_ID, + "redirect_uri": "http://localhost/indieauth/callback", + }, + ) - def expect_indieauth_check(self): - return TestCase.expect_requests_post( - self, indieauth.INDIEAUTH_URL, 'me=http://snarfed.org', data={ - 'me': 'http://snarfed.org', - 'state': 'towkin', - 'code': 'my_code', - 'client_id': indieauth.INDIEAUTH_CLIENT_ID, - 'redirect_uri': 'http://localhost/indieauth/callback', - }) - - def expect_site_fetch(self, body=None): - if body is None: - body = """ + def expect_site_fetch(self, body=None): + if body is None: + body = """ me on insta """ - return TestCase.expect_requests_get(self, 'http://snarfed.org', body) + return TestCase.expect_requests_get(self, "http://snarfed.org", body) - def callback(self, token='towkin'): - resp = self.client.get( - '/indieauth/callback?code=my_code&state=%s' % util.encode_oauth_state({ - 'endpoint': indieauth.INDIEAUTH_URL, - 'me': 'http://snarfed.org', - 'state': token, - })) - self.assertEqual(302, resp.status_code) - return resp + def callback(self, token="towkin"): + resp = self.client.get( + "/indieauth/callback?code=my_code&state=%s" + % util.encode_oauth_state( + { + "endpoint": indieauth.INDIEAUTH_URL, + "me": "http://snarfed.org", + "state": token, + } + ) + ) + self.assertEqual(302, 
resp.status_code) + return resp - def test_callback_new_domain(self): - self.expect_indieauth_check() - self.expect_site_fetch() - self.mox.ReplayAll() + def test_callback_new_domain(self): + self.expect_indieauth_check() + self.expect_site_fetch() + self.mox.ReplayAll() - resp = self.callback() - self.assertEqual('http://localhost/',resp.headers['Location']) - self.assertEqual(['Authorized you for snarfed.org.'], get_flashed_messages()) + resp = self.callback() + self.assertEqual("http://localhost/", resp.headers["Location"]) + self.assertEqual(["Authorized you for snarfed.org."], get_flashed_messages()) - self.assert_entities_equal([ - Domain(id='snarfed.org', tokens=['towkin'], auth=self.auth_entity.key), - ], Domain.query().fetch(), ignore=('created', 'updated')) + self.assert_entities_equal( + [ + Domain(id="snarfed.org", tokens=["towkin"], auth=self.auth_entity.key), + ], + Domain.query().fetch(), + ignore=("created", "updated"), + ) - def test_start_get(self): - resp = self.client.get('/indieauth/start?token=foo') - self.assertEqual(200, resp.status_code) + def test_start_get(self): + resp = self.client.get("/indieauth/start?token=foo") + self.assertEqual(200, resp.status_code) - def test_start_post(self): - self.expect_site_fetch() - self.mox.ReplayAll() + def test_start_post(self): + self.expect_site_fetch() + self.mox.ReplayAll() - resp = self.client.post('/indieauth/start', data={ - 'token': 'foo', - 'me': 'http://snarfed.org', - }) - self.assertEqual(302, resp.status_code) - self.assertTrue(resp.headers['Location'].startswith(indieauth.INDIEAUTH_URL), - resp.headers['Location']) + resp = self.client.post( + "/indieauth/start", + data={ + "token": "foo", + "me": "http://snarfed.org", + }, + ) + self.assertEqual(302, resp.status_code) + self.assertTrue( + resp.headers["Location"].startswith(indieauth.INDIEAUTH_URL), + resp.headers["Location"], + ) diff --git a/tests/test_instagram.py b/tests/test_instagram.py index d0008992..a89837af 100644 --- a/tests/test_instagram.py +++ b/tests/test_instagram.py @@ -5,20 +5,20 @@ from granary import instagram as gr_instagram from granary.tests.test_instagram import ( - HTML_FEED_COMPLETE, - HTML_FOOTER, - HTML_HEADER, - HTML_PHOTO_ACTIVITY, - HTML_PHOTO_ACTIVITY_LIKES, - HTML_PHOTO_LIKES_RESPONSE, - HTML_PROFILE_COMPLETE, - HTML_PROFILE_PRIVATE_COMPLETE, - HTML_VIDEO_ACTIVITY, - HTML_VIDEO_ACTIVITY_FULL, - HTML_VIDEO_EXTRA_COMMENT_OBJ, - HTML_VIDEO_PAGE, - HTML_VIEWER_CONFIG, - LIKE_OBJS, + HTML_FEED_COMPLETE, + HTML_FOOTER, + HTML_HEADER, + HTML_PHOTO_ACTIVITY, + HTML_PHOTO_ACTIVITY_LIKES, + HTML_PHOTO_LIKES_RESPONSE, + HTML_PROFILE_COMPLETE, + HTML_PROFILE_PRIVATE_COMPLETE, + HTML_VIDEO_ACTIVITY, + HTML_VIDEO_ACTIVITY_FULL, + HTML_VIDEO_EXTRA_COMMENT_OBJ, + HTML_VIDEO_PAGE, + HTML_VIEWER_CONFIG, + LIKE_OBJS, ) from oauth_dropins.webutil.util import HTTP_TIMEOUT, json_dumps, json_loads @@ -28,195 +28,232 @@ from . import testutil HTML_VIDEO_WITH_VIEWER = copy.deepcopy(HTML_VIDEO_PAGE) -HTML_VIDEO_WITH_VIEWER['config'] = HTML_VIEWER_CONFIG +HTML_VIDEO_WITH_VIEWER["config"] = HTML_VIEWER_CONFIG HTML_VIDEO_COMPLETE = HTML_HEADER + json_dumps(HTML_VIDEO_WITH_VIEWER) + HTML_FOOTER class InstagramTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.source = Instagram.new(actor=self.actor) - self.domain = Domain(id='snarfed.org', tokens=['towkin']).put() - self.auth = f'token=towkin&key={self.source.key.urlsafe().decode()}' - - def get_response(self, path_query, auth=True, **kwargs): - if auth and '?' 
not in path_query: - path_query += f'?{self.auth}' - return self.client.post(f'/instagram/browser/{path_query}', **kwargs) - - def store_activity(self): - activity = copy.deepcopy(HTML_PHOTO_ACTIVITY) - activity['actor']['url'] = 'http://snarfed.org/' - return Activity(id='tag:instagram.com,2013:123_456', source=self.source.key, - activity_json=json_dumps(activity)).put() - - def test_new(self): - self.assertIsNone(self.source.auth_entity) - self.assertEqual('snarfed', self.source.key.string_id()) - self.assertEqual('http://pic.ture/url', self.source.picture) - self.assertEqual('https://www.instagram.com/snarfed/', self.source.silo_url()) - self.assertEqual('Ryan B', self.source.name) - self.assertEqual('snarfed (Instagram)', self.source.label()) - - def test_canonicalize_url(self): - self.unstub_requests_head() - for url in ( - 'http://www.instagram.com/p/abcd', - 'https://www.instagram.com/p/abcd', - 'https://www.instagram.com/p/abcd/', - 'https://instagram.com/p/abcd', - ): - self.assertEqual('https://www.instagram.com/p/abcd/', - self.source.canonicalize_url(url)) - - self.assertIsNone(self.source.canonicalize_url('https://www.foo.com/p/abcd/')) - - def test_canonicalize_url_approve_checks_full_url(self): - """...specifically, that the regex ends with a $ - https://github.com/snarfed/bridgy/issues/686 - """ - self.assertEqual('https://www.instagram.com/p/abcd/123/', - self.source.canonicalize_url('https://www.instagram.com/p/abcd/123')) - - def test_get_activities_response_activity_id(self): - Activity(id='tag:instagram.com,2013:123', - activity_json=json_dumps({'foo': 'bar'})).put() - - resp = self.source.get_activities_response(activity_id='123') - self.assertEqual([{'foo': 'bar'}], resp['items']) - - def test_get_activities_response_no_activity_id(self): - Activity(id='tag:instagram.com,2013:123', source=self.source.key, - activity_json=json_dumps({'foo': 'bar'})).put() - Activity(id='tag:instagram.com,2013:456', source=self.source.key, - activity_json=json_dumps({'baz': 'biff'})).put() - - other = Instagram.new(actor={'username': 'other'}).put() - Activity(id='tag:instagram.com,2013:789', source=other, - activity_json=json_dumps({'boo': 'bah'})).put() - - resp = self.source.get_activities_response() - self.assert_equals([{'foo': 'bar'}, {'baz': 'biff'}], resp['items']) - - def test_get_activities_response_no_stored_activity(self): - resp = self.source.get_activities_response(activity_id='123') - self.assertEqual([], resp['items']) - - def test_get_comment(self): - self.assert_equals( - HTML_VIDEO_EXTRA_COMMENT_OBJ, - self.source.get_comment('020', activity=HTML_VIDEO_ACTIVITY_FULL)) - - def test_get_comment_no_matching_id(self): - self.assertIsNone(self.source.get_comment('333', activity=HTML_VIDEO_ACTIVITY_FULL)) - - def test_get_comment_no_activity_kwarg(self): - self.assertIsNone(self.source.get_comment('020')) - - def test_get_like(self): - self.assert_equals(LIKE_OBJS[1], self.source.get_like( - 'unused', '123', '9', activity=HTML_PHOTO_ACTIVITY_LIKES)) - - def test_get_like_no_matching_user(self): - self.assertIsNone(self.source.get_like( - 'unused', '123', '222', activity=HTML_PHOTO_ACTIVITY_LIKES)) - - def test_get_like_no_activity_kwarg(self): - self.assertIsNone(self.source.get_like('unused', '123', '9')) - - def test_homepage(self): - resp = self.get_response('homepage', data=HTML_FEED_COMPLETE) - self.assertEqual(200, resp.status_code) - self.assertEqual('snarfed', resp.json) - - def test_homepage_bad_html(self): - resp = self.get_response('homepage', data='not a logged 
in IG feed') - self.assertEqual(400, resp.status_code) - self.assertIn("Couldn't determine logged in Instagram user", - html.unescape(resp.get_data(as_text=True))) - - def test_profile_new_user(self): - self.assertIsNone(Instagram.get_by_id('snarfed')) - - self.expect_requests_get('https://snarfed.org/', '') - self.mox.ReplayAll() - - resp = self.get_response('profile?token=towkin', data=HTML_PROFILE_COMPLETE) - - self.assertEqual(200, resp.status_code) - self.assertEqual(self.source.key.urlsafe().decode(), resp.json) - - ig = Instagram.get_by_id('snarfed') - self.assertEqual('Ryan B', ig.name) - self.assertEqual('https://scontent-sjc2-1.cdninstagram.com/hphotos-xfa1/t51.2885-19/11373714_959073410822287_2004790583_a.jpg', ig.picture) - self.assertEqual('https://www.instagram.com/snarfed/', ig.silo_url()) - self.assertEqual(['https://snarfed.org/'], ig.domain_urls) - self.assertEqual(['snarfed.org'], ig.domains) - - def test_profile_private_account(self): - resp = self.get_response('profile', data=HTML_PROFILE_PRIVATE_COMPLETE) - self.assertEqual(400, resp.status_code) - self.assertIn('Your Instagram account is private.', resp.get_data(as_text=True)) - - def test_post(self): - self.source.put() - - resp = self.get_response('post', data=HTML_VIDEO_COMPLETE) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual(HTML_VIDEO_ACTIVITY_FULL, resp.json) - - activities = Activity.query().fetch() - self.assertEqual(1, len(activities)) - self.assertEqual(self.source.key, activities[0].source) - self.assertEqual(HTML_VIDEO_ACTIVITY_FULL, json_loads(activities[0].activity_json)) - - def test_post_empty(self): - self.source.put() - empty = HTML_HEADER + json_dumps({'config': HTML_VIEWER_CONFIG}) + HTML_FOOTER - resp = self.get_response('post', data=empty) - self.assertEqual(400, resp.status_code) - self.assertIn('No Instagram post found in HTML', resp.get_data(as_text=True)) - - def test_post_merge_comments(self): - self.source.put() - - # existing activity with one of the two comments in HTML_VIDEO_COMPLETE - existing_activity = copy.deepcopy(HTML_VIDEO_ACTIVITY) - existing_activity['object']['replies'] = { - 'totalItems': 1, - 'items': [HTML_VIDEO_ACTIVITY_FULL['object']['replies']['items'][0]], - } - activity_key = Activity(id='tag:instagram.com,2013:789_456', - activity_json=json_dumps(existing_activity)).put() - - # send HTML_VIDEO_COMPLETE to /post, check that the response and stored - # activity have both of its comments - resp = self.get_response('post', data=HTML_VIDEO_COMPLETE) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assert_equals(HTML_VIDEO_ACTIVITY_FULL, resp.json) - - activity = activity_key.get() - self.assert_equals(HTML_VIDEO_ACTIVITY_FULL, json_loads(activity.activity_json)) - - def test_likes(self): - self.source.put() - key = self.store_activity() - - resp = self.get_response(f'likes?id=tag:instagram.com,2013:123_456&{self.auth}', - json=HTML_PHOTO_LIKES_RESPONSE) - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual(LIKE_OBJS, resp.json) - - activity = json_loads(key.get().activity_json) - self.assertEqual(LIKE_OBJS, activity['object']['tags']) - - def test_poll(self): - key = self.source.put() - self.expect_task('poll', eta_seconds=0, source_key=key, - last_polled='1970-01-01-00-00-00') - self.mox.ReplayAll() - - resp = self.get_response(f'poll') - self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) - self.assertEqual('OK', resp.json) + def setUp(self): + 
super().setUp() + self.source = Instagram.new(actor=self.actor) + self.domain = Domain(id="snarfed.org", tokens=["towkin"]).put() + self.auth = f"token=towkin&key={self.source.key.urlsafe().decode()}" + + def get_response(self, path_query, auth=True, **kwargs): + if auth and "?" not in path_query: + path_query += f"?{self.auth}" + return self.client.post(f"/instagram/browser/{path_query}", **kwargs) + + def store_activity(self): + activity = copy.deepcopy(HTML_PHOTO_ACTIVITY) + activity["actor"]["url"] = "http://snarfed.org/" + return Activity( + id="tag:instagram.com,2013:123_456", + source=self.source.key, + activity_json=json_dumps(activity), + ).put() + + def test_new(self): + self.assertIsNone(self.source.auth_entity) + self.assertEqual("snarfed", self.source.key.string_id()) + self.assertEqual("http://pic.ture/url", self.source.picture) + self.assertEqual("https://www.instagram.com/snarfed/", self.source.silo_url()) + self.assertEqual("Ryan B", self.source.name) + self.assertEqual("snarfed (Instagram)", self.source.label()) + + def test_canonicalize_url(self): + self.unstub_requests_head() + for url in ( + "http://www.instagram.com/p/abcd", + "https://www.instagram.com/p/abcd", + "https://www.instagram.com/p/abcd/", + "https://instagram.com/p/abcd", + ): + self.assertEqual( + "https://www.instagram.com/p/abcd/", self.source.canonicalize_url(url) + ) + + self.assertIsNone(self.source.canonicalize_url("https://www.foo.com/p/abcd/")) + + def test_canonicalize_url_approve_checks_full_url(self): + """...specifically, that the regex ends with a $ + https://github.com/snarfed/bridgy/issues/686 + """ + self.assertEqual( + "https://www.instagram.com/p/abcd/123/", + self.source.canonicalize_url("https://www.instagram.com/p/abcd/123"), + ) + + def test_get_activities_response_activity_id(self): + Activity( + id="tag:instagram.com,2013:123", activity_json=json_dumps({"foo": "bar"}) + ).put() + + resp = self.source.get_activities_response(activity_id="123") + self.assertEqual([{"foo": "bar"}], resp["items"]) + + def test_get_activities_response_no_activity_id(self): + Activity( + id="tag:instagram.com,2013:123", + source=self.source.key, + activity_json=json_dumps({"foo": "bar"}), + ).put() + Activity( + id="tag:instagram.com,2013:456", + source=self.source.key, + activity_json=json_dumps({"baz": "biff"}), + ).put() + + other = Instagram.new(actor={"username": "other"}).put() + Activity( + id="tag:instagram.com,2013:789", + source=other, + activity_json=json_dumps({"boo": "bah"}), + ).put() + + resp = self.source.get_activities_response() + self.assert_equals([{"foo": "bar"}, {"baz": "biff"}], resp["items"]) + + def test_get_activities_response_no_stored_activity(self): + resp = self.source.get_activities_response(activity_id="123") + self.assertEqual([], resp["items"]) + + def test_get_comment(self): + self.assert_equals( + HTML_VIDEO_EXTRA_COMMENT_OBJ, + self.source.get_comment("020", activity=HTML_VIDEO_ACTIVITY_FULL), + ) + + def test_get_comment_no_matching_id(self): + self.assertIsNone( + self.source.get_comment("333", activity=HTML_VIDEO_ACTIVITY_FULL) + ) + + def test_get_comment_no_activity_kwarg(self): + self.assertIsNone(self.source.get_comment("020")) + + def test_get_like(self): + self.assert_equals( + LIKE_OBJS[1], + self.source.get_like( + "unused", "123", "9", activity=HTML_PHOTO_ACTIVITY_LIKES + ), + ) + + def test_get_like_no_matching_user(self): + self.assertIsNone( + self.source.get_like( + "unused", "123", "222", activity=HTML_PHOTO_ACTIVITY_LIKES + ) + ) + + def 
test_get_like_no_activity_kwarg(self): + self.assertIsNone(self.source.get_like("unused", "123", "9")) + + def test_homepage(self): + resp = self.get_response("homepage", data=HTML_FEED_COMPLETE) + self.assertEqual(200, resp.status_code) + self.assertEqual("snarfed", resp.json) + + def test_homepage_bad_html(self): + resp = self.get_response("homepage", data="not a logged in IG feed") + self.assertEqual(400, resp.status_code) + self.assertIn( + "Couldn't determine logged in Instagram user", + html.unescape(resp.get_data(as_text=True)), + ) + + def test_profile_new_user(self): + self.assertIsNone(Instagram.get_by_id("snarfed")) + + self.expect_requests_get("https://snarfed.org/", "") + self.mox.ReplayAll() + + resp = self.get_response("profile?token=towkin", data=HTML_PROFILE_COMPLETE) + + self.assertEqual(200, resp.status_code) + self.assertEqual(self.source.key.urlsafe().decode(), resp.json) + + ig = Instagram.get_by_id("snarfed") + self.assertEqual("Ryan B", ig.name) + self.assertEqual( + "https://scontent-sjc2-1.cdninstagram.com/hphotos-xfa1/t51.2885-19/11373714_959073410822287_2004790583_a.jpg", + ig.picture, + ) + self.assertEqual("https://www.instagram.com/snarfed/", ig.silo_url()) + self.assertEqual(["https://snarfed.org/"], ig.domain_urls) + self.assertEqual(["snarfed.org"], ig.domains) + + def test_profile_private_account(self): + resp = self.get_response("profile", data=HTML_PROFILE_PRIVATE_COMPLETE) + self.assertEqual(400, resp.status_code) + self.assertIn("Your Instagram account is private.", resp.get_data(as_text=True)) + + def test_post(self): + self.source.put() + + resp = self.get_response("post", data=HTML_VIDEO_COMPLETE) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual(HTML_VIDEO_ACTIVITY_FULL, resp.json) + + activities = Activity.query().fetch() + self.assertEqual(1, len(activities)) + self.assertEqual(self.source.key, activities[0].source) + self.assertEqual( + HTML_VIDEO_ACTIVITY_FULL, json_loads(activities[0].activity_json) + ) + + def test_post_empty(self): + self.source.put() + empty = HTML_HEADER + json_dumps({"config": HTML_VIEWER_CONFIG}) + HTML_FOOTER + resp = self.get_response("post", data=empty) + self.assertEqual(400, resp.status_code) + self.assertIn("No Instagram post found in HTML", resp.get_data(as_text=True)) + + def test_post_merge_comments(self): + self.source.put() + + # existing activity with one of the two comments in HTML_VIDEO_COMPLETE + existing_activity = copy.deepcopy(HTML_VIDEO_ACTIVITY) + existing_activity["object"]["replies"] = { + "totalItems": 1, + "items": [HTML_VIDEO_ACTIVITY_FULL["object"]["replies"]["items"][0]], + } + activity_key = Activity( + id="tag:instagram.com,2013:789_456", + activity_json=json_dumps(existing_activity), + ).put() + + # send HTML_VIDEO_COMPLETE to /post, check that the response and stored + # activity have both of its comments + resp = self.get_response("post", data=HTML_VIDEO_COMPLETE) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assert_equals(HTML_VIDEO_ACTIVITY_FULL, resp.json) + + activity = activity_key.get() + self.assert_equals(HTML_VIDEO_ACTIVITY_FULL, json_loads(activity.activity_json)) + + def test_likes(self): + self.source.put() + key = self.store_activity() + + resp = self.get_response( + f"likes?id=tag:instagram.com,2013:123_456&{self.auth}", + json=HTML_PHOTO_LIKES_RESPONSE, + ) + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual(LIKE_OBJS, resp.json) + + activity = 
json_loads(key.get().activity_json) + self.assertEqual(LIKE_OBJS, activity["object"]["tags"]) + + def test_poll(self): + key = self.source.put() + self.expect_task( + "poll", eta_seconds=0, source_key=key, last_polled="1970-01-01-00-00-00" + ) + self.mox.ReplayAll() + + resp = self.get_response(f"poll") + self.assertEqual(200, resp.status_code, resp.get_data(as_text=True)) + self.assertEqual("OK", resp.json) diff --git a/tests/test_mastodon.py b/tests/test_mastodon.py index bbd578eb..924243e8 100644 --- a/tests/test_mastodon.py +++ b/tests/test_mastodon.py @@ -11,80 +11,86 @@ class MastodonTest(testutil.AppTest): + def setUp(self): + super().setUp() - def setUp(self): - super().setUp() + app = oauth_mastodon.MastodonApp(instance="https://foo.com", data="") + app.put() + self.auth_entity = oauth_mastodon.MastodonAuth( + id="@me@foo.com", + access_token_str="towkin", + app=app.key, + user_json=json_dumps( + { + "id": "123", + "username": "me", + "acct": "me", + "url": "https://foo.com/@me", + "display_name": "Ryan Barrett", + "avatar": "http://pi.ct/ure", + } + ), + ) + self.auth_entity.put() + self.m = Mastodon.new(auth_entity=self.auth_entity) - app = oauth_mastodon.MastodonApp(instance='https://foo.com', data='') - app.put() - self.auth_entity = oauth_mastodon.MastodonAuth( - id='@me@foo.com', access_token_str='towkin', app=app.key, user_json=json_dumps({ - 'id': '123', - 'username': 'me', - 'acct': 'me', - 'url': 'https://foo.com/@me', - 'display_name': 'Ryan Barrett', - 'avatar': 'http://pi.ct/ure', - })) - self.auth_entity.put() - self.m = Mastodon.new(auth_entity=self.auth_entity) + def test_new(self): + self.assertEqual(self.auth_entity, self.m.auth_entity.get()) + self.assertEqual("towkin", self.m.gr_source.access_token) + self.assertEqual("@me@foo.com", self.m.key.string_id()) + self.assertEqual("http://pi.ct/ure", self.m.picture) + self.assertEqual("Ryan Barrett", self.m.name) + self.assertEqual("https://foo.com/@me", self.m.url) + self.assertEqual("https://foo.com/@me", self.m.silo_url()) + self.assertEqual("tag:foo.com,2013:me", self.m.user_tag_id()) + self.assertEqual("@me@foo.com (Mastodon)", self.m.label()) - def test_new(self): - self.assertEqual(self.auth_entity, self.m.auth_entity.get()) - self.assertEqual('towkin', self.m.gr_source.access_token) - self.assertEqual('@me@foo.com', self.m.key.string_id()) - self.assertEqual('http://pi.ct/ure', self.m.picture) - self.assertEqual('Ryan Barrett', self.m.name) - self.assertEqual('https://foo.com/@me', self.m.url) - self.assertEqual('https://foo.com/@me', self.m.silo_url()) - self.assertEqual('tag:foo.com,2013:me', self.m.user_tag_id()) - self.assertEqual('@me@foo.com (Mastodon)', self.m.label()) + def test_canonicalize_url(self): + good = "https://foo.com/@x/123" + self.assertEqual(good, self.m.canonicalize_url(good)) + self.assertEqual(good, self.m.canonicalize_url("http://foo.com/@x/123/")) - def test_canonicalize_url(self): - good = 'https://foo.com/@x/123' - self.assertEqual(good, self.m.canonicalize_url(good)) - self.assertEqual(good, self.m.canonicalize_url('http://foo.com/@x/123/')) + def test_is_private(self): + self.assertFalse(self.m.is_private()) - def test_is_private(self): - self.assertFalse(self.m.is_private()) + self.auth_entity.user_json = json_dumps({"locked": True}) + self.auth_entity.put() + self.assertTrue(self.m.is_private()) - self.auth_entity.user_json = json_dumps({'locked': True}) - self.auth_entity.put() - self.assertTrue(self.m.is_private()) + def test_search_links(self): + self.m.domains = ["foo.com", 
"bar"] - def test_search_links(self): - self.m.domains = ['foo.com', 'bar'] + self.expect_requests_get( + "https://foo.com" + API_SEARCH, + params={"q": "foo.com OR bar", "resolve": True, "offset": 0}, + response={"statuses": [STATUS]}, + headers={"Authorization": "Bearer towkin"}, + ) + self.mox.ReplayAll() - self.expect_requests_get( - 'https://foo.com' + API_SEARCH, params={ - 'q': 'foo.com OR bar', - 'resolve': True, - 'offset': 0}, - response={'statuses': [STATUS]}, - headers={'Authorization': 'Bearer towkin'}) - self.mox.ReplayAll() + got = self.m.search_for_links() + self.assert_equals(1, len(got)) + # granary.test_mastodon's ACTIVITY has tag URIs without 2013, but we + # generate them, so work around that in this comparison. + self.assert_equals(util.tag_uri("foo.com", STATUS["id"]), got[0]["id"]) - got = self.m.search_for_links() - self.assert_equals(1, len(got)) - # granary.test_mastodon's ACTIVITY has tag URIs without 2013, but we - # generate them, so work around that in this comparison. - self.assert_equals(util.tag_uri('foo.com', STATUS['id']), got[0]['id']) + def test_search_links_no_domains(self): + self.m.domains = [] + self.assert_equals([], self.m.search_for_links()) - def test_search_links_no_domains(self): - self.m.domains = [] - self.assert_equals([], self.m.search_for_links()) + def test_load_blocklist_missing_scope(self): + self.expect_requests_get( + "https://foo.com" + API_BLOCKS, + headers={"Authorization": "Bearer towkin"}, + status_code=403, + ) + self.mox.ReplayAll() + self.m.load_blocklist() + self.assertEqual([], self.m.blocked_ids) + self.assertFalse(self.m.is_blocked({"numeric_id": 123})) - def test_load_blocklist_missing_scope(self): - self.expect_requests_get('https://foo.com' + API_BLOCKS, - headers={'Authorization': 'Bearer towkin'}, - status_code=403) - self.mox.ReplayAll() - self.m.load_blocklist() - self.assertEqual([], self.m.blocked_ids) - self.assertFalse(self.m.is_blocked({'numeric_id': 123})) - - def test_gr_class_with_max_toot_chars(self): - app = self.auth_entity.app.get() - app.instance_info = '{"max_toot_chars": 999}' - app.put() - self.assert_equals(999, self.m.gr_source.TRUNCATE_TEXT_LENGTH) + def test_gr_class_with_max_toot_chars(self): + app = self.auth_entity.app.get() + app.instance_info = '{"max_toot_chars": 999}' + app.put() + self.assert_equals(999, self.m.gr_source.TRUNCATE_TEXT_LENGTH) diff --git a/tests/test_medium.py b/tests/test_medium.py index 83a9a3b1..dcb1c38a 100644 --- a/tests/test_medium.py +++ b/tests/test_medium.py @@ -15,150 +15,162 @@ USER = { - 'data': { - 'id': 'abcdef01234', - 'username': 'ry', - 'name': 'Ryan', - 'url': 'http://medium.com/@ry', - 'imageUrl': 'http://ava/tar', - }, + "data": { + "id": "abcdef01234", + "username": "ry", + "name": "Ryan", + "url": "http://medium.com/@ry", + "imageUrl": "http://ava/tar", + }, } PUBLICATIONS = { - 'data': [{ - 'id': 'b969ac62a46b', - 'name': 'About Medium', - 'description': 'What is this thing and how does it work?', - 'url': 'https://medium.com/about', - 'imageUrl': 'https://about/image.png' - }, { - 'id': 'b45573563f5a', - 'name': 'Developers', - 'description': 'Medium’s Developer resources', - 'url': 'https://medium.com/developers', - 'imageUrl': 'https://developers/image.png' - }], + "data": [ + { + "id": "b969ac62a46b", + "name": "About Medium", + "description": "What is this thing and how does it work?", + "url": "https://medium.com/about", + "imageUrl": "https://about/image.png", + }, + { + "id": "b45573563f5a", + "name": "Developers", + "description": 
"Medium’s Developer resources", + "url": "https://medium.com/developers", + "imageUrl": "https://developers/image.png", + }, + ], } -class MediumTest(testutil.AppTest): - def setUp(self): - super().setUp() - self.auth_entity = oauth_medium.MediumAuth( - id='abcdef01234', access_token_str='my token', user_json=json_dumps(USER), - publications_json=json_dumps(PUBLICATIONS)) - self.auth_entity.put() - - # prevent subscribing to superfeedr - self.orig_local = appengine_info.LOCAL - appengine_info.LOCAL = True - - def tearDown(self): - appengine_info.LOCAL = self.orig_local - super().tearDown() - - def expect_requests_get(self, path, *args, **kwargs): - return super().expect_requests_get( - oauth_medium.API_BASE + path, - *args, - headers={ - 'Authorization': 'Bearer my token', - 'User-Agent': oauth_medium.USER_AGENT, - }, - **kwargs) - - def expect_get_publications(self, pubs): - # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications - self.expect_requests_get('users/abcdef01234/publications', json_dumps(pubs)) - self.mox.ReplayAll() - - def assert_created_profile(self, medium=None): - if not medium: - mediums = list(Medium.query()) - self.assertEqual(1, len(mediums)) - medium = mediums[0] - - self.assertEqual('@ry', medium.key.id()) - self.assertEqual(self.auth_entity.key, medium.auth_entity) - self.assertEqual('Ryan', medium.name) - self.assertEqual('http://medium.com/@ry', medium.url) - self.assertEqual('http://ava/tar', medium.picture) - self.assertFalse(medium.is_publication()) - self.assertEqual('http://medium.com/feed/@ry', medium.feed_url()) - self.assertEqual('http://medium.com/@ry', medium.silo_url()) - - def assert_created_publication(self, medium=None): - if not medium: - mediums = list(Medium.query()) - self.assertEqual(1, len(mediums)) - medium = mediums[0] - - self.assertEqual('b45573563f5a', medium.key.id()) - self.assertEqual(self.auth_entity.key, medium.auth_entity) - self.assertEqual('Developers', medium.name) - self.assertEqual('https://medium.com/developers', medium.url) - self.assertEqual('https://developers/image.png', medium.picture) - self.assertTrue(medium.is_publication()) - self.assertEqual('https://medium.com/feed/developers', medium.feed_url()) - self.assertEqual('https://medium.com/developers', medium.silo_url()) - - def test_new_profile(self): - self.assert_created_profile( - Medium.new(auth_entity=self.auth_entity, id='@ry')) - - def test_new_publication(self): - self.assert_created_publication( - Medium.new(auth_entity=self.auth_entity, id='b45573563f5a')) - - def test_choose_blog_decline(self): - with app.test_request_context(): - with self.assertRaises(RequestRedirect) as rr: - ChooseBlog('/unused').finish(None) - self.assertEqual(0, Medium.query().count()) - self.assertEqual(302, rr.exception.code) - self.assertEqual('http://localhost/', rr.exception.new_url) - self.assertEqual(["OK, you're not signed up. 
Hope you reconsider!"], - get_flashed_messages()) - - def test_choose_blog_no_publications(self): - self.expect_get_publications({}) - - with app.test_request_context(): - with self.assertRaises(RequestRedirect) as rr: - ChooseBlog('/unused').finish(self.auth_entity) - self.assertEqual(302, rr.exception.code) - self.assertEqual('http://localhost/medium/@ry', rr.exception.new_url) - self.assert_created_profile() - - def test_choose_blog_publications(self): - self.expect_get_publications(PUBLICATIONS) - - with app.test_request_context(): - resp = ChooseBlog('/unused').finish(self.auth_entity) - for expected in ('action="/medium/add" method="post"', - 'url"}, + # t.co is in the webmention blocklist + {"url": "http://t.co/foo"}, + # fa.ke is the source's domain + {"url": "http://fa.ke/bar"}, + ): + auth_entity = None + if user_json is not None: + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps(user_json) + ) + auth_entity.put() + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual([], source.domains) + self.assertEqual([], source.domain_urls) + + # good URLs + for url in ( + "http://foo.com/bar", + "https://www.foo.com/bar", + "http://FoO.cOm/", # should be normalized to lowercase + ): + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps({"url": url}) + ) + auth_entity.put() + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual([url.lower()], source.domain_urls) + self.assertEqual(["foo.com"], source.domains) + + # multiple good URLs and one that's in the webmention blocklist + auth_entity = testutil.FakeAuthEntity( + id="x", + user_json=json_dumps( + { + "url": "http://foo.org", + "urls": [ + {"value": u} + for u in ( + "http://bar.com", + "http://t.co/x", + "http://baz", + # utm_* query params should be stripped + "https://baj/biff?utm_campaign=x&utm_source=y", + ) + ], + } + ), + ) + auth_entity.put() + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual( + ["http://foo.org/", "http://bar.com/", "http://baz/", "https://baj/biff"], + source.domain_urls, + ) + self.assertEqual(["foo.org", "bar.com", "baz", "baj"], source.domains) + + # a URL that redirects + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps({"url": "http://orig"}) + ) + auth_entity.put() + + self.expect_requests_head("http://orig", redirected_url="http://final") + self.mox.ReplayAll() + + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(["http://final/"], source.domain_urls) + self.assertEqual(["final"], source.domains) + + def test_create_new_domain_url_redirects_to_path(self): + """If a profile URL is a root that redirects to a path, keep the root.""" + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps({"url": "http://site"}) + ) + auth_entity.put() - def test_sources_global(self): - self.assertEqual(blogger.Blogger, models.sources['blogger']) - self.assertEqual(flickr.Flickr, models.sources['flickr']) - self.assertEqual(instagram.Instagram, models.sources['instagram']) - self.assertEqual(tumblr.Tumblr, models.sources['tumblr']) - self.assertEqual(twitter.Twitter, models.sources['twitter']) - self.assertEqual(wordpress_rest.WordPress, models.sources['wordpress']) - - def _test_create_new(self, expected_msg, **kwargs): - with self.app.test_request_context(): - source = 
FakeSource.create_new(domains=['foo'], - domain_urls=['http://foo.com'], - webmention_endpoint='http://x/y', - **kwargs) - flashed = get_flashed_messages() - self.assertEqual(1, len(flashed)) - self.assertIn(expected_msg, flashed[0]) - - source = source.key.get() - self.assertEqual('fake (FakeSource)', source.label()) - return source - - def test_create_new(self): - key = FakeSource.next_key() - for queue in 'poll-now', 'poll': - self.expect_task(queue, source_key=key, last_polled='1970-01-01-00-00-00') - self.mox.ReplayAll() - - orig_count = FakeSource.query().count() - self._test_create_new( - "Added fake (FakeSource). Refresh in a minute to see what we've found!", - features=['listen']) - self.assertEqual(orig_count + 1, FakeSource.query().count()) - - def test_escape_key_id(self): - s = Source(id='__foo__') - self.assert_equals(r'\__foo__', s.key.string_id()) - self.assert_equals('__foo__', s.key_id()) - - def test_get_activities_injects_web_site_urls_into_user_mentions(self): - source = FakeSource.new(domain_urls=['http://site1/', 'http://site2/']) - source.put() - - mention = { - 'object': { - 'tags': [{ - 'objectType': 'person', - 'id': 'tag:fa.ke,2013:%s' % source.key.id(), - 'url': 'https://fa.ke/me', - }, { - 'objectType': 'person', - 'id': 'tag:fa.ke,2013:bob', - }], - }, - } - FakeGrSource.activities = [mention] - - # check that we inject their web sites - got = super(FakeSource, source).get_activities_response() - mention['object']['tags'][0]['urls'] = [ - {'value': 'http://site1/'}, {'value': 'http://site2/'}] - self.assert_equals([mention], got['items']) - - def test_get_comment_injects_web_site_urls_into_user_mentions(self): - source = FakeSource.new(domain_urls=['http://site1/', 'http://site2/']) - source.put() - - user_id = 'tag:fa.ke,2013:%s' % source.key.id() - FakeGrSource.comment = { - 'id': 'tag:fa.ke,2013:a1-b2.c3', - 'tags': [ - {'id': 'tag:fa.ke,2013:nobody'}, - {'id': user_id}, - ], - } - - # check that we inject their web sites - self.assert_equals({ - 'id': 'tag:fa.ke,2013:%s' % source.key.id(), - 'urls': [{'value': 'http://site1/'}, {'value': 'http://site2/'}], - }, super(FakeSource, source).get_comment('x')['tags'][1]) - - def test_create_new_already_exists(self): - long_ago = datetime.datetime(year=1901, month=2, day=3) - props = { - 'created': long_ago, - 'last_webmention_sent': long_ago + datetime.timedelta(days=1), - 'last_polled': long_ago + datetime.timedelta(days=2), - 'last_hfeed_refetch': long_ago + datetime.timedelta(days=3), - 'last_syndication_url': long_ago + datetime.timedelta(days=4), - 'superfeedr_secret': 'asdfqwert', - } - key = FakeSource.new(features=['listen'], **props).put() - self.assert_equals(['listen'], FakeSource.query().get().features) - - for queue in 'poll-now', 'poll': - self.expect_task(queue, source_key=key, last_polled='1901-02-05-00-00-00') - self.mox.ReplayAll() - - FakeSource.string_id_counter -= 1 - auth_entity = testutil.FakeAuthEntity( - id='x', user_json=json_dumps({'url': 'http://foo.com/'})) - auth_entity.put() - - orig_count = FakeSource.query().count() - source = self._test_create_new( - 'Updated fake (FakeSource)', auth_entity=auth_entity, features=['publish']) - self.assertEqual(orig_count, FakeSource.query().count()) - - source = source.key.get() - self.assert_equals(['listen', 'publish'], source.features) - for prop, value in props.items(): - self.assert_equals(value, getattr(source, prop), prop) - - def test_create_new_publish(self): - """If a source is publish only, we shouldn't insert a poll task.""" - with 
self.app.test_request_context(): - FakeSource.create_new(features=['publish']) - # tasks_client is stubbed out, it will complain if it gets called - - def test_create_new_webmention(self): - """We should subscribe to webmention sources in Superfeedr.""" - self.expect_requests_get('http://primary/', 'no webmention endpoint') - self.mox.StubOutWithMock(superfeedr, 'subscribe') - - def check_source(source): - assert isinstance(source, FakeSource) - assert source.is_saved - return True - superfeedr.subscribe(mox.Func(check_source)) - - self.mox.ReplayAll() - with self.app.test_request_context(): - FakeSource.create_new(features=['webmention'], - domains=['primary/'], domain_urls=['http://primary/']) - - def test_create_new_domain(self): - """If the source has a URL set, extract its domain.""" - util.BLOCKLIST.remove('fa.ke') - - self.expect_requests_get('http://fa.ke') - self.expect_requests_get('http://foo.com') - self.expect_requests_get('https://www.foo.com') - self.expect_requests_get('https://baj') - self.mox.ReplayAll() - - # bad URLs - for user_json in (None, {}, {'url': 'noturl'}, - # t.co is in the webmention blocklist - {'url': 'http://t.co/foo'}, - # fa.ke is the source's domain - {'url': 'http://fa.ke/bar'}, - ): - auth_entity = None - if user_json is not None: - auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps(user_json)) + self.expect_requests_head("http://site", redirected_url="https://site/path") + self.mox.ReplayAll() + + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(["http://site/"], source.domain_urls) + self.assertEqual(["site"], source.domains) + + def test_create_new_domain_url_matches_root_relme(self): + """If a profile URL contains a path, check the root for a rel=me to the path.""" + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps({"url": "http://site/path"}) + ) auth_entity.put() - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual([], source.domains) - self.assertEqual([], source.domain_urls) - - # good URLs - for url in ('http://foo.com/bar', 'https://www.foo.com/bar', - 'http://FoO.cOm/', # should be normalized to lowercase - ): - auth_entity = testutil.FakeAuthEntity( - id='x', user_json=json_dumps({'url': url})) - auth_entity.put() - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual([url.lower()], source.domain_urls) - self.assertEqual(['foo.com'], source.domains) - - # multiple good URLs and one that's in the webmention blocklist - auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps({ - 'url': 'http://foo.org', - 'urls': [{'value': u} for u in - ('http://bar.com', 'http://t.co/x', 'http://baz', - # utm_* query params should be stripped - 'https://baj/biff?utm_campaign=x&utm_source=y')], - })) - auth_entity.put() - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://foo.org/', 'http://bar.com/', 'http://baz/', - 'https://baj/biff'], - source.domain_urls) - self.assertEqual(['foo.org', 'bar.com', 'baz', 'baj'], source.domains) - - # a URL that redirects - auth_entity = testutil.FakeAuthEntity( - id='x', user_json=json_dumps({'url': 'http://orig'})) - auth_entity.put() - - self.expect_requests_head('http://orig', redirected_url='http://final') - self.mox.ReplayAll() - - with self.app.test_request_context(): - source = 
FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://final/'], source.domain_urls) - self.assertEqual(['final'], source.domains) - - def test_create_new_domain_url_redirects_to_path(self): - """If a profile URL is a root that redirects to a path, keep the root.""" - auth_entity = testutil.FakeAuthEntity( - id='x', user_json=json_dumps({'url': 'http://site'})) - auth_entity.put() - - self.expect_requests_head('http://site', redirected_url='https://site/path') - self.mox.ReplayAll() - - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://site/'], source.domain_urls) - self.assertEqual(['site'], source.domains) - - def test_create_new_domain_url_matches_root_relme(self): - """If a profile URL contains a path, check the root for a rel=me to the path.""" - auth_entity = testutil.FakeAuthEntity( - id='x', user_json=json_dumps({'url': 'http://site/path'})) - auth_entity.put() - - self.expect_requests_get('http://site', 'http://site/path') - self.mox.ReplayAll() - - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://site/'], source.domain_urls) - self.assertEqual(['site'], source.domains) - - def test_create_new_domain_url_no_root_relme(self): - """If a profile URL contains a path, check the root for a rel=me to the path.""" - auth_entity = testutil.FakeAuthEntity( - id='x', user_json=json_dumps({'url': 'http://site/path'})) - auth_entity.put() - - self.expect_requests_get('http://site') - self.mox.ReplayAll() - - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://site/path'], source.domain_urls) - self.assertEqual(['site'], source.domains) - - def test_create_new_unicode_chars(self): - """We should handle unusual unicode chars in the source's name ok.""" - # the invisible character in the middle is an unusual unicode character - with self.app.test_request_context(): - FakeSource.create_new(name='a ✁ b') - - def test_create_new_rereads_domains(self): - key = FakeSource.new(features=['listen'], - domain_urls=['http://foo'], domains=['foo']).put() - - FakeSource.string_id_counter -= 1 - auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps( - {'urls': [{'value': 'http://bar'}, {'value': 'http://baz'}]})) - self.expect_requests_get('http://bar/', 'no webmention endpoint') - - for queue in 'poll-now', 'poll': - self.expect_task(queue, source_key=key, last_polled='1970-01-01-00-00-00') - - self.mox.ReplayAll() - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://bar/', 'http://baz/'], source.domain_urls) - self.assertEqual(['bar', 'baz'], source.domains) - - @skip("can't keep old domains on signup until edit websites works. 
#623") - def test_create_new_merges_domains(self): - FakeSource.new(features=['listen'], - domain_urls=['http://foo'], domains=['foo']).put() - - FakeSource.string_id_counter -= 1 - auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps( - {'urls': [{'value': 'http://bar'}, {'value': 'http://baz'}]})) - self.expect_requests_get('http://bar/', 'no webmention endpoint') - - self.mox.ReplayAll() - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://bar/', 'http://baz/', 'http://foo/'], source.domain_urls) - self.assertEqual(['baz', 'foo', 'bar'], source.domains) - - def test_create_new_dedupes_domains(self): - auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps( - {'urls': [{'value': 'http://foo'}, - {'value': 'https://foo/'}, - {'value': 'http://foo/'}, - {'value': 'http://foo'}, - ]})) - self.mox.ReplayAll() - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['https://foo/'], source.domain_urls) - self.assertEqual(['foo'], source.domains) - - def test_create_new_too_many_domains(self): - urls = ['http://%s/' % i for i in range(10)] - auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps( - {'urls': [{'value': u} for u in urls]})) - - # we should only check the first 5 - for url in urls[:models.MAX_AUTHOR_URLS]: - self.expect_requests_head(url) - self.mox.ReplayAll() - - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(urls, source.domain_urls) - self.assertEqual([str(i) for i in range(10)], source.domains) - - def test_create_new_domain_url_path_fails(self): - auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps( - {'urls': [{'value': 'http://flaky/foo'}]})) - self.expect_requests_get('http://flaky', status_code=500) - self.mox.ReplayAll() - - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://flaky/foo'], source.domain_urls) - self.assertEqual(['flaky'], source.domains) - - def test_create_new_domain_url_path_connection_fails(self): - auth_entity = testutil.FakeAuthEntity(id='x', user_json=json_dumps( - {'urls': [{'value': 'http://flaky/foo'}]})) - self.expect_requests_get('http://flaky').AndRaise( - requests.ConnectionError('DNS lookup failed for URL: http://bad/')) - self.mox.ReplayAll() - - with self.app.test_request_context(): - source = FakeSource.create_new(auth_entity=auth_entity) - self.assertEqual(['http://flaky/foo'], source.domain_urls) - self.assertEqual(['flaky'], source.domains) - - def test_verify(self): - self.expect_requests_get('http://primary/', """ + + self.expect_requests_get( + "http://site", + 'http://site/path', + ) + self.mox.ReplayAll() + + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(["http://site/"], source.domain_urls) + self.assertEqual(["site"], source.domains) + + def test_create_new_domain_url_no_root_relme(self): + """If a profile URL contains a path, check the root for a rel=me to the path.""" + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps({"url": "http://site/path"}) + ) + auth_entity.put() + + self.expect_requests_get("http://site") + self.mox.ReplayAll() + + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(["http://site/path"], source.domain_urls) + 
self.assertEqual(["site"], source.domains) + + def test_create_new_unicode_chars(self): + """We should handle unusual unicode chars in the source's name ok.""" + # the invisible character in the middle is an unusual unicode character + with self.app.test_request_context(): + FakeSource.create_new(name="a ✁ b") + + def test_create_new_rereads_domains(self): + key = FakeSource.new( + features=["listen"], domain_urls=["http://foo"], domains=["foo"] + ).put() + + FakeSource.string_id_counter -= 1 + auth_entity = testutil.FakeAuthEntity( + id="x", + user_json=json_dumps( + {"urls": [{"value": "http://bar"}, {"value": "http://baz"}]} + ), + ) + self.expect_requests_get("http://bar/", "no webmention endpoint") + + for queue in "poll-now", "poll": + self.expect_task(queue, source_key=key, last_polled="1970-01-01-00-00-00") + + self.mox.ReplayAll() + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(["http://bar/", "http://baz/"], source.domain_urls) + self.assertEqual(["bar", "baz"], source.domains) + + @skip("can't keep old domains on signup until edit websites works. #623") + def test_create_new_merges_domains(self): + FakeSource.new( + features=["listen"], domain_urls=["http://foo"], domains=["foo"] + ).put() + + FakeSource.string_id_counter -= 1 + auth_entity = testutil.FakeAuthEntity( + id="x", + user_json=json_dumps( + {"urls": [{"value": "http://bar"}, {"value": "http://baz"}]} + ), + ) + self.expect_requests_get("http://bar/", "no webmention endpoint") + + self.mox.ReplayAll() + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual( + ["http://bar/", "http://baz/", "http://foo/"], source.domain_urls + ) + self.assertEqual(["baz", "foo", "bar"], source.domains) + + def test_create_new_dedupes_domains(self): + auth_entity = testutil.FakeAuthEntity( + id="x", + user_json=json_dumps( + { + "urls": [ + {"value": "http://foo"}, + {"value": "https://foo/"}, + {"value": "http://foo/"}, + {"value": "http://foo"}, + ] + } + ), + ) + self.mox.ReplayAll() + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(["https://foo/"], source.domain_urls) + self.assertEqual(["foo"], source.domains) + + def test_create_new_too_many_domains(self): + urls = ["http://%s/" % i for i in range(10)] + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps({"urls": [{"value": u} for u in urls]}) + ) + + # we should only check the first 5 + for url in urls[: models.MAX_AUTHOR_URLS]: + self.expect_requests_head(url) + self.mox.ReplayAll() + + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(urls, source.domain_urls) + self.assertEqual([str(i) for i in range(10)], source.domains) + + def test_create_new_domain_url_path_fails(self): + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps({"urls": [{"value": "http://flaky/foo"}]}) + ) + self.expect_requests_get("http://flaky", status_code=500) + self.mox.ReplayAll() + + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(["http://flaky/foo"], source.domain_urls) + self.assertEqual(["flaky"], source.domains) + + def test_create_new_domain_url_path_connection_fails(self): + auth_entity = testutil.FakeAuthEntity( + id="x", user_json=json_dumps({"urls": [{"value": "http://flaky/foo"}]}) + ) + 
self.expect_requests_get("http://flaky").AndRaise( + requests.ConnectionError("DNS lookup failed for URL: http://bad/") + ) + self.mox.ReplayAll() + + with self.app.test_request_context(): + source = FakeSource.create_new(auth_entity=auth_entity) + self.assertEqual(["http://flaky/foo"], source.domain_urls) + self.assertEqual(["flaky"], source.domains) + + def test_verify(self): + self.expect_requests_get( + "http://primary/", + """ -""") - self.mox.ReplayAll() - - source = FakeSource.new(features=['webmention'], - domain_urls=['http://primary/'], domains=['primary']) - source.verify() - self.assertEqual('http://web.ment/ion', source.webmention_endpoint) - - def test_verify_unicode_characters(self): - """Older versions of BS4 had an issue where it would check short HTML - documents to make sure the user wasn't accidentally passing a URL, - but converting the utf-8 document to ascii caused exceptions in some cases. - """ - self.expect_requests_get( - 'http://primary/', """\xef\xbb\xbf +""", + ) + self.mox.ReplayAll() + + source = FakeSource.new( + features=["webmention"], + domain_urls=["http://primary/"], + domains=["primary"], + ) + source.verify() + self.assertEqual("http://web.ment/ion", source.webmention_endpoint) + + def test_verify_unicode_characters(self): + """Older versions of BS4 had an issue where it would check short HTML + documents to make sure the user wasn't accidentally passing a URL, + but converting the utf-8 document to ascii caused exceptions in some cases. + """ + self.expect_requests_get( + "http://primary/", + """\xef\xbb\xbf -""") - self.mox.ReplayAll() - - source = FakeSource.new(features=['webmention'], - domain_urls=['http://primary/'], - domains=['primary']) - source.verify() - self.assertEqual('http://web.ment/ion', source.webmention_endpoint) - - def test_verify_without_webmention_endpoint(self): - self.expect_requests_get('http://primary/', 'no webmention endpoint here!') - self.mox.ReplayAll() - - source = FakeSource.new(features=['webmention'], - domain_urls=['http://primary/'], domains=['primary']) - source.verify() - self.assertIsNone(source.webmention_endpoint) - - def test_verify_checks_blocklist(self): - self.expect_requests_get('http://good/', """ +""", + ) + self.mox.ReplayAll() + + source = FakeSource.new( + features=["webmention"], + domain_urls=["http://primary/"], + domains=["primary"], + ) + source.verify() + self.assertEqual("http://web.ment/ion", source.webmention_endpoint) + + def test_verify_without_webmention_endpoint(self): + self.expect_requests_get("http://primary/", "no webmention endpoint here!") + self.mox.ReplayAll() + + source = FakeSource.new( + features=["webmention"], + domain_urls=["http://primary/"], + domains=["primary"], + ) + source.verify() + self.assertIsNone(source.webmention_endpoint) + + def test_verify_checks_blocklist(self): + self.expect_requests_get( + "http://good/", + """ -""") - self.mox.ReplayAll() +""", + ) + self.mox.ReplayAll() + + source = FakeSource.new( + features=["webmention"], + domain_urls=["http://bad.www/", "http://good/"], + domains=["bad.www", "good"], + ) + source.verify() + self.assertEqual("http://web.ment/ion", source.webmention_endpoint) + + def test_has_bridgy_webmention_endpoint(self): + source = FakeSource.new() + for endpoint, has in ( + (None, False), + ("http://foo", False), + ("https://brid.gy/webmention/fake", True), + ("https://www.brid.gy/webmention/fake", True), + ): + source.webmention_endpoint = endpoint + self.assertEqual(has, source.has_bridgy_webmention_endpoint(), endpoint) - 
source = FakeSource.new(features=['webmention'], - domain_urls=['http://bad.www/', 'http://good/'], - domains=['bad.www', 'good']) - source.verify() - self.assertEqual('http://web.ment/ion', source.webmention_endpoint) + def test_put_updates(self): + source = FakeSource.new() + source.put() + updates = source.updates = {"status": "disabled"} - def test_has_bridgy_webmention_endpoint(self): - source = FakeSource.new() - for endpoint, has in ((None, False), - ('http://foo', False ), - ('https://brid.gy/webmention/fake', True), - ('https://www.brid.gy/webmention/fake', True), - ): - source.webmention_endpoint = endpoint - self.assertEqual(has, source.has_bridgy_webmention_endpoint(), endpoint) + Source.put_updates(source) + self.assertEqual("disabled", source.key.get().status) - def test_put_updates(self): - source = FakeSource.new() - source.put() - updates = source.updates = {'status': 'disabled'} + def test_poll_period(self): + source = FakeSource.new() + source.put() - Source.put_updates(source) - self.assertEqual('disabled', source.key.get().status) + self.assertEqual(source.FAST_POLL, source.poll_period()) - def test_poll_period(self): - source = FakeSource.new() - source.put() + source.created = datetime.datetime(2000, 1, 1) + self.assertEqual(source.SLOW_POLL, source.poll_period()) - self.assertEqual(source.FAST_POLL, source.poll_period()) + now = datetime.datetime.now() + source.last_webmention_sent = now - datetime.timedelta(days=8) + self.assertEqual(source.FAST_POLL * 10, source.poll_period()) - source.created = datetime.datetime(2000, 1, 1) - self.assertEqual(source.SLOW_POLL, source.poll_period()) + source.last_webmention_sent = now + self.assertEqual(source.FAST_POLL, source.poll_period()) - now = datetime.datetime.now() - source.last_webmention_sent = now - datetime.timedelta(days=8) - self.assertEqual(source.FAST_POLL * 10, source.poll_period()) + source.rate_limited = True + self.assertEqual(source.RATE_LIMITED_POLL, source.poll_period()) - source.last_webmention_sent = now - self.assertEqual(source.FAST_POLL, source.poll_period()) + def test_should_refetch(self): + source = FakeSource.new() # haven't found a synd url yet + self.assertFalse(source.should_refetch()) - source.rate_limited = True - self.assertEqual(source.RATE_LIMITED_POLL, source.poll_period()) + source.last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER # override + self.assertTrue(source.should_refetch()) - def test_should_refetch(self): - source = FakeSource.new() # haven't found a synd url yet - self.assertFalse(source.should_refetch()) + source.last_syndication_url = ( + source.last_hfeed_refetch + ) = testutil.NOW # too soon + self.assertFalse(source.should_refetch()) - source.last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER # override - self.assertTrue(source.should_refetch()) + source.last_poll_attempt = testutil.NOW # too soon + self.assertFalse(source.should_refetch()) - source.last_syndication_url = source.last_hfeed_refetch = testutil.NOW # too soon - self.assertFalse(source.should_refetch()) + hour = datetime.timedelta(hours=1) + source.last_hfeed_refetch -= Source.FAST_REFETCH + hour + self.assertTrue(source.should_refetch()) - source.last_poll_attempt = testutil.NOW # too soon - self.assertFalse(source.should_refetch()) + source.last_syndication_url -= datetime.timedelta(days=15) # slow refetch + self.assertFalse(source.should_refetch()) - hour = datetime.timedelta(hours=1) - source.last_hfeed_refetch -= (Source.FAST_REFETCH + hour) - self.assertTrue(source.should_refetch()) + 
source.last_hfeed_refetch -= Source.SLOW_REFETCH + hour + self.assertTrue(source.should_refetch()) - source.last_syndication_url -= datetime.timedelta(days=15) # slow refetch - self.assertFalse(source.should_refetch()) + def test_is_beta_user(self): + source = Source(id="x") + self.assertFalse(source.is_beta_user()) - source.last_hfeed_refetch -= (Source.SLOW_REFETCH + hour) - self.assertTrue(source.should_refetch()) + self.mox.stubs.Set(util, "BETA_USER_PATHS", set()) + self.assertFalse(source.is_beta_user()) - def test_is_beta_user(self): - source = Source(id='x') - self.assertFalse(source.is_beta_user()) + self.mox.stubs.Set(util, "BETA_USER_PATHS", set([source.bridgy_path()])) + self.assertTrue(source.is_beta_user()) - self.mox.stubs.Set(util, 'BETA_USER_PATHS', set()) - self.assertFalse(source.is_beta_user()) + def test_load_blocklist(self): + self.mox.stubs.Set(models, "BLOCKLIST_MAX_IDS", 2) + FakeGrSource.blocklist_ids = [1, 2, 3] - self.mox.stubs.Set(util, 'BETA_USER_PATHS', set([source.bridgy_path()])) - self.assertTrue(source.is_beta_user()) + source = FakeSource(id="x") + source.load_blocklist() + self.assertEqual([1, 2], source.blocked_ids) - def test_load_blocklist(self): - self.mox.stubs.Set(models, 'BLOCKLIST_MAX_IDS', 2) - FakeGrSource.blocklist_ids = [1, 2, 3] + def test_load_blocklist_rate_limited(self): + source = FakeSource(id="x") + self.mox.StubOutWithMock(source.gr_source, "get_blocklist_ids") + source.gr_source.get_blocklist_ids().AndRaise( + gr_source.RateLimited(partial=[4, 5]) + ) + self.mox.ReplayAll() - source = FakeSource(id='x') - source.load_blocklist() - self.assertEqual([1, 2], source.blocked_ids) + source.load_blocklist() + self.assertEqual([4, 5], source.blocked_ids) - def test_load_blocklist_rate_limited(self): - source = FakeSource(id='x') - self.mox.StubOutWithMock(source.gr_source, 'get_blocklist_ids') - source.gr_source.get_blocklist_ids().AndRaise( - gr_source.RateLimited(partial=[4, 5])) - self.mox.ReplayAll() + def test_is_blocked(self): + source = Source(id="x") + self.assertFalse(source.is_blocked({"author": {"numeric_id": "1"}})) - source.load_blocklist() - self.assertEqual([4, 5], source.blocked_ids) + source = Source(id="x", blocked_ids=["1", "2"]) + self.assertTrue(source.is_blocked({"author": {"numeric_id": "1"}})) + self.assertFalse(source.is_blocked({"object": {"actor": {"numeric_id": "3"}}})) - def test_is_blocked(self): - source = Source(id='x') - self.assertFalse(source.is_blocked({'author': {'numeric_id': '1'}})) - - source = Source(id='x', blocked_ids = ['1', '2']) - self.assertTrue(source.is_blocked({'author': {'numeric_id': '1'}})) - self.assertFalse(source.is_blocked({'object': {'actor': {'numeric_id': '3'}}})) - - def test_getattr_doesnt_exist(self): - source = FakeSource(id='x') - with self.assertRaises(AttributeError): - source.bad + def test_getattr_doesnt_exist(self): + source = FakeSource(id="x") + with self.assertRaises(AttributeError): + source.bad class BlogPostTest(testutil.AppTest): + def test_label(self): + for feed_item in None, {}: + bp = BlogPost(id="x") + bp.put() + self.assertEqual("BlogPost x [no url]", bp.label()) - def test_label(self): - for feed_item in None, {}: - bp = BlogPost(id='x') - bp.put() - self.assertEqual('BlogPost x [no url]', bp.label()) + bp = BlogPost(id="x", feed_item={"permalinkUrl": "http://perma/link"}) + bp.put() + self.assertEqual("BlogPost x http://perma/link", bp.label()) - bp = BlogPost(id='x', feed_item={'permalinkUrl': 'http://perma/link'}) - bp.put() - self.assertEqual('BlogPost x 
http://perma/link', bp.label()) + def test_restart(self): + self.expect_task("propagate-blogpost", key=self.blogposts[0]) + self.mox.ReplayAll() - def test_restart(self): - self.expect_task('propagate-blogpost', key=self.blogposts[0]) - self.mox.ReplayAll() + urls = self.blogposts[0].sent + self.blogposts[0].restart() - urls = self.blogposts[0].sent - self.blogposts[0].restart() - - blogpost = self.blogposts[0].key.get() - self.assert_equals(urls, blogpost.unsent) - self.assert_equals([], blogpost.sent) + blogpost = self.blogposts[0].key.get() + self.assert_equals(urls, blogpost.unsent) + self.assert_equals([], blogpost.sent) class SyndicatedPostTest(testutil.AppTest): - - def setUp(self): - super().setUp() - - self.source = FakeSource.new() - self.source.put() - - self.relationships = [] - self.relationships.append( - SyndicatedPost(parent=self.source.key, - original='http://original/post/url', - syndication='http://silo/post/url')) - # two syndication for the same original - self.relationships.append( - SyndicatedPost(parent=self.source.key, - original='http://original/post/url', - syndication='http://silo/another/url')) - # two originals for the same syndication - self.relationships.append( - SyndicatedPost(parent=self.source.key, - original='http://original/another/post', - syndication='http://silo/post/url')) - self.relationships.append( - SyndicatedPost(parent=self.source.key, - original=None, - syndication='http://silo/no-original')) - self.relationships.append( - SyndicatedPost(parent=self.source.key, - original='http://original/no-syndication', - syndication=None)) - - for r in self.relationships: - r.put() - - def test_insert_replaces_blanks(self): - """Make sure we replace original=None with original=something - when it is discovered""" - - # add a blank for the original too - SyndicatedPost.insert_original_blank( - self.source, 'http://original/newly-discovered') - - self.assertTrue( - SyndicatedPost.query( - SyndicatedPost.syndication == 'http://silo/no-original', - SyndicatedPost.original == None, ancestor=self.source.key).get()) - - self.assertTrue( - SyndicatedPost.query( - SyndicatedPost.original == 'http://original/newly-discovered', - SyndicatedPost.syndication == None, ancestor=self.source.key).get()) - - r = SyndicatedPost.insert( - self.source, 'http://silo/no-original', - 'http://original/newly-discovered') - self.assertIsNotNone(r) - self.assertEqual('http://original/newly-discovered', r.original) - - # make sure it's in NDB - rs = SyndicatedPost.query( - SyndicatedPost.syndication == 'http://silo/no-original', - ancestor=self.source.key - ).fetch() - self.assertEqual(1, len(rs)) - self.assertEqual('http://original/newly-discovered', rs[0].original) - self.assertEqual('http://silo/no-original', rs[0].syndication) - - # and the blanks have been removed - self.assertFalse( - SyndicatedPost.query( - SyndicatedPost.syndication == 'http://silo/no-original', - SyndicatedPost.original == None, ancestor=self.source.key).get()) - - self.assertFalse( - SyndicatedPost.query( - SyndicatedPost.original == 'http://original/newly-discovered', - SyndicatedPost.syndication == None, ancestor=self.source.key).get()) - - def test_insert_auguments_existing(self): - """Make sure we add newly discovered urls for a given syndication url, - rather than overwrite them - """ - r = SyndicatedPost.insert( - self.source, 'http://silo/post/url', - 'http://original/different/url') - self.assertIsNotNone(r) - self.assertEqual('http://original/different/url', r.original) - - # make sure they're both 
in the DB - rs = SyndicatedPost.query( - SyndicatedPost.syndication == 'http://silo/post/url', - ancestor=self.source.key - ).fetch() - - self.assertCountEqual(['http://original/post/url', - 'http://original/another/post', - 'http://original/different/url'], - [rel.original for rel in rs]) - - def test_get_or_insert_by_syndication_do_not_duplicate_blanks(self): - """Make sure we don't insert duplicate blank entries""" - - SyndicatedPost.insert_syndication_blank( - self.source, 'http://silo/no-original') - - # make sure there's only one in the DB - rs = SyndicatedPost.query( - SyndicatedPost.syndication == 'http://silo/no-original', - ancestor=self.source.key - ).fetch() - - self.assertCountEqual([None], [rel.original for rel in rs]) - - def test_insert_no_duplicates(self): - """Make sure we don't insert duplicate entries""" - - r = SyndicatedPost.insert( - self.source, 'http://silo/post/url', 'http://original/post/url') - self.assertIsNotNone(r) - self.assertEqual('http://original/post/url', r.original) - - # make sure there's only one in the DB - rs = SyndicatedPost.query( - SyndicatedPost.syndication == 'http://silo/post/url', - SyndicatedPost.original == 'http://original/post/url', - ancestor=self.source.key - ).fetch() - - self.assertEqual(1, len(rs)) + def setUp(self): + super().setUp() + + self.source = FakeSource.new() + self.source.put() + + self.relationships = [] + self.relationships.append( + SyndicatedPost( + parent=self.source.key, + original="http://original/post/url", + syndication="http://silo/post/url", + ) + ) + # two syndication for the same original + self.relationships.append( + SyndicatedPost( + parent=self.source.key, + original="http://original/post/url", + syndication="http://silo/another/url", + ) + ) + # two originals for the same syndication + self.relationships.append( + SyndicatedPost( + parent=self.source.key, + original="http://original/another/post", + syndication="http://silo/post/url", + ) + ) + self.relationships.append( + SyndicatedPost( + parent=self.source.key, + original=None, + syndication="http://silo/no-original", + ) + ) + self.relationships.append( + SyndicatedPost( + parent=self.source.key, + original="http://original/no-syndication", + syndication=None, + ) + ) + + for r in self.relationships: + r.put() + + def test_insert_replaces_blanks(self): + """Make sure we replace original=None with original=something + when it is discovered""" + + # add a blank for the original too + SyndicatedPost.insert_original_blank( + self.source, "http://original/newly-discovered" + ) + + self.assertTrue( + SyndicatedPost.query( + SyndicatedPost.syndication == "http://silo/no-original", + SyndicatedPost.original == None, + ancestor=self.source.key, + ).get() + ) + + self.assertTrue( + SyndicatedPost.query( + SyndicatedPost.original == "http://original/newly-discovered", + SyndicatedPost.syndication == None, + ancestor=self.source.key, + ).get() + ) + + r = SyndicatedPost.insert( + self.source, "http://silo/no-original", "http://original/newly-discovered" + ) + self.assertIsNotNone(r) + self.assertEqual("http://original/newly-discovered", r.original) + + # make sure it's in NDB + rs = SyndicatedPost.query( + SyndicatedPost.syndication == "http://silo/no-original", + ancestor=self.source.key, + ).fetch() + self.assertEqual(1, len(rs)) + self.assertEqual("http://original/newly-discovered", rs[0].original) + self.assertEqual("http://silo/no-original", rs[0].syndication) + + # and the blanks have been removed + self.assertFalse( + SyndicatedPost.query( + 
SyndicatedPost.syndication == "http://silo/no-original", + SyndicatedPost.original == None, + ancestor=self.source.key, + ).get() + ) + + self.assertFalse( + SyndicatedPost.query( + SyndicatedPost.original == "http://original/newly-discovered", + SyndicatedPost.syndication == None, + ancestor=self.source.key, + ).get() + ) + + def test_insert_auguments_existing(self): + """Make sure we add newly discovered urls for a given syndication url, + rather than overwrite them + """ + r = SyndicatedPost.insert( + self.source, "http://silo/post/url", "http://original/different/url" + ) + self.assertIsNotNone(r) + self.assertEqual("http://original/different/url", r.original) + + # make sure they're both in the DB + rs = SyndicatedPost.query( + SyndicatedPost.syndication == "http://silo/post/url", + ancestor=self.source.key, + ).fetch() + + self.assertCountEqual( + [ + "http://original/post/url", + "http://original/another/post", + "http://original/different/url", + ], + [rel.original for rel in rs], + ) + + def test_get_or_insert_by_syndication_do_not_duplicate_blanks(self): + """Make sure we don't insert duplicate blank entries""" + + SyndicatedPost.insert_syndication_blank(self.source, "http://silo/no-original") + + # make sure there's only one in the DB + rs = SyndicatedPost.query( + SyndicatedPost.syndication == "http://silo/no-original", + ancestor=self.source.key, + ).fetch() + + self.assertCountEqual([None], [rel.original for rel in rs]) + + def test_insert_no_duplicates(self): + """Make sure we don't insert duplicate entries""" + + r = SyndicatedPost.insert( + self.source, "http://silo/post/url", "http://original/post/url" + ) + self.assertIsNotNone(r) + self.assertEqual("http://original/post/url", r.original) + + # make sure there's only one in the DB + rs = SyndicatedPost.query( + SyndicatedPost.syndication == "http://silo/post/url", + SyndicatedPost.original == "http://original/post/url", + ancestor=self.source.key, + ).fetch() + + self.assertEqual(1, len(rs)) diff --git a/tests/test_original_post_discovery.py b/tests/test_original_post_discovery.py index c4b8bb96..f61a1618 100644 --- a/tests/test_original_post_discovery.py +++ b/tests/test_original_post_discovery.py @@ -15,104 +15,125 @@ class OriginalPostDiscoveryTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.source = self.sources[0] - self.source.domain_urls = ['http://author/'] - self.source.domains = ['author'] - self.source.put() - self.source.updates = {} - - self.activity = self.activities[0] - self.activity['object'].update({ - 'url': 'https://fa.ke/post/url', # silo domain is fa.ke - 'content': 'content without links', - }) - - def assert_discover(self, expected_originals, expected_mentions=[], **kwargs): - got = discover(self.source, self.activity, **kwargs) - self.assertEqual((set(expected_originals), set(expected_mentions)), got, got) - - def assert_syndicated_posts(self, *expected): - got = [(r.original, r.syndication) for r in - SyndicatedPost.query(ancestor=self.source.key)] - self.assertCountEqual(expected, got, got) - - def test_single_post(self): - """Test that original post discovery does the reverse lookup to scan - author's h-feed for rel=syndication links - """ - self.expect_requests_get('http://author/', """ + def setUp(self): + super().setUp() + self.source = self.sources[0] + self.source.domain_urls = ["http://author/"] + self.source.domains = ["author"] + self.source.put() + self.source.updates = {} + + self.activity = self.activities[0] + self.activity["object"].update( + { + "url": 
"https://fa.ke/post/url", # silo domain is fa.ke + "content": "content without links", + } + ) + + def assert_discover(self, expected_originals, expected_mentions=[], **kwargs): + got = discover(self.source, self.activity, **kwargs) + self.assertEqual((set(expected_originals), set(expected_mentions)), got, got) + + def assert_syndicated_posts(self, *expected): + got = [ + (r.original, r.syndication) + for r in SyndicatedPost.query(ancestor=self.source.key) + ] + self.assertCountEqual(expected, got, got) + + def test_single_post(self): + """Test that original post discovery does the reverse lookup to scan + author's h-feed for rel=syndication links + """ + self.expect_requests_get( + "http://author/", + """
- """) + """, + ) - # syndicated to two places - self.expect_requests_get('http://author/post/permalink', """ + # syndicated to two places + self.expect_requests_get( + "http://author/post/permalink", + """
-
""") - - self.mox.ReplayAll() - self.assertIsNone(self.source.last_syndication_url) - self.assert_discover(['http://author/post/permalink']) - self.assert_syndicated_posts(('http://author/post/permalink', - 'https://fa.ke/post/url')) - self.assertEqual(testutil.NOW, self.source.updates['last_syndication_url']) - - def test_syndication_url_in_hfeed(self): - """Like test_single_post, but because the syndication URL is given in - the h-feed we skip fetching the permalink. - """ - # silo domain is fa.ke - self.expect_requests_get('http://author/', """ + """, + ) + + self.mox.ReplayAll() + self.assertIsNone(self.source.last_syndication_url) + self.assert_discover(["http://author/post/permalink"]) + self.assert_syndicated_posts( + ("http://author/post/permalink", "https://fa.ke/post/url") + ) + self.assertEqual(testutil.NOW, self.source.updates["last_syndication_url"]) + + def test_syndication_url_in_hfeed(self): + """Like test_single_post, but because the syndication URL is given in + the h-feed we skip fetching the permalink. + """ + # silo domain is fa.ke + self.expect_requests_get( + "http://author/", + """
- """) - - self.mox.ReplayAll() - self.assert_discover(['http://author/post/permalink']) - self.assert_syndicated_posts(('http://author/post/permalink', - 'https://fa.ke/post/url')) - - self.assertEqual(testutil.NOW, self.source.updates['last_syndication_url']) - self.assertEqual(testutil.NOW, self.source.updates['last_feed_syndication_url']) - - def test_syndication_url_in_hfeed_with_redirect(self): - """Like test_syndication_url_in_hfeed but u-url redirects to the - actual post URL. We should follow the redirect like we do everywhere - else. - """ - self.expect_requests_head('https://fa.ke/post/url') - self.expect_requests_head('http://author/') - self.expect_requests_get('http://author/', """ + """, + ) + + self.mox.ReplayAll() + self.assert_discover(["http://author/post/permalink"]) + self.assert_syndicated_posts( + ("http://author/post/permalink", "https://fa.ke/post/url") + ) + + self.assertEqual(testutil.NOW, self.source.updates["last_syndication_url"]) + self.assertEqual(testutil.NOW, self.source.updates["last_feed_syndication_url"]) + + def test_syndication_url_in_hfeed_with_redirect(self): + """Like test_syndication_url_in_hfeed but u-url redirects to the + actual post URL. We should follow the redirect like we do everywhere + else. + """ + self.expect_requests_head("https://fa.ke/post/url") + self.expect_requests_head("http://author/") + self.expect_requests_get( + "http://author/", + """
- """) - - self.expect_requests_head( - 'http://author/post/will-redirect', - redirected_url='http://author/post/final') - - self.mox.ReplayAll() - self.assert_discover(['http://author/post/final']) - self.assert_syndicated_posts(('http://author/post/final', - 'https://fa.ke/post/url')) - - def test_nested_hfeed(self): - """Test that we find an h-feed nested inside an h-card like on tantek.com""" - self.expect_requests_get('http://author/', """ + """, + ) + + self.expect_requests_head( + "http://author/post/will-redirect", + redirected_url="http://author/post/final", + ) + + self.mox.ReplayAll() + self.assert_discover(["http://author/post/final"]) + self.assert_syndicated_posts( + ("http://author/post/final", "https://fa.ke/post/url") + ) + + def test_nested_hfeed(self): + """Test that we find an h-feed nested inside an h-card like on tantek.com""" + self.expect_requests_get( + "http://author/", + """ Author
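# For context on the fixtures these tests fetch from http://author/: each is an
# h-feed whose entries pair a u-url permalink with a u-syndication
# (rel=syndication) link back to the silo post. A minimal sketch of that shape,
# illustrative only rather than the exact fixture markup:
HFEED_SKETCH = """
<html class="h-feed">
  <div class="h-entry">
    <a class="u-url" href="http://author/post/permalink"></a>
    <a class="u-syndication" href="https://fa.ke/post/url"></a>
  </div>
</html>"""
# discover() matches the u-syndication URL against the activity's silo URL and
# records the (original, syndication) pair that assert_syndicated_posts checks.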
@@ -121,33 +142,42 @@ def test_nested_hfeed(self):
- """) + """, + ) - self.expect_requests_get('http://author/post/permalink', """ + self.expect_requests_get( + "http://author/post/permalink", + """ - """) - - self.mox.ReplayAll() - self.assert_discover(['http://author/post/permalink']) - self.assert_syndicated_posts(('http://author/post/permalink', - 'https://fa.ke/post/url')) - - def test_multiple_hfeeds(self): - """That that we search all the h-feeds on a page if there are more than one. - - Inspired by https://sixtwothree.org/ - """ - for i, activity in enumerate(self.activities): - activity['object'].update({ - 'content': 'post content without backlinks', - 'url': 'https://fa.ke/post/url%d' % (i + 1), - }) - - # silo domain is fa.ke - self.expect_requests_get('http://author/', """ + """, + ) + + self.mox.ReplayAll() + self.assert_discover(["http://author/post/permalink"]) + self.assert_syndicated_posts( + ("http://author/post/permalink", "https://fa.ke/post/url") + ) + + def test_multiple_hfeeds(self): + """That that we search all the h-feeds on a page if there are more than one. + + Inspired by https://sixtwothree.org/ + """ + for i, activity in enumerate(self.activities): + activity["object"].update( + { + "content": "post content without backlinks", + "url": "https://fa.ke/post/url%d" % (i + 1), + } + ) + + # silo domain is fa.ke + self.expect_requests_get( + "http://author/", + """
@@ -161,29 +191,31 @@ def test_multiple_hfeeds(self):
- """) - - self.mox.ReplayAll() - self.assert_discover(['http://author/post/permalink1']) - self.assert_syndicated_posts( - ('http://author/post/permalink1', 'https://fa.ke/post/url1'), - ('http://author/post/permalink2', 'https://fa.ke/post/url2'), - ) - - - def test_additional_requests_do_not_require_rework(self): - """Test that original post discovery fetches and stores all entries up - front so that it does not have to reparse the author's h-feed for - every new post. Test that original post discovery does the reverse - lookup to scan author's h-feed for rel=syndication links - """ - for i, activity in enumerate(self.activities): - activity['object'].update({ - 'content': 'post content without backlinks', - 'url': 'https://fa.ke/post/url%d' % (i + 1), - }) - - author_feed = u""" + """, + ) + + self.mox.ReplayAll() + self.assert_discover(["http://author/post/permalink1"]) + self.assert_syndicated_posts( + ("http://author/post/permalink1", "https://fa.ke/post/url1"), + ("http://author/post/permalink2", "https://fa.ke/post/url2"), + ) + + def test_additional_requests_do_not_require_rework(self): + """Test that original post discovery fetches and stores all entries up + front so that it does not have to reparse the author's h-feed for + every new post. Test that original post discovery does the reverse + lookup to scan author's h-feed for rel=syndication links + """ + for i, activity in enumerate(self.activities): + activity["object"].update( + { + "content": "post content without backlinks", + "url": "https://fa.ke/post/url%d" % (i + 1), + } + ) + + author_feed = u"""
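# The hunk below leans on SyndicatedPost acting as a cache: every permalink in
# the author's h-feed is stored up front as an (original, syndication) pair,
# blanks included, so later discover() calls hit the datastore instead of the
# network. A rough sketch of that check, assuming the module's SyndicatedPost
# model; has_cached_link is a hypothetical helper, not the production code:
def has_cached_link(source, syndication_url):
    # any stored row, even a blank one (original=None), means no refetch is needed
    return (
        SyndicatedPost.query(
            SyndicatedPost.syndication == syndication_url,
            ancestor=source.key,
        ).get()
        is not None
    )
# Only when no row exists at all does discover() go back to http://author/.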
@@ -198,162 +230,206 @@ def test_additional_requests_do_not_require_rework(self):
""" - self.expect_requests_get('http://author/', author_feed) + self.expect_requests_get("http://author/", author_feed) - # first post is syndicated - self.expect_requests_get('http://author/post/permalink1', """ + # first post is syndicated + self.expect_requests_get( + "http://author/post/permalink1", + """
-
""").InAnyOrder() + """, + ).InAnyOrder() - # second post is syndicated - self.expect_requests_get('http://author/post/perma✁2', u""" + # second post is syndicated + self.expect_requests_get( + "http://author/post/perma✁2", + u"""
-
""", content_type='text/html; charset=utf-8').InAnyOrder() - - # third post is not syndicated - self.expect_requests_get('http://author/post/permalink3', """ + """, + content_type="text/html; charset=utf-8", + ).InAnyOrder() + + # third post is not syndicated + self.expect_requests_get( + "http://author/post/permalink3", + """
-
""").InAnyOrder() - - # the second activity lookup should not make any HTTP requests - - # the third activity lookup will fetch the author's h-feed one more time - self.expect_requests_get('http://author/', author_feed).InAnyOrder() - - self.mox.ReplayAll() - - # first activity should trigger all the lookups and storage - self.assert_discover(['http://author/post/permalink1']) - syndposts = [('http://author/post/permalink1', 'https://fa.ke/post/url1'), - ('http://author/post/perma✁2', 'https://fa.ke/post/url2'), - ('http://author/post/permalink3', None)] - self.assert_syndicated_posts(*syndposts) - - # second lookup should require no additional HTTP requests. - # the second syndicated post should be linked up to the second permalink. - self.assertEqual((set(['http://author/post/perma✁2']), set()), - discover(self.source, self.activities[1])) - - # third activity lookup. since we didn't find a back-link for the third - # syndicated post, it should fetch the author's feed again, but seeing no - # new posts, it should not follow any of the permalinks. - self.assertEqual((set(), set()), discover(self.source, self.activities[2])) - - # should have saved a blank to prevent subsequent checks of this syndicated - # post from fetching the h-feed again - syndposts.append((None, 'https://fa.ke/post/url3')) - self.assert_syndicated_posts(*syndposts) - - # confirm that we do not fetch the h-feed again for the same syndicated post - self.assertEqual((set(), set()), discover(self.source, self.activities[2])) - - def test_no_duplicate_links(self): - """Make sure that a link found by both original-post-discovery and - posse-post-discovery will not result in two webmentions being sent. - """ - self.activity['object']['content'] = 'with a link http://author/post/url' - original = 'http://author/post/url' - - self.expect_requests_get('http://author/', """ + """, + ).InAnyOrder() + + # the second activity lookup should not make any HTTP requests + + # the third activity lookup will fetch the author's h-feed one more time + self.expect_requests_get("http://author/", author_feed).InAnyOrder() + + self.mox.ReplayAll() + + # first activity should trigger all the lookups and storage + self.assert_discover(["http://author/post/permalink1"]) + syndposts = [ + ("http://author/post/permalink1", "https://fa.ke/post/url1"), + ("http://author/post/perma✁2", "https://fa.ke/post/url2"), + ("http://author/post/permalink3", None), + ] + self.assert_syndicated_posts(*syndposts) + + # second lookup should require no additional HTTP requests. + # the second syndicated post should be linked up to the second permalink. + self.assertEqual( + (set(["http://author/post/perma✁2"]), set()), + discover(self.source, self.activities[1]), + ) + + # third activity lookup. since we didn't find a back-link for the third + # syndicated post, it should fetch the author's feed again, but seeing no + # new posts, it should not follow any of the permalinks. 
+ self.assertEqual((set(), set()), discover(self.source, self.activities[2])) + + # should have saved a blank to prevent subsequent checks of this syndicated + # post from fetching the h-feed again + syndposts.append((None, "https://fa.ke/post/url3")) + self.assert_syndicated_posts(*syndposts) + + # confirm that we do not fetch the h-feed again for the same syndicated post + self.assertEqual((set(), set()), discover(self.source, self.activities[2])) + + def test_no_duplicate_links(self): + """Make sure that a link found by both original-post-discovery and + posse-post-discovery will not result in two webmentions being sent. + """ + self.activity["object"]["content"] = "with a link http://author/post/url" + original = "http://author/post/url" + + self.expect_requests_get( + "http://author/", + """
- """ % original) - self.expect_requests_get(original, """ + """ + % original, + ) + self.expect_requests_get( + original, + """
-
""" % (original, 'https://fa.ke/post/url')) - - self.mox.ReplayAll() - self.assert_discover([original]) - - def test_exclude_mentions_except_user(self): - """Ignore mentions *except* to the user themselves.""" - self.activity['object'].update({ - 'content': 'foo http://author/ bar http://other/', - 'tags': [{ - 'objectType': 'person', - 'url': 'http://author/', - }, { - 'objectType': 'person', - 'url': 'http://other/', - }], - }) - self.assert_discover(['http://author/'], fetch_hfeed=False) - - def test_require_http_or_https(self): - """Ignore non-http URLs.""" - self.activity['object']['content'] = 'ftp://a/b chrome://flags dat://c/d' - self.assert_discover([], fetch_hfeed=False) - - def test_strip_www_when_comparing_domains(self): - """We should ignore leading www when comparing syndicated URL domains.""" - self.activity['object']['url'] = 'http://www.fa.ke/post/url' - - self.expect_requests_get('http://author/', """ + """ + % (original, "https://fa.ke/post/url"), + ) + + self.mox.ReplayAll() + self.assert_discover([original]) + + def test_exclude_mentions_except_user(self): + """Ignore mentions *except* to the user themselves.""" + self.activity["object"].update( + { + "content": "foo http://author/ bar http://other/", + "tags": [ + { + "objectType": "person", + "url": "http://author/", + }, + { + "objectType": "person", + "url": "http://other/", + }, + ], + } + ) + self.assert_discover(["http://author/"], fetch_hfeed=False) + + def test_require_http_or_https(self): + """Ignore non-http URLs.""" + self.activity["object"]["content"] = "ftp://a/b chrome://flags dat://c/d" + self.assert_discover([], fetch_hfeed=False) + + def test_strip_www_when_comparing_domains(self): + """We should ignore leading www when comparing syndicated URL domains.""" + self.activity["object"]["url"] = "http://www.fa.ke/post/url" + + self.expect_requests_get( + "http://author/", + """
- """) - self.expect_requests_get('http://author/post/url', """ + """, + ) + self.expect_requests_get( + "http://author/post/url", + """
-
""") + """, + ) - self.mox.ReplayAll() - self.assert_discover(['http://author/post/url']) + self.mox.ReplayAll() + self.assert_discover(["http://author/post/url"]) - def test_ignore_synd_urls_on_other_silos(self): - """We should ignore syndication URLs on other (silos') domains.""" - self.expect_requests_get('http://author/', """ + def test_ignore_synd_urls_on_other_silos(self): + """We should ignore syndication URLs on other (silos') domains.""" + self.expect_requests_get( + "http://author/", + """
- """) - self.expect_requests_get('http://author/post/url') - - self.mox.ReplayAll() - self.assert_discover([]) - self.assert_syndicated_posts(('http://author/post/url', None), - (None, 'https://fa.ke/post/url')) - - def test_rel_feed_link(self): - """Check that we follow the rel=feed link when looking for the - author's full feed URL - """ - self.expect_requests_get('http://author/', """ + """, + ) + self.expect_requests_get("http://author/post/url") + + self.mox.ReplayAll() + self.assert_discover([]) + self.assert_syndicated_posts( + ("http://author/post/url", None), (None, "https://fa.ke/post/url") + ) + + def test_rel_feed_link(self): + """Check that we follow the rel=feed link when looking for the + author's full feed URL + """ + self.expect_requests_get( + "http://author/", + """ - """) + """, + ) - self.expect_requests_get('http://author/try_this.html', """ + self.expect_requests_get( + "http://author/try_this.html", + """
Hi
- """) + """, + ) - self.mox.ReplayAll() - discover(self.source, self.activity) + self.mox.ReplayAll() + discover(self.source, self.activity) - def test_rel_feed_anchor(self): - """Check that we follow the rel=feed when it's in an tag instead of - """ - self.expect_requests_get('http://author/', """ + def test_rel_feed_anchor(self): + """Check that we follow the rel=feed when it's in an tag instead of """ + self.expect_requests_get( + "http://author/", + """ @@ -362,154 +438,182 @@ def test_rel_feed_anchor(self): full unfiltered feed - """) + """, + ) - self.expect_requests_get('http://author/try_this.html', """ + self.expect_requests_get( + "http://author/try_this.html", + """
Hi
- """) + """, + ) - self.mox.ReplayAll() - discover(self.source, self.activity) + self.mox.ReplayAll() + discover(self.source, self.activity) - def test_rel_feed_adds_to_domains(self): - """rel=feed discovery should update Source.domains.""" - self.expect_requests_get('http://author/', """ + def test_rel_feed_adds_to_domains(self): + """rel=feed discovery should update Source.domains.""" + self.expect_requests_get( + "http://author/", + """ - """) - self.expect_requests_get('http://other/domain', 'foo') - self.mox.ReplayAll() - - discover(self.source, self.activity) - self.assertEqual(['author', 'other'], self.source.updates['domains']) - - def test_no_h_entries(self): - """Make sure nothing bad happens when fetching a feed without h-entries. - """ - self.expect_requests_get('http://author/', """ + """, + ) + self.expect_requests_get("http://other/domain", "foo") + self.mox.ReplayAll() + + discover(self.source, self.activity) + self.assertEqual(["author", "other"], self.source.updates["domains"]) + + def test_no_h_entries(self): + """Make sure nothing bad happens when fetching a feed without h-entries.""" + self.expect_requests_get( + "http://author/", + """

under construction

- """) - - self.mox.ReplayAll() - self.assert_discover([]) - self.assert_syndicated_posts((None, 'https://fa.ke/post/url')) - - def test_existing_syndicated_posts(self): - """Confirm that no additional requests are made if we already have a - SyndicatedPost in the DB. - """ - original_url = 'http://author/notes/2014/04/24/1' - syndication_url = 'https://fa.ke/post/url' - - # save the syndicated post ahead of time (as if it had been - # discovered previously) - SyndicatedPost(parent=self.source.key, original=original_url, - syndication=syndication_url).put() - - # should append the author note url, with no addt'l requests - self.assert_discover([original_url]) - - def test_invalid_webmention_target(self): - """Confirm that no additional requests are made if the author url is - an invalid webmention target. Right now this pretty much just - means they're on the blocklist. Eventually we want to filter out - targets that don't have certain features, like a webmention - endpoint or microformats. - """ - self.source.domain_urls = ['http://amazon.com'] - discover(self.source, self.activity) - # nothing attempted, but we should have saved a placeholder to prevent us - # from trying again - self.assert_syndicated_posts((None, 'https://fa.ke/post/url')) - - def _test_failed_domain_url_fetch(self, raise_exception): - """Make sure something reasonable happens when the author's domain url - gives an unexpected response - """ - if raise_exception: - self.expect_requests_get('http://author/').AndRaise(HTTPError()) - else: - self.expect_requests_get('http://author/', status_code=404) - - self.mox.ReplayAll() - discover(self.source, self.activity) - - # nothing attempted, but we should have saved a placeholder to prevent us - # from trying again - self.assert_syndicated_posts((None, 'https://fa.ke/post/url')) - - def test_domain_url_not_found(self): - """Make sure something reasonable happens when the author's domain url - returns a 404 status code - """ - self._test_failed_domain_url_fetch(raise_exception=False) - - def test_domain_url_error(self): - """Make sure something reasonable happens when fetching the author's - domain url raises an exception - """ - self._test_failed_domain_url_fetch(raise_exception=True) - - def _expect_multiple_domain_url_fetches(self): - self.source.domain_urls = ['http://author1', 'http://author2', 'http://author3'] - self.activity['object']['url'] = 'http://fa.ke/A' - self.expect_requests_get('http://author1', """ + """, + ) + + self.mox.ReplayAll() + self.assert_discover([]) + self.assert_syndicated_posts((None, "https://fa.ke/post/url")) + + def test_existing_syndicated_posts(self): + """Confirm that no additional requests are made if we already have a + SyndicatedPost in the DB. + """ + original_url = "http://author/notes/2014/04/24/1" + syndication_url = "https://fa.ke/post/url" + + # save the syndicated post ahead of time (as if it had been + # discovered previously) + SyndicatedPost( + parent=self.source.key, original=original_url, syndication=syndication_url + ).put() + + # should append the author note url, with no addt'l requests + self.assert_discover([original_url]) + + def test_invalid_webmention_target(self): + """Confirm that no additional requests are made if the author url is + an invalid webmention target. Right now this pretty much just + means they're on the blocklist. Eventually we want to filter out + targets that don't have certain features, like a webmention + endpoint or microformats. 
+ """ + self.source.domain_urls = ["http://amazon.com"] + discover(self.source, self.activity) + # nothing attempted, but we should have saved a placeholder to prevent us + # from trying again + self.assert_syndicated_posts((None, "https://fa.ke/post/url")) + + def _test_failed_domain_url_fetch(self, raise_exception): + """Make sure something reasonable happens when the author's domain url + gives an unexpected response + """ + if raise_exception: + self.expect_requests_get("http://author/").AndRaise(HTTPError()) + else: + self.expect_requests_get("http://author/", status_code=404) + + self.mox.ReplayAll() + discover(self.source, self.activity) + + # nothing attempted, but we should have saved a placeholder to prevent us + # from trying again + self.assert_syndicated_posts((None, "https://fa.ke/post/url")) + + def test_domain_url_not_found(self): + """Make sure something reasonable happens when the author's domain url + returns a 404 status code + """ + self._test_failed_domain_url_fetch(raise_exception=False) + + def test_domain_url_error(self): + """Make sure something reasonable happens when fetching the author's + domain url raises an exception + """ + self._test_failed_domain_url_fetch(raise_exception=True) + + def _expect_multiple_domain_url_fetches(self): + self.source.domain_urls = ["http://author1", "http://author2", "http://author3"] + self.activity["object"]["url"] = "http://fa.ke/A" + self.expect_requests_get( + "http://author1", + """
- """) - self.expect_requests_get('http://author2').AndRaise(HTTPError()) - self.expect_requests_get('http://author3', """ + """, + ) + self.expect_requests_get("http://author2").AndRaise(HTTPError()) + self.expect_requests_get( + "http://author3", + """
- """) - self.mox.ReplayAll() - - def test_canonicalize_drops_non_silo_activity_url(self): - """For https://console.cloud.google.com/errors/CNnLpJml7O3cvAE .""" - self.source.BACKFEED_REQUIRES_SYNDICATION_LINK = True - self.activity['object']['url'] = 'http://not/silo' - self.assert_discover([]) - - def test_discover_multiple_domain_urls(self): - """We should fetch and process all of a source's URLs.""" - self._expect_multiple_domain_url_fetches() - self.assert_discover(['http://author1/A']) - self.assert_syndicated_posts(('http://author1/A', 'https://fa.ke/A'), - ('http://author3/B', 'https://fa.ke/B')) - - def test_refetch_multiple_domain_urls(self): - """We should refetch all of a source's URLs.""" - self._expect_multiple_domain_url_fetches() - result = refetch(self.source) - self.assert_equals(['https://fa.ke/A' ,'https://fa.ke/B'], list(result.keys())) - self.assert_syndicated_posts(('http://author1/A', 'https://fa.ke/A'), - ('http://author3/B', 'https://fa.ke/B')) - - def test_url_limit(self): - """We should cap fetches at 5 URLs.""" - self.source.domain_urls = ['http://a1', 'http://b2', 'https://c3', - 'http://d4', 'http://e5', 'https://f6'] - for url in self.source.domain_urls[:5]: - self.expect_requests_get(url, '') - self.mox.ReplayAll() - self.assert_discover([]) - - def test_permalink_limit(self): - self.mox.stubs.Set(original_post_discovery, 'MAX_PERMALINK_FETCHES_BETA', 3) - - self.expect_requests_get('http://author/', """ + """, + ) + self.mox.ReplayAll() + + def test_canonicalize_drops_non_silo_activity_url(self): + """For https://console.cloud.google.com/errors/CNnLpJml7O3cvAE .""" + self.source.BACKFEED_REQUIRES_SYNDICATION_LINK = True + self.activity["object"]["url"] = "http://not/silo" + self.assert_discover([]) + + def test_discover_multiple_domain_urls(self): + """We should fetch and process all of a source's URLs.""" + self._expect_multiple_domain_url_fetches() + self.assert_discover(["http://author1/A"]) + self.assert_syndicated_posts( + ("http://author1/A", "https://fa.ke/A"), + ("http://author3/B", "https://fa.ke/B"), + ) + + def test_refetch_multiple_domain_urls(self): + """We should refetch all of a source's URLs.""" + self._expect_multiple_domain_url_fetches() + result = refetch(self.source) + self.assert_equals(["https://fa.ke/A", "https://fa.ke/B"], list(result.keys())) + self.assert_syndicated_posts( + ("http://author1/A", "https://fa.ke/A"), + ("http://author3/B", "https://fa.ke/B"), + ) + + def test_url_limit(self): + """We should cap fetches at 5 URLs.""" + self.source.domain_urls = [ + "http://a1", + "http://b2", + "https://c3", + "http://d4", + "http://e5", + "https://f6", + ] + for url in self.source.domain_urls[:5]: + self.expect_requests_get(url, "") + self.mox.ReplayAll() + self.assert_discover([]) + + def test_permalink_limit(self): + self.mox.stubs.Set(original_post_discovery, "MAX_PERMALINK_FETCHES_BETA", 3) + + self.expect_requests_get( + "http://author/", + """
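# test_url_limit and test_permalink_limit pin the fetch caps: only the first
# models.MAX_AUTHOR_URLS (5) author URLs are fetched, and only the first
# MAX_PERMALINK_FETCHES_BETA permalinks, ordered by dt-updated/dt-published and
# then feed order, are followed. A rough sketch of the capping pattern the tests
# assume, using the module's models import; discover_author_urls and
# fetch_and_parse are hypothetical names:
def discover_author_urls(source):
    for url in source.domain_urls[: models.MAX_AUTHOR_URLS]:
        fetch_and_parse(url)  # anything past the cap is never requested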
@@ -527,20 +631,23 @@ def test_permalink_limit(self):
-""") +""", + ) - # should sort by dt-updated/dt-published, then feed order - self.expect_requests_get('http://author/c') - self.expect_requests_get('http://author/e') - self.expect_requests_get('http://author/a') + # should sort by dt-updated/dt-published, then feed order + self.expect_requests_get("http://author/c") + self.expect_requests_get("http://author/e") + self.expect_requests_get("http://author/a") - self.mox.ReplayAll() - self.assert_discover([]) + self.mox.ReplayAll() + self.assert_discover([]) - def test_feed_entry_limit(self): - self.mox.stubs.Set(original_post_discovery, 'MAX_FEED_ENTRIES', 2) + def test_feed_entry_limit(self): + self.mox.stubs.Set(original_post_discovery, "MAX_FEED_ENTRIES", 2) - self.expect_requests_get('http://author/', """ + self.expect_requests_get( + "http://author/", + """
@@ -551,59 +658,74 @@ def test_feed_entry_limit(self):
-""") - - self.mox.ReplayAll() - self.assert_discover(['http://author/a', 'http://author/b']) - self.assert_syndicated_posts(('http://author/a', 'https://fa.ke/post/url'), - ('http://author/b', 'https://fa.ke/post/url')) - - def test_homepage_too_big(self): - self.expect_requests_head('https://fa.ke/post/url') - self.expect_requests_head('http://author/', - response_headers={'Content-Length': str(util.MAX_HTTP_RESPONSE_SIZE + 1)}) - # no GET for /author since it's too big - self.mox.ReplayAll() - self.assert_discover([]) - - def test_feed_too_big(self): - self.expect_requests_head('https://fa.ke/post/url') - self.expect_requests_head('http://author/') - self.expect_requests_get( - 'http://author/', - '') - self.expect_requests_head('http://author/feed', response_headers={ - 'Content-Type': 'text/html', - 'Content-Length': str(util.MAX_HTTP_RESPONSE_SIZE + 1), - }) - # no GET for /author/feed since it's too big - self.mox.ReplayAll() - self.assert_discover([]) - - def test_syndication_url_head_error(self): - """We should ignore syndication URLs that 4xx or 5xx.""" - self.expect_requests_head('https://fa.ke/post/url') - self.expect_requests_head('http://author/') - self.expect_requests_get('http://author/', """ +""", + ) + + self.mox.ReplayAll() + self.assert_discover(["http://author/a", "http://author/b"]) + self.assert_syndicated_posts( + ("http://author/a", "https://fa.ke/post/url"), + ("http://author/b", "https://fa.ke/post/url"), + ) + + def test_homepage_too_big(self): + self.expect_requests_head("https://fa.ke/post/url") + self.expect_requests_head( + "http://author/", + response_headers={"Content-Length": str(util.MAX_HTTP_RESPONSE_SIZE + 1)}, + ) + # no GET for /author since it's too big + self.mox.ReplayAll() + self.assert_discover([]) + + def test_feed_too_big(self): + self.expect_requests_head("https://fa.ke/post/url") + self.expect_requests_head("http://author/") + self.expect_requests_get( + "http://author/", + '', + ) + self.expect_requests_head( + "http://author/feed", + response_headers={ + "Content-Type": "text/html", + "Content-Length": str(util.MAX_HTTP_RESPONSE_SIZE + 1), + }, + ) + # no GET for /author/feed since it's too big + self.mox.ReplayAll() + self.assert_discover([]) + + def test_syndication_url_head_error(self): + """We should ignore syndication URLs that 4xx or 5xx.""" + self.expect_requests_head("https://fa.ke/post/url") + self.expect_requests_head("http://author/") + self.expect_requests_get( + "http://author/", + """
- """) - self.expect_requests_head('http://author/post') - self.expect_requests_get('http://author/post') - self.expect_requests_head('https://fa.ke/other', status_code=404) - self.mox.ReplayAll() - - self.assert_discover([]) - self.assert_syndicated_posts(('http://author/post', None), - (None, 'https://fa.ke/post/url')) - - def test_rel_feed_link_error(self): - """Author page has an h-feed link that raises an exception. We should - recover and use the main page's h-entries as a fallback.""" - self.expect_requests_get('http://author/', """ + """, + ) + self.expect_requests_head("http://author/post") + self.expect_requests_get("http://author/post") + self.expect_requests_head("https://fa.ke/other", status_code=404) + self.mox.ReplayAll() + + self.assert_discover([]) + self.assert_syndicated_posts( + ("http://author/post", None), (None, "https://fa.ke/post/url") + ) + + def test_rel_feed_link_error(self): + """Author page has an h-feed link that raises an exception. We should + recover and use the main page's h-entries as a fallback.""" + self.expect_requests_get( + "http://author/", + """ @@ -615,86 +737,97 @@ def test_rel_feed_link_error(self):
- """) - - # try to do this and fail - self.expect_requests_get('http://author/try_this.html', 'nope', - status_code=404) - - # despite the error, should fallback on the main page's h-entries and - # check the permalink - self.expect_requests_get('http://author/recover_and_fetch_this.html', 'ok') - - self.mox.ReplayAll() - discover(self.source, self.activity) - - def _test_failed_post_permalink_fetch(self, raise_exception): - """Make sure something reasonable happens when we're unable to fetch - the permalink of an entry linked in the h-feed - """ - self.expect_requests_get('http://author/', """ + """, + ) + + # try to do this and fail + self.expect_requests_get("http://author/try_this.html", "nope", status_code=404) + + # despite the error, should fallback on the main page's h-entries and + # check the permalink + self.expect_requests_get("http://author/recover_and_fetch_this.html", "ok") + + self.mox.ReplayAll() + discover(self.source, self.activity) + + def _test_failed_post_permalink_fetch(self, raise_exception): + """Make sure something reasonable happens when we're unable to fetch + the permalink of an entry linked in the h-feed + """ + self.expect_requests_get( + "http://author/", + """
- """) - - if raise_exception: - self.expect_requests_get('http://author/nonexistent.html').AndRaise(HTTPError()) - else: - self.expect_requests_get('http://author/nonexistent.html', status_code=410) - - self.mox.ReplayAll() - discover(self.source, self.activity) - # we should have saved placeholders to prevent us from trying the - # syndication url or permalink again - self.assert_syndicated_posts(('http://author/nonexistent.html', None), - (None, 'https://fa.ke/post/url')) - - def test_post_permalink_not_found(self): - """Make sure something reasonable happens when the permalink of an - entry returns a 404 not found - """ - self._test_failed_post_permalink_fetch(raise_exception=False) - - def test_post_permalink_error(self): - """Make sure something reasonable happens when fetching the permalink - of an entry raises an exception - """ - self._test_failed_post_permalink_fetch(raise_exception=True) - - def test_no_author_url(self): - """Make sure something reasonable happens when the author doesn't have - a url at all. - """ - self.source.domain_urls = [] - discover(self.source, self.activity) - # nothing attempted, and no SyndicatedPost saved - self.assertFalse(SyndicatedPost.query(ancestor=self.source.key).get()) - - def test_feed_type_application_xml(self): - """Confirm that we don't fetch non-HTML rel=feeds. - """ - self.expect_requests_head(self.activity['object']['url']) - self.expect_requests_head('http://author/') - self.expect_requests_get('http://author/', """ + """, + ) + + if raise_exception: + self.expect_requests_get("http://author/nonexistent.html").AndRaise( + HTTPError() + ) + else: + self.expect_requests_get("http://author/nonexistent.html", status_code=410) + + self.mox.ReplayAll() + discover(self.source, self.activity) + # we should have saved placeholders to prevent us from trying the + # syndication url or permalink again + self.assert_syndicated_posts( + ("http://author/nonexistent.html", None), (None, "https://fa.ke/post/url") + ) + + def test_post_permalink_not_found(self): + """Make sure something reasonable happens when the permalink of an + entry returns a 404 not found + """ + self._test_failed_post_permalink_fetch(raise_exception=False) + + def test_post_permalink_error(self): + """Make sure something reasonable happens when fetching the permalink + of an entry raises an exception + """ + self._test_failed_post_permalink_fetch(raise_exception=True) + + def test_no_author_url(self): + """Make sure something reasonable happens when the author doesn't have + a url at all. + """ + self.source.domain_urls = [] + discover(self.source, self.activity) + # nothing attempted, and no SyndicatedPost saved + self.assertFalse(SyndicatedPost.query(ancestor=self.source.key).get()) + + def test_feed_type_application_xml(self): + """Confirm that we don't fetch non-HTML rel=feeds.""" + self.expect_requests_head(self.activity["object"]["url"]) + self.expect_requests_head("http://author/") + self.expect_requests_get( + "http://author/", + """ - """) - self.expect_requests_head('http://author/updates.atom', - response_headers={'Content-Type': 'application/xml'}) - # check that we don't GET http://author/updates.atom - self.mox.ReplayAll() - discover(self.source, self.activity) - - def test_feed_head_request_failed(self): - """Confirm that we fetch permalinks even if HEAD fails. 
- """ - self.expect_requests_get('http://author/', """ + """, + ) + self.expect_requests_head( + "http://author/updates.atom", + response_headers={"Content-Type": "application/xml"}, + ) + # check that we don't GET http://author/updates.atom + self.mox.ReplayAll() + discover(self.source, self.activity) + + def test_feed_head_request_failed(self): + """Confirm that we fetch permalinks even if HEAD fails.""" + self.expect_requests_get( + "http://author/", + """ @@ -705,148 +838,181 @@ def test_feed_head_request_failed(self): - """) + """, + ) - # head request to follow redirects on the post url - self.expect_requests_head(self.activity['object']['url']) + # head request to follow redirects on the post url + self.expect_requests_head(self.activity["object"]["url"]) - # and for the author url - self.expect_requests_head('http://author/') + # and for the author url + self.expect_requests_head("http://author/") - # try and fail to get the feed - self.expect_requests_head('http://author/updates', status_code=400) - self.expect_requests_get('http://author/updates', status_code=400) + # try and fail to get the feed + self.expect_requests_head("http://author/updates", status_code=400) + self.expect_requests_get("http://author/updates", status_code=400) - # fall back on the original page, and fetch the post permalink - self.expect_requests_head('http://author/permalink') - self.expect_requests_get('http://author/permalink', '') + # fall back on the original page, and fetch the post permalink + self.expect_requests_head("http://author/permalink") + self.expect_requests_get("http://author/permalink", "") - self.mox.ReplayAll() - discover(self.source, self.activity) + self.mox.ReplayAll() + discover(self.source, self.activity) - def test_feed_type_unknown(self): - """Confirm that we look for an h-feed with type=text/html even when - the type is not given in , and keep looking until we find one. - """ - self.expect_requests_get('http://author/', """ + def test_feed_type_unknown(self): + """Confirm that we look for an h-feed with type=text/html even when + the type is not given in , and keep looking until we find one. 
+ """ + self.expect_requests_get( + "http://author/", + """ - """) + """, + ) - # head request to follow redirects on the post url - self.expect_requests_head(self.activity['object']['url']) + # head request to follow redirects on the post url + self.expect_requests_head(self.activity["object"]["url"]) - # and for the author url - self.expect_requests_head('http://author/') + # and for the author url + self.expect_requests_head("http://author/") - # try to get the atom feed first - self.expect_requests_head('http://author/updates.atom', - content_type='application/xml') + # try to get the atom feed first + self.expect_requests_head( + "http://author/updates.atom", content_type="application/xml" + ) - # keep looking for an html feed - self.expect_requests_head('http://author/updates.html') + # keep looking for an html feed + self.expect_requests_head("http://author/updates.html") - # look at the rss feed last - self.expect_requests_head('http://author/updates.rss', - content_type='application/xml') + # look at the rss feed last + self.expect_requests_head( + "http://author/updates.rss", content_type="application/xml" + ) - # now fetch the html feed - self.expect_requests_get('http://author/updates.html', """ + # now fetch the html feed + self.expect_requests_get( + "http://author/updates.html", + """ - """) + """, + ) - # should not try to get the rss feed at this point - # but we will follow the post permalink + # should not try to get the rss feed at this point + # but we will follow the post permalink - # keep looking for an html feed - self.expect_requests_head('http://author/permalink') - self.expect_requests_get('http://author/permalink', """ + # keep looking for an html feed + self.expect_requests_head("http://author/permalink") + self.expect_requests_get( + "http://author/permalink", + """

Title

- """) + """, + ) - self.mox.ReplayAll() - discover(self.source, self.activity) + self.mox.ReplayAll() + discover(self.source, self.activity) - # TODO: activity with existing responses, make sure they're merged right + # TODO: activity with existing responses, make sure they're merged right - def test_multiple_rel_feeds(self): - """Make sure that we follow all rel=feed links, e.g. if notes and - articles are in separate feeds.""" + def test_multiple_rel_feeds(self): + """Make sure that we follow all rel=feed links, e.g. if notes and + articles are in separate feeds.""" - self.expect_requests_get('http://author/', """ + self.expect_requests_get( + "http://author/", + """ - """) + """, + ) - # fetches all feeds first - self.expect_requests_get('http://author/articles', """ + # fetches all feeds first + self.expect_requests_get( + "http://author/articles", + """
- """).InAnyOrder('feed') + """, + ).InAnyOrder("feed") - self.expect_requests_get('http://author/notes', """ + self.expect_requests_get( + "http://author/notes", + """
- """).InAnyOrder('feed') - - # then the permalinks (in any order since they are hashed to - # remove duplicates) - self.expect_requests_get('http://author/article-permalink', """ + """, + ).InAnyOrder("feed") + + # then the permalinks (in any order since they are hashed to + # remove duplicates) + self.expect_requests_get( + "http://author/article-permalink", + """ - """).InAnyOrder('permalink') + """, + ).InAnyOrder("permalink") - self.expect_requests_get('http://author/note-permalink', """ + self.expect_requests_get( + "http://author/note-permalink", + """ - """).InAnyOrder('permalink') - - self.mox.ReplayAll() - discover(self.source, self.activity) - self.assert_syndicated_posts( - ('http://author/note-permalink', 'https://fa.ke/note'), - ('http://author/article-permalink', 'https://fa.ke/article'), - (None, 'https://fa.ke/post/url')) - - def test_avoid_author_page_with_bad_content_type(self): - """Confirm that we check the author page's content type before - fetching and parsing it - """ - # head request to follow redirects on the post url - self.expect_requests_head(self.activity['object']['url']) - self.expect_requests_head('http://author/', response_headers={ - 'content-type': 'application/xml', - }) - - # give up - self.mox.ReplayAll() - discover(self.source, self.activity) - - def test_avoid_permalink_with_bad_content_type(self): - """Confirm that we don't follow u-url's that lead to anything that - isn't text/html (e.g., PDF) - """ - # head request to follow redirects on the post url - self.expect_requests_head(self.activity['object']['url']) - self.expect_requests_head('http://author/') - self.expect_requests_get('http://author/', """ + """, + ).InAnyOrder("permalink") + + self.mox.ReplayAll() + discover(self.source, self.activity) + self.assert_syndicated_posts( + ("http://author/note-permalink", "https://fa.ke/note"), + ("http://author/article-permalink", "https://fa.ke/article"), + (None, "https://fa.ke/post/url"), + ) + + def test_avoid_author_page_with_bad_content_type(self): + """Confirm that we check the author page's content type before + fetching and parsing it + """ + # head request to follow redirects on the post url + self.expect_requests_head(self.activity["object"]["url"]) + self.expect_requests_head( + "http://author/", + response_headers={ + "content-type": "application/xml", + }, + ) + + # give up + self.mox.ReplayAll() + discover(self.source, self.activity) + + def test_avoid_permalink_with_bad_content_type(self): + """Confirm that we don't follow u-url's that lead to anything that + isn't text/html (e.g., PDF) + """ + # head request to follow redirects on the post url + self.expect_requests_head(self.activity["object"]["url"]) + self.expect_requests_head("http://author/") + self.expect_requests_get( + "http://author/", + """
@@ -854,561 +1020,668 @@ def test_avoid_permalink_with_bad_content_type(self):
- """) - - # and to check the content-type of the article - self.expect_requests_head('http://scholarly.com/paper.pdf', - response_headers={ - 'content-type': 'application/pdf' - }) - - # call to requests.get for permalink should be skipped - self.mox.ReplayAll() - discover(self.source, self.activity) - - def test_do_not_fetch_hfeed(self): - """Confirms behavior of discover() when fetch_hfeed=False. - Discovery should only check the database for previously discovered matches. - It should not make any GET requests - """ - discover(self.source, self.activity, fetch_hfeed=False) - self.assertFalse(SyndicatedPost.query(ancestor=self.source.key).get()) - - def test_source_domains(self): - """Only links to the user's own domains should end up in originals.""" - self.expect_requests_get('http://author/', '') - self.mox.ReplayAll() - - self.activity['object']['content'] = 'x http://author/post y https://mention z' - self.assert_discover(['http://author/post'], ['https://mention/']) - - self.activity['object']['content'] = 'a https://mention b' - self.assert_discover([], ['https://mention/']) - - # if we don't know the user's domains, we should allow anything - self.source.domain_urls = self.source.domains = [] - self.source.put() - - self.assert_discover(['https://mention/']) - - def test_not_source_DOMAIN(self): - """Links to the source silo's domain should be ignored.""" - self.source.domain_urls = self.source.domains = [] - self.activity['object']['content'] = 'x http://fa.ke/post' - self.assert_discover([], []) - - def test_source_user(self): - """Only links from the user's own posts should end up in originals.""" - self.activity['object']['content'] = 'x http://author/post y' - self.expect_requests_get('http://author/', '') - self.mox.ReplayAll() - - user_id = self.source.user_tag_id() - assert user_id - self.activity['object']['author'] = {'id': user_id} - self.assert_discover(['http://author/post'], []) - - self.activity['object']['author'] = {'id': 'tag:fa.ke,2013:someone_else'} - self.assert_discover([], ['http://author/post']) - - del self.activity['object']['author'] - self.assert_discover(['http://author/post'], []) - - def test_compare_username(self): - """Accept posts with author id with the user's username.""" - self.activity['object']['content'] = 'x http://author/post y' - self.expect_requests_get('http://author/', '') - self.mox.ReplayAll() - - self.activity['object']['author'] = { - 'id': 'tag:fa.ke,2013:someone_else', - 'username': self.source.key.id(), - } - self.assert_discover(['http://author/post'], []) - - def test_attachments(self): - """Discovery should search for original URL of attachments when the - attachment is by our user. 
- """ - SyndicatedPost(parent=self.source.key, - original='http://author/permalink', - syndication='https://fa.ke/post/quoted').put() - - self.activity['object']['author'] = { - 'id': 'tag:fa.ke,2013:someone_else', - } - self.activity['object']['attachments'] = [{ - 'objectType': 'note', - 'content': 'This note is being referenced or otherwise quoted', - 'author': {'id': self.source.user_tag_id()}, - 'url': 'https://fa.ke/post/quoted', - }] - - self.expect_requests_get('http://author/', '') - self.mox.ReplayAll() - - self.assert_discover([], ['http://author/permalink']) - - def test_refetch_hfeed(self): - """refetch should grab resources again, even if they were previously - marked with a blank SyndicatedPost - """ - # refetch 1 and 3 to see if they've been updated, 2 has already - # been resolved for this source - SyndicatedPost(parent=self.source.key, - original='http://author/permalink1', - syndication=None).put() - SyndicatedPost(parent=self.source.key, - original=None, - syndication='https://fa.ke/post/url1').put() - - SyndicatedPost(parent=self.source.key, - original='http://author/permalink2', - syndication='https://fa.ke/post/url2').put() - - SyndicatedPost(parent=self.source.key, - original='http://author/permalink3', - syndication=None).put() - - self.expect_requests_get('http://author/', """ + """, + ) + + # and to check the content-type of the article + self.expect_requests_head( + "http://scholarly.com/paper.pdf", + response_headers={"content-type": "application/pdf"}, + ) + + # call to requests.get for permalink should be skipped + self.mox.ReplayAll() + discover(self.source, self.activity) + + def test_do_not_fetch_hfeed(self): + """Confirms behavior of discover() when fetch_hfeed=False. + Discovery should only check the database for previously discovered matches. 
+ It should not make any GET requests + """ + discover(self.source, self.activity, fetch_hfeed=False) + self.assertFalse(SyndicatedPost.query(ancestor=self.source.key).get()) + + def test_source_domains(self): + """Only links to the user's own domains should end up in originals.""" + self.expect_requests_get("http://author/", "") + self.mox.ReplayAll() + + self.activity["object"]["content"] = "x http://author/post y https://mention z" + self.assert_discover(["http://author/post"], ["https://mention/"]) + + self.activity["object"]["content"] = "a https://mention b" + self.assert_discover([], ["https://mention/"]) + + # if we don't know the user's domains, we should allow anything + self.source.domain_urls = self.source.domains = [] + self.source.put() + + self.assert_discover(["https://mention/"]) + + def test_not_source_DOMAIN(self): + """Links to the source silo's domain should be ignored.""" + self.source.domain_urls = self.source.domains = [] + self.activity["object"]["content"] = "x http://fa.ke/post" + self.assert_discover([], []) + + def test_source_user(self): + """Only links from the user's own posts should end up in originals.""" + self.activity["object"]["content"] = "x http://author/post y" + self.expect_requests_get("http://author/", "") + self.mox.ReplayAll() + + user_id = self.source.user_tag_id() + assert user_id + self.activity["object"]["author"] = {"id": user_id} + self.assert_discover(["http://author/post"], []) + + self.activity["object"]["author"] = {"id": "tag:fa.ke,2013:someone_else"} + self.assert_discover([], ["http://author/post"]) + + del self.activity["object"]["author"] + self.assert_discover(["http://author/post"], []) + + def test_compare_username(self): + """Accept posts with author id with the user's username.""" + self.activity["object"]["content"] = "x http://author/post y" + self.expect_requests_get("http://author/", "") + self.mox.ReplayAll() + + self.activity["object"]["author"] = { + "id": "tag:fa.ke,2013:someone_else", + "username": self.source.key.id(), + } + self.assert_discover(["http://author/post"], []) + + def test_attachments(self): + """Discovery should search for original URL of attachments when the + attachment is by our user. 
+ """ + SyndicatedPost( + parent=self.source.key, + original="http://author/permalink", + syndication="https://fa.ke/post/quoted", + ).put() + + self.activity["object"]["author"] = { + "id": "tag:fa.ke,2013:someone_else", + } + self.activity["object"]["attachments"] = [ + { + "objectType": "note", + "content": "This note is being referenced or otherwise quoted", + "author": {"id": self.source.user_tag_id()}, + "url": "https://fa.ke/post/quoted", + } + ] + + self.expect_requests_get("http://author/", "") + self.mox.ReplayAll() + + self.assert_discover([], ["http://author/permalink"]) + + def test_refetch_hfeed(self): + """refetch should grab resources again, even if they were previously + marked with a blank SyndicatedPost + """ + # refetch 1 and 3 to see if they've been updated, 2 has already + # been resolved for this source + SyndicatedPost( + parent=self.source.key, + original="http://author/permalink1", + syndication=None, + ).put() + SyndicatedPost( + parent=self.source.key, original=None, syndication="https://fa.ke/post/url1" + ).put() + + SyndicatedPost( + parent=self.source.key, + original="http://author/permalink2", + syndication="https://fa.ke/post/url2", + ).put() + + SyndicatedPost( + parent=self.source.key, + original="http://author/permalink3", + syndication=None, + ).put() + + self.expect_requests_get( + "http://author/", + """ - """) + """, + ) - # yay, permalink1 has an updated syndication url - self.expect_requests_get('http://author/permalink1', """ + # yay, permalink1 has an updated syndication url + self.expect_requests_get( + "http://author/permalink1", + """ - """).InAnyOrder() + """, + ).InAnyOrder() - # permalink2 hasn't changed since we first checked it - self.expect_requests_get('http://author/permalink2', """ + # permalink2 hasn't changed since we first checked it + self.expect_requests_get( + "http://author/permalink2", + """ - """).InAnyOrder() + """, + ).InAnyOrder() - # permalink3 hasn't changed since we first checked it - self.expect_requests_get('http://author/permalink3', """ + # permalink3 hasn't changed since we first checked it + self.expect_requests_get( + "http://author/permalink3", + """ - """).InAnyOrder() - - self.mox.ReplayAll() - refetch(self.source) - self.assert_syndicated_posts( - ('http://author/permalink1', 'https://fa.ke/post/url1'), - ('http://author/permalink2', 'https://fa.ke/post/url2'), - ('http://author/permalink3', None)) - - def test_refetch_multiple_responses_same_activity(self): - """Ensure that refetching a post that has several replies does not - generate duplicate original -> None blank entries in the - database. See https://github.com/snarfed/bridgy/issues/259 for - details - """ - for activity in self.activities: - activity['object']['content'] = 'post content without backlinks' - activity['object']['url'] = 'https://fa.ke/post/url' - - author_feed = """ + """, + ).InAnyOrder() + + self.mox.ReplayAll() + refetch(self.source) + self.assert_syndicated_posts( + ("http://author/permalink1", "https://fa.ke/post/url1"), + ("http://author/permalink2", "https://fa.ke/post/url2"), + ("http://author/permalink3", None), + ) + + def test_refetch_multiple_responses_same_activity(self): + """Ensure that refetching a post that has several replies does not + generate duplicate original -> None blank entries in the + database. 
See https://github.com/snarfed/bridgy/issues/259 for + details + """ + for activity in self.activities: + activity["object"]["content"] = "post content without backlinks" + activity["object"]["url"] = "https://fa.ke/post/url" + + author_feed = """
""" - author_entry = """ + author_entry = """ """ - # original - self.expect_requests_get('http://author/', author_feed) - self.expect_requests_get('http://author/post/permalink', author_entry) - # refetch - self.expect_requests_get('http://author/', author_feed) - self.expect_requests_get('http://author/post/permalink', author_entry) - self.mox.ReplayAll() - - for activity in self.activities: - discover(self.source, activity) - refetch(self.source) - self.assert_syndicated_posts(('http://author/post/permalink', None), - (None, 'https://fa.ke/post/url')) - - def test_multiple_refetches(self): - """Ensure that multiple refetches of the same post (with and without - u-syndication) does not generate duplicate blank entries in the - database. See https://github.com/snarfed/bridgy/issues/259 for details - """ - self.activities[0]['object'].update({ - 'content': 'post content without backlinks', - 'url': 'https://fa.ke/post/url', - }) - - hfeed = """ + # original + self.expect_requests_get("http://author/", author_feed) + self.expect_requests_get("http://author/post/permalink", author_entry) + # refetch + self.expect_requests_get("http://author/", author_feed) + self.expect_requests_get("http://author/post/permalink", author_entry) + self.mox.ReplayAll() + + for activity in self.activities: + discover(self.source, activity) + refetch(self.source) + self.assert_syndicated_posts( + ("http://author/post/permalink", None), (None, "https://fa.ke/post/url") + ) + + def test_multiple_refetches(self): + """Ensure that multiple refetches of the same post (with and without + u-syndication) does not generate duplicate blank entries in the + database. See https://github.com/snarfed/bridgy/issues/259 for details + """ + self.activities[0]["object"].update( + { + "content": "post content without backlinks", + "url": "https://fa.ke/post/url", + } + ) + + hfeed = """ """ - unsyndicated = """ + unsyndicated = """ """ - syndicated = """ + syndicated = """ """ - # first attempt, no syndication url yet - self.expect_requests_get('http://author/', hfeed) - self.expect_requests_get('http://author/permalink', unsyndicated) - - # refetch, still no syndication url - self.expect_requests_get('http://author/', hfeed) - self.expect_requests_get('http://author/permalink', unsyndicated) - - # second refetch, has a syndication url this time - self.expect_requests_get('http://author/', hfeed) - self.expect_requests_get('http://author/permalink', syndicated) - - self.mox.ReplayAll() - discover(self.source, self.activities[0]) - refetch(self.source) - self.assert_syndicated_posts(('http://author/permalink', None), - (None, 'https://fa.ke/post/url')) - - refetch(self.source) - self.assert_syndicated_posts(('http://author/permalink', 'https://fa.ke/post/url')) - - def test_refetch_two_permalinks_same_syndication(self): - """ - This causes a problem if refetch assumes that syndication-url is - unique under a given source. 
- """ - self.activities[0]['object'].update({ - 'content': 'post content without backlinks', - 'url': 'https://fa.ke/post/url', - }) - - hfeed = """ + # first attempt, no syndication url yet + self.expect_requests_get("http://author/", hfeed) + self.expect_requests_get("http://author/permalink", unsyndicated) + + # refetch, still no syndication url + self.expect_requests_get("http://author/", hfeed) + self.expect_requests_get("http://author/permalink", unsyndicated) + + # second refetch, has a syndication url this time + self.expect_requests_get("http://author/", hfeed) + self.expect_requests_get("http://author/permalink", syndicated) + + self.mox.ReplayAll() + discover(self.source, self.activities[0]) + refetch(self.source) + self.assert_syndicated_posts( + ("http://author/permalink", None), (None, "https://fa.ke/post/url") + ) + + refetch(self.source) + self.assert_syndicated_posts( + ("http://author/permalink", "https://fa.ke/post/url") + ) + + def test_refetch_two_permalinks_same_syndication(self): + """ + This causes a problem if refetch assumes that syndication-url is + unique under a given source. + """ + self.activities[0]["object"].update( + { + "content": "post content without backlinks", + "url": "https://fa.ke/post/url", + } + ) + + hfeed = """ """ - hentries = [ - ('http://author/post%d' % (i + 1), - """ + hentries = [ + ( + "http://author/post%d" % (i + 1), + """ - """ % (i + 1)) for i in range(2) - ] - - self.expect_requests_get('http://author/', hfeed) - for permalink, content in hentries: - self.expect_requests_get(permalink, content) - - # refetch - self.expect_requests_get('http://author/', hfeed) - for permalink, content in hentries: - self.expect_requests_get(permalink, content) - - self.mox.ReplayAll() - self.assert_discover(['http://author/post1', 'http://author/post2']) - self.assert_syndicated_posts(('http://author/post1', 'https://fa.ke/post/url'), - ('http://author/post2', 'https://fa.ke/post/url')) - - # discover should have already handled all relationships, refetch should - # not find anything - self.assertFalse(refetch(self.source)) - - def test_refetch_permalink_with_two_syndications(self): - """Test one permalink with two syndicated posts. Make sure that - refetch doesn't have a problem with two entries for the same - original URL. - """ - for idx, activity in enumerate(self.activities): - activity['object'].update({ - 'content': 'post content without backlinks', - 'url': 'https://fa.ke/post/url%d' % (idx + 1), - }) - - hfeed = """ + """ + % (i + 1), + ) + for i in range(2) + ] + + self.expect_requests_get("http://author/", hfeed) + for permalink, content in hentries: + self.expect_requests_get(permalink, content) + + # refetch + self.expect_requests_get("http://author/", hfeed) + for permalink, content in hentries: + self.expect_requests_get(permalink, content) + + self.mox.ReplayAll() + self.assert_discover(["http://author/post1", "http://author/post2"]) + self.assert_syndicated_posts( + ("http://author/post1", "https://fa.ke/post/url"), + ("http://author/post2", "https://fa.ke/post/url"), + ) + + # discover should have already handled all relationships, refetch should + # not find anything + self.assertFalse(refetch(self.source)) + + def test_refetch_permalink_with_two_syndications(self): + """Test one permalink with two syndicated posts. Make sure that + refetch doesn't have a problem with two entries for the same + original URL. 
+ """ + for idx, activity in enumerate(self.activities): + activity["object"].update( + { + "content": "post content without backlinks", + "url": "https://fa.ke/post/url%d" % (idx + 1), + } + ) + + hfeed = """ """ - hentry = """ + hentry = """ """ - self.expect_requests_get('http://author/', hfeed) - self.expect_requests_get('http://author/permalink', hentry) - - # refetch - self.expect_requests_get('http://author/', hfeed) - # refetch grabs posts that it's seen before in case there have been updates - self.expect_requests_get('http://author/permalink', hentry) - - self.mox.ReplayAll() - discover(self.source, self.activities[0]) - self.assert_syndicated_posts( - ('http://author/permalink', 'https://fa.ke/post/url1'), - ('http://author/permalink', 'https://fa.ke/post/url3'), - ('http://author/permalink', 'https://fa.ke/post/url5')) - self.assertFalse(refetch(self.source)) - - def test_refetch_with_updated_permalink(self): - """Permalinks can change (e.g., if a stub is added or modified). - - This causes a problem if refetch assumes that syndication-url is - unique under a given source. - """ - self.activities[0]['object'].update({ - 'content': 'post content without backlinks', - 'url': 'https://fa.ke/post/url', - }) - - # first attempt, no stub yet - self.expect_requests_get('http://author/', """ + self.expect_requests_get("http://author/", hfeed) + self.expect_requests_get("http://author/permalink", hentry) + + # refetch + self.expect_requests_get("http://author/", hfeed) + # refetch grabs posts that it's seen before in case there have been updates + self.expect_requests_get("http://author/permalink", hentry) + + self.mox.ReplayAll() + discover(self.source, self.activities[0]) + self.assert_syndicated_posts( + ("http://author/permalink", "https://fa.ke/post/url1"), + ("http://author/permalink", "https://fa.ke/post/url3"), + ("http://author/permalink", "https://fa.ke/post/url5"), + ) + self.assertFalse(refetch(self.source)) + + def test_refetch_with_updated_permalink(self): + """Permalinks can change (e.g., if a stub is added or modified). + + This causes a problem if refetch assumes that syndication-url is + unique under a given source. + """ + self.activities[0]["object"].update( + { + "content": "post content without backlinks", + "url": "https://fa.ke/post/url", + } + ) + + # first attempt, no stub yet + self.expect_requests_get( + "http://author/", + """ - """) - self.expect_requests_get('http://author/2014/08/09', """ + """, + ) + self.expect_requests_get( + "http://author/2014/08/09", + """ - """) + """, + ) - # refetch, permalink has a stub now - self.expect_requests_get('http://author/', """ + # refetch, permalink has a stub now + self.expect_requests_get( + "http://author/", + """ - """) + """, + ) - self.expect_requests_get('http://author/2014/08/09/this-is-a-stub', """ + self.expect_requests_get( + "http://author/2014/08/09/this-is-a-stub", + """ - """) + """, + ) - # refetch again - self.expect_requests_get('http://author/', """ + # refetch again + self.expect_requests_get( + "http://author/", + """ - """) + """, + ) - # permalink hasn't changed - self.expect_requests_get('http://author/2014/08/09/this-is-a-stub', """ + # permalink hasn't changed + self.expect_requests_get( + "http://author/2014/08/09/this-is-a-stub", + """ - """) - - self.mox.ReplayAll() - # modified activity should have /2014/08/09 as an upstreamDuplicate now - self.assert_discover(['http://author/2014/08/09']) - - # refetch should find the updated original url -> syndication url. 
- # it should *not* find the previously discovered relationship. - first_results = refetch(self.source) - self.assertEqual(1, len(first_results)) - new_relations = first_results.get('https://fa.ke/post/url') - self.assertEqual(1, len(new_relations)) - self.assertEqual('https://fa.ke/post/url', new_relations[0].syndication) - self.assertEqual('http://author/2014/08/09/this-is-a-stub', - new_relations[0].original) - - # second refetch should find nothing because nothing has changed - # since the previous refetch. - self.assertFalse(refetch(self.source)) - - def test_refetch_changed_syndication(self): - """Update syndication links that have changed since our last fetch.""" - SyndicatedPost(parent=self.source.key, - original='http://author/permalink', - syndication='https://fa.ke/post/url').put() - self.expect_requests_get('http://author/', """ + """, + ) + + self.mox.ReplayAll() + # modified activity should have /2014/08/09 as an upstreamDuplicate now + self.assert_discover(["http://author/2014/08/09"]) + + # refetch should find the updated original url -> syndication url. + # it should *not* find the previously discovered relationship. + first_results = refetch(self.source) + self.assertEqual(1, len(first_results)) + new_relations = first_results.get("https://fa.ke/post/url") + self.assertEqual(1, len(new_relations)) + self.assertEqual("https://fa.ke/post/url", new_relations[0].syndication) + self.assertEqual( + "http://author/2014/08/09/this-is-a-stub", new_relations[0].original + ) + + # second refetch should find nothing because nothing has changed + # since the previous refetch. + self.assertFalse(refetch(self.source)) + + def test_refetch_changed_syndication(self): + """Update syndication links that have changed since our last fetch.""" + SyndicatedPost( + parent=self.source.key, + original="http://author/permalink", + syndication="https://fa.ke/post/url", + ).put() + self.expect_requests_get( + "http://author/", + """
- """) - - self.mox.ReplayAll() - results = refetch(self.source) - self.assert_syndicated_posts( - ('http://author/permalink', 'https://fa.ke/changed/url')) - self.assert_equals(['https://fa.ke/changed/url'], list(results.keys())) - self.assert_entities_equal( - list(SyndicatedPost.query()), results['https://fa.ke/changed/url']) - self.assertEqual(testutil.NOW, self.source.updates['last_syndication_url']) - self.assertEqual(testutil.NOW, self.source.updates['last_feed_syndication_url']) - - def test_refetch_deleted_syndication(self): - """Deleted syndication links that have disappeared since our last fetch.""" - SyndicatedPost(parent=self.source.key, - original='http://author/permalink', - syndication='https://fa.ke/post/url').put() - self.expect_requests_get('http://author/', """ + """, + ) + + self.mox.ReplayAll() + results = refetch(self.source) + self.assert_syndicated_posts( + ("http://author/permalink", "https://fa.ke/changed/url") + ) + self.assert_equals(["https://fa.ke/changed/url"], list(results.keys())) + self.assert_entities_equal( + list(SyndicatedPost.query()), results["https://fa.ke/changed/url"] + ) + self.assertEqual(testutil.NOW, self.source.updates["last_syndication_url"]) + self.assertEqual(testutil.NOW, self.source.updates["last_feed_syndication_url"]) + + def test_refetch_deleted_syndication(self): + """Deleted syndication links that have disappeared since our last fetch.""" + SyndicatedPost( + parent=self.source.key, + original="http://author/permalink", + syndication="https://fa.ke/post/url", + ).put() + self.expect_requests_get( + "http://author/", + """
- """) - self.expect_requests_get('http://author/permalink', """ + """, + ) + self.expect_requests_get( + "http://author/permalink", + """ - """) - - self.mox.ReplayAll() - self.assert_equals({}, refetch(self.source)) - self.assert_syndicated_posts(('http://author/permalink', None)) - - def test_refetch_blank_syndication(self): - """We should preserve blank SyndicatedPosts during refetches.""" - blank = SyndicatedPost(parent=self.source.key, - original='http://author/permalink', - syndication=None) - blank.put() - self.expect_requests_get('http://author/', """ + """, + ) + + self.mox.ReplayAll() + self.assert_equals({}, refetch(self.source)) + self.assert_syndicated_posts(("http://author/permalink", None)) + + def test_refetch_blank_syndication(self): + """We should preserve blank SyndicatedPosts during refetches.""" + blank = SyndicatedPost( + parent=self.source.key, original="http://author/permalink", syndication=None + ) + blank.put() + self.expect_requests_get( + "http://author/", + """
- """) - self.expect_requests_get('http://author/permalink', """ + """, + ) + self.expect_requests_get( + "http://author/permalink", + """ - """) - - self.mox.ReplayAll() - self.assert_equals({}, refetch(self.source)) - self.assert_syndicated_posts(('http://author/permalink', None)) - - def test_refetch_unchanged_syndication(self): - """We should preserve unchanged SyndicatedPosts during refetches.""" - synd = SyndicatedPost(parent=self.source.key, - original='http://author/permalink', - syndication='https://fa.ke/post/url') - synd.put() - self.expect_requests_get('http://author/', """ + """, + ) + + self.mox.ReplayAll() + self.assert_equals({}, refetch(self.source)) + self.assert_syndicated_posts(("http://author/permalink", None)) + + def test_refetch_unchanged_syndication(self): + """We should preserve unchanged SyndicatedPosts during refetches.""" + synd = SyndicatedPost( + parent=self.source.key, + original="http://author/permalink", + syndication="https://fa.ke/post/url", + ) + synd.put() + self.expect_requests_get( + "http://author/", + """
- """) + """, + ) - self.mox.ReplayAll() - refetch(self.source) - self.assert_entities_equal([synd], list(SyndicatedPost.query())) + self.mox.ReplayAll() + refetch(self.source) + self.assert_entities_equal([synd], list(SyndicatedPost.query())) - def test_refetch_with_last_feed_syndication_url_skips_permalinks(self): - self.source.last_feed_syndication_url = datetime.datetime(1970, 1, 1) - self.source.put() + def test_refetch_with_last_feed_syndication_url_skips_permalinks(self): + self.source.last_feed_syndication_url = datetime.datetime(1970, 1, 1) + self.source.put() - self.expect_requests_get('http://author/', """ + self.expect_requests_get( + "http://author/", + """
- """) - # *don't* expect permalink fetch - - self.mox.ReplayAll() - self.assert_equals({}, refetch(self.source)) - self.assert_syndicated_posts(('http://author/permalink', None)) - - def test_refetch_dont_follow_other_silo_syndication(self): - """We should only resolve redirects if the initial domain is our silo.""" - self.unstub_requests_head() - self.expect_requests_head('http://author/') - self.expect_requests_get('http://author/', """ + """, + ) + # *don't* expect permalink fetch + + self.mox.ReplayAll() + self.assert_equals({}, refetch(self.source)) + self.assert_syndicated_posts(("http://author/permalink", None)) + + def test_refetch_dont_follow_other_silo_syndication(self): + """We should only resolve redirects if the initial domain is our silo.""" + self.unstub_requests_head() + self.expect_requests_head("http://author/") + self.expect_requests_get( + "http://author/", + """
- """) - self.expect_requests_head('http://author/permalink') - self.expect_requests_get('http://author/permalink') - - self.mox.ReplayAll() - refetch(self.source) - - synds = list(SyndicatedPost.query()) - self.assertEqual(1, len(synds)) - self.assertEqual('http://author/permalink', synds[0].original) - self.assertIsNone(synds[0].syndication) - - def test_refetch_syndication_url_head_error(self): - """We should ignore syndication URLs that 4xx or 5xx.""" - self.expect_requests_head('http://author/') - self.expect_requests_get('http://author/', """ + """, + ) + self.expect_requests_head("http://author/permalink") + self.expect_requests_get("http://author/permalink") + + self.mox.ReplayAll() + refetch(self.source) + + synds = list(SyndicatedPost.query()) + self.assertEqual(1, len(synds)) + self.assertEqual("http://author/permalink", synds[0].original) + self.assertIsNone(synds[0].syndication) + + def test_refetch_syndication_url_head_error(self): + """We should ignore syndication URLs that 4xx or 5xx.""" + self.expect_requests_head("http://author/") + self.expect_requests_get( + "http://author/", + """
- """) - self.expect_requests_head('http://author/post') - self.expect_requests_get('http://author/post') - self.expect_requests_head('https://fa.ke/post/url', status_code=404) - - self.mox.ReplayAll() - refetch(self.source) - - self.assert_syndicated_posts(('http://author/post', None)) - - def test_refetch_synd_url_on_other_silo(self): - """We should ignore syndication URLs on other (silos') domains.""" - self.expect_requests_get('http://author/', """ + """, + ) + self.expect_requests_head("http://author/post") + self.expect_requests_get("http://author/post") + self.expect_requests_head("https://fa.ke/post/url", status_code=404) + + self.mox.ReplayAll() + refetch(self.source) + + self.assert_syndicated_posts(("http://author/post", None)) + + def test_refetch_synd_url_on_other_silo(self): + """We should ignore syndication URLs on other (silos') domains.""" + self.expect_requests_get( + "http://author/", + """
- """) - self.expect_requests_get('http://author/post/url') - - self.mox.ReplayAll() - refetch(self.source) - - self.assert_syndicated_posts(('http://author/post/url', None)) - - def test_malformed_url_property(self): - """Non string-like url values (i.e. dicts) used to cause an unhashable - type exception while processing the h-feed. Make sure that we - ignore them. - """ - self.activities[0]['object'].update({ - 'content': 'post content without backlinks', - 'url': 'https://fa.ke/post/url', - }) - - # malformed u-url, should skip it without an unhashable dict error - self.expect_requests_get('http://author/', """ + """, + ) + self.expect_requests_get("http://author/post/url") + + self.mox.ReplayAll() + refetch(self.source) + + self.assert_syndicated_posts(("http://author/post/url", None)) + + def test_malformed_url_property(self): + """Non string-like url values (i.e. dicts) used to cause an unhashable + type exception while processing the h-feed. Make sure that we + ignore them. + """ + self.activities[0]["object"].update( + { + "content": "post content without backlinks", + "url": "https://fa.ke/post/url", + } + ) + + # malformed u-url, should skip it without an unhashable dict error + self.expect_requests_get( + "http://author/", + """
this is a strange permalink
-""") - - self.mox.ReplayAll() - self.assert_discover([]) - - def test_merge_front_page_and_h_feed(self): - """Make sure we are correctly merging the front page and rel-feed by - checking that we visit h-entries that are only the front page or - only the rel-feed page. - """ - self.expect_requests_get('http://author/', """ +""", + ) + + self.mox.ReplayAll() + self.assert_discover([]) + + def test_merge_front_page_and_h_feed(self): + """Make sure we are correctly merging the front page and rel-feed by + checking that we visit h-entries that are only the front page or + only the rel-feed page. + """ + self.expect_requests_get( + "http://author/", + """
@@ -1417,9 +1690,12 @@ def test_merge_front_page_and_h_feed(self):
- """) + """, + ) - self.expect_requests_get('http://author/feed', """ + self.expect_requests_get( + "http://author/feed", + """
@@ -1428,54 +1704,67 @@ def test_merge_front_page_and_h_feed(self):
- """) + """, + ) - for orig in ('/only-on-frontpage', '/on-both', '/only-on-feed'): - self.expect_requests_get('http://author%s' % orig, - """
+ for orig in ("/only-on-frontpage", "/on-both", "/only-on-feed"): + self.expect_requests_get( + "http://author%s" % orig, + """
-
""" % orig).InAnyOrder() - - self.mox.ReplayAll() - discover(self.source, self.activity) - # should be three blank SyndicatedPosts now - self.assert_syndicated_posts(('http://author/only-on-frontpage', None), - ('http://author/on-both', None), - ('http://author/only-on-feed', None), - (None, 'https://fa.ke/post/url')) - - def test_url_in_activity_not_object(self): - """We should use the url field in the activity if object doesn't have it. - - setUp() sets self.activity['object']['url'], so the other tests test that case. - """ - del self.activity['object']['url'] - self.activity['url'] = 'http://www.fa.ke/post/url' - - self.expect_requests_get('http://author/', """ +
""" + % orig, + ).InAnyOrder() + + self.mox.ReplayAll() + discover(self.source, self.activity) + # should be three blank SyndicatedPosts now + self.assert_syndicated_posts( + ("http://author/only-on-frontpage", None), + ("http://author/on-both", None), + ("http://author/only-on-feed", None), + (None, "https://fa.ke/post/url"), + ) + + def test_url_in_activity_not_object(self): + """We should use the url field in the activity if object doesn't have it. + + setUp() sets self.activity['object']['url'], so the other tests test that case. + """ + del self.activity["object"]["url"] + self.activity["url"] = "http://www.fa.ke/post/url" + + self.expect_requests_get( + "http://author/", + """
- """) + """, + ) - self.mox.ReplayAll() - self.assert_discover(['http://author/post/url']) + self.mox.ReplayAll() + self.assert_discover(["http://author/post/url"]) - def test_skip_non_string_u_urls(self): - """Make sure that we do not abort due to u-urls that contain objects - """ - self.expect_requests_get('http://author/', """ + def test_skip_non_string_u_urls(self): + """Make sure that we do not abort due to u-urls that contain objects""" + self.expect_requests_get( + "http://author/", + """
someone made a mistake
- """) + """, + ) - self.expect_requests_get('http://author/feed', """ + self.expect_requests_get( + "http://author/feed", + """
someone made a mistake
@@ -1488,72 +1777,90 @@ def test_skip_non_string_u_urls(self):
someone made a mistake, and no correct link
- """) + """, + ) - for orig in ('/post-with-mistake', '/only-on-feed'): - self.expect_requests_get('http://author%s' % orig, - """
+ for orig in ("/post-with-mistake", "/only-on-feed"): + self.expect_requests_get( + "http://author%s" % orig, + """
-
""" % orig).InAnyOrder() - - self.mox.ReplayAll() - discover(self.source, self.activity) - # should have found both posts successfully - self.assert_syndicated_posts(('http://author/post-with-mistake', None), - ('http://author/only-on-feed', None), - (None, 'https://fa.ke/post/url')) - - def test_default_strip_fragments(self): - """We should strip fragments in syndication URLs by default. - - ...even across resolving redirects. - https://github.com/snarfed/bridgy/issues/984 - """ - self.expect_requests_get('http://author/', """ +
""" + % orig, + ).InAnyOrder() + + self.mox.ReplayAll() + discover(self.source, self.activity) + # should have found both posts successfully + self.assert_syndicated_posts( + ("http://author/post-with-mistake", None), + ("http://author/only-on-feed", None), + (None, "https://fa.ke/post/url"), + ) + + def test_default_strip_fragments(self): + """We should strip fragments in syndication URLs by default. + + ...even across resolving redirects. + https://github.com/snarfed/bridgy/issues/984 + """ + self.expect_requests_get( + "http://author/", + """
- """) - - self.mox.ReplayAll() - result = refetch(self.source) - self.assertCountEqual(['https://fa.ke/post'], result.keys(), result.keys()) - self.assert_syndicated_posts(('http://author/post', 'https://fa.ke/post')) - - def test_drop_reserved_hosts(self): - """We should should drop URLs with reserved and local hostnames.""" - self.mox.StubOutWithMock(original_post_discovery, 'DEBUG') - original_post_discovery.DEBUG = False - - self.mox.ReplayAll() - self.activity['object']['content'] = 'http://localhost http://other/link https://x.test/ http://y.local/path' - self.assert_discover([], fetch_hfeed=False) - - def test_github_preserve_fragments(self): - """GitHub sources should preserve fragments in syndication URLs. - - ...even across resolving redirects. - https://github.com/snarfed/bridgy/issues/984 - """ - self.expect_requests_get('http://author/', """ + """, + ) + + self.mox.ReplayAll() + result = refetch(self.source) + self.assertCountEqual(["https://fa.ke/post"], result.keys(), result.keys()) + self.assert_syndicated_posts(("http://author/post", "https://fa.ke/post")) + + def test_drop_reserved_hosts(self): + """We should should drop URLs with reserved and local hostnames.""" + self.mox.StubOutWithMock(original_post_discovery, "DEBUG") + original_post_discovery.DEBUG = False + + self.mox.ReplayAll() + self.activity["object"][ + "content" + ] = "http://localhost http://other/link https://x.test/ http://y.local/path" + self.assert_discover([], fetch_hfeed=False) + + def test_github_preserve_fragments(self): + """GitHub sources should preserve fragments in syndication URLs. + + ...even across resolving redirects. + https://github.com/snarfed/bridgy/issues/984 + """ + self.expect_requests_get( + "http://author/", + """
- """) - - self.mox.ReplayAll() - self.source = GitHub(id='snarfed', domain_urls=['http://author/'], - domains=['author']) - self.source.put() - - result = refetch(self.source) - self.assertCountEqual(['https://github.com/post#frag'], result.keys(), - result.keys()) - - self.activity['object']['url'] = 'https://github.com/post' - self.assert_discover(['http://author/post']) - self.assert_syndicated_posts(('http://author/post', 'https://github.com/post#frag')) + """, + ) + + self.mox.ReplayAll() + self.source = GitHub( + id="snarfed", domain_urls=["http://author/"], domains=["author"] + ) + self.source.put() + + result = refetch(self.source) + self.assertCountEqual( + ["https://github.com/post#frag"], result.keys(), result.keys() + ) + + self.activity["object"]["url"] = "https://github.com/post" + self.assert_discover(["http://author/post"]) + self.assert_syndicated_posts( + ("http://author/post", "https://github.com/post#frag") + ) diff --git a/tests/test_pages.py b/tests/test_pages.py index a39e2a38..188859f3 100644 --- a/tests/test_pages.py +++ b/tests/test_pages.py @@ -24,686 +24,821 @@ class PagesTest(testutil.AppTest): - - def test_front_page(self): - resp = self.client.get('/') - self.assertEqual(200, resp.status_code) - - def test_poll_now(self): - key = self.sources[0].key.urlsafe().decode() - self.expect_task ('poll-now', source_key=key, last_polled='1970-01-01-00-00-00') - self.mox.ReplayAll() - - resp = self.client.post('/poll-now', data={'key': key}) - self.assertEqual(302, resp.status_code) - self.assertEqual(self.source_bridgy_url, resp.headers['Location']) - - def test_retry(self): - source = self.sources[0] - source.domain_urls = ['http://orig'] - source.last_hfeed_refetch = last_hfeed_refetch = \ - testutil.NOW - datetime.timedelta(minutes=1) - source.put() - - resp = self.responses[0] - resp.status = 'complete' - resp.unsent = ['http://unsent'] - resp.sent = ['http://sent'] - resp.error = ['http://error'] - resp.failed = ['http://failed'] - resp.skipped = ['https://skipped'] - - # SyndicatedPost with new target URLs - resp.activities_json = [ - json_dumps({'object': {'url': 'https://fa.ke/1'}}), - json_dumps({'url': 'https://fa.ke/2', 'object': {'unused': 'ok'}}), - json_dumps({'url': 'https://fa.ke/3'}), - ] - resp.put() - SyndicatedPost.insert(source, 'https://fa.ke/1', 'https://orig/1') - SyndicatedPost.insert(source, 'https://fa.ke/2', 'http://orig/2') - SyndicatedPost.insert(source, 'https://fa.ke/3', 'http://orig/3') - - key = resp.key.urlsafe().decode() - self.expect_task('propagate', response_key=key) - self.mox.ReplayAll() - - # cached webmention endpoint - util.webmention_endpoint_cache['W https skipped /'] = 'asdf' - - response = self.client.post('/retry', data={'key': key}) - self.assertEqual(302, response.status_code) - self.assertEqual(self.source_bridgy_url, response.headers['Location']) - - # status and URLs should be refreshed - got = resp.key.get() - self.assertEqual('new', got.status) - self.assertCountEqual( - ['http://unsent/', 'http://sent/', 'https://skipped/', 'http://error/', - 'http://failed/', 'https://orig/1', 'http://orig/2', 'http://orig/3'], - got.unsent) - for field in got.sent, got.skipped, got.error, got.failed: - self.assertEqual([], field) - - # webmention endpoints for URL domains should be refreshed - self.assertNotIn('W https skipped /', util.webmention_endpoint_cache) - - # shouldn't have refetched h-feed - self.assertEqual(last_hfeed_refetch, source.key.get().last_hfeed_refetch) - - def test_retry_redirect_to(self): - key = 
self.responses[0].put().urlsafe().decode() - self.expect_task('propagate', response_key=key) - self.mox.ReplayAll() - - response = self.client.post('/retry', data={ - 'key': key, - 'redirect_to': '/foo/bar', - }) - self.assertEqual(302, response.status_code) - self.assertEqual('http://localhost/foo/bar', response.headers['Location']) - - def test_crawl_now(self): - source = self.sources[0] - source.domain_urls = ['http://orig'] - source.last_hfeed_refetch = source.last_feed_syndication_url = testutil.NOW - source.put() - - key = source.key.urlsafe().decode() - self.expect_task('poll-now', source_key=key, last_polled='1970-01-01-00-00-00') - self.mox.ReplayAll() - - response = self.client.post('/crawl-now', data={'key': key}) - self.assertEqual(302, response.status_code) - self.assertEqual(self.source_bridgy_url, response.headers['Location']) - - source = source.key.get() - self.assertEqual(models.REFETCH_HFEED_TRIGGER, source.last_hfeed_refetch) - self.assertIsNone(source.last_feed_syndication_url) - - def test_poll_now_and_retry_response_missing_key(self): - for endpoint in '/poll-now', '/retry': - for body in {}, {'key': self.responses[0].key.urlsafe().decode()}: # hasn't been stored - resp = self.client.post(endpoint, data=body) - self.assertEqual(400, resp.status_code) - - def test_delete_source_callback(self): - key = self.sources[0].key.urlsafe().decode() - - resp = self.client.post('/delete/start', data={ - 'feature': 'listen', - 'key': key, - 'callback': 'http://withknown.com/bridgy_callback', - }) - - encoded_state = urllib.parse.quote_plus(json_dumps({ - 'callback': 'http://withknown.com/bridgy_callback', - 'feature': 'listen', - 'operation': 'delete', - 'source': key, - }, sort_keys=True)) - - # when silo oauth is done, it should send us back to /SOURCE/delete/finish, - # which would in turn redirect to the more general /delete/finish. - expected_auth_url = 'http://fake/auth/url?' + urlencode({ - 'redirect_uri': 'http://localhost/fake/delete/finish?state=' - + encoded_state, - }) - - self.assertEqual(302, resp.status_code) - self.assertEqual(expected_auth_url, resp.headers['Location']) - - # assume that the silo auth finishes and redirects to /delete/finish - resp = self.client.get( - '/delete/finish?' - + 'auth_entity=' + self.sources[0].auth_entity.urlsafe().decode() - + '&state=' + encoded_state) - - self.assertEqual(302, resp.status_code) - self.assertEqual( - 'http://withknown.com/bridgy_callback?' + urlencode([ - ('result', 'success'), - ('user', 'http://localhost/fake/0123456789'), - ('key', ndb.Key('FakeSource', '0123456789').urlsafe().decode()), - ]), resp.headers['Location']) - - def test_delete_source_declined(self): - key = self.sources[0].key.urlsafe().decode() - resp = self.client.post('/delete/start', data={ - 'feature': 'listen', - 'key': key, - 'callback': 'http://withknown.com/bridgy_callback', - }) - - encoded_state = urllib.parse.quote_plus(json_dumps({ - 'callback': 'http://withknown.com/bridgy_callback', - 'feature': 'listen', - 'operation': 'delete', - 'source': key, - }, sort_keys=True)) - - # when silo oauth is done, it should send us back to /SOURCE/delete/finish, - # which would in turn redirect to the more general /delete/finish. - expected_auth_url = 'http://fake/auth/url?' 
+ urlencode({ - 'redirect_uri': 'http://localhost/fake/delete/finish?state=' - + encoded_state, - }) - - self.assertEqual(302, resp.status_code) - self.assertEqual(expected_auth_url, resp.headers['Location']) - - # assume that the silo auth finishes - resp = self.client.get( - '/delete/finish?declined=True&state=' + encoded_state) - - self.assertEqual(302, resp.status_code) - self.assertEqual( - 'http://withknown.com/bridgy_callback?' + urlencode([ - ('result', 'declined') - ]), resp.headers['Location']) - - def test_delete_start_redirect_url_error(self): - self.mox.StubOutWithMock(testutil.OAuthStart, 'redirect_url') - testutil.OAuthStart.redirect_url(state=mox.IgnoreArg() - ).AndRaise(tweepy.TweepError('Connection closed unexpectedly...')) - self.mox.ReplayAll() - - resp = self.client.post('/delete/start', data={ - 'feature': 'listen', - 'key': self.sources[0].key.urlsafe().decode(), - }) - self.assertEqual(302, resp.status_code) - location = urllib.parse.urlparse(resp.headers['Location']) - self.assertEqual('/fake/0123456789', location.path) - self.assertEqual(['FakeSource API error 504: Connection closed unexpectedly...'], - get_flashed_messages()) - - def test_delete_removes_from_logins_cookie(self): - self.client.set_cookie( - 'localhost', 'logins', - f'/fake/{self.sources[0].key.id()}?Fake%20User|/other/1?bob') - - with app.test_request_context(): - state = util.construct_state_param_for_add( - feature='listen', operation='delete', - source=self.sources[0].key.urlsafe().decode()) - - auth_entity_key = self.sources[0].auth_entity.urlsafe().decode() - resp = self.client.get( - f'/delete/finish?auth_entity={auth_entity_key}&state={state}') - - self.assertEqual(302, resp.status_code) - location = resp.headers['Location'] - self.assertEqual('http://localhost/', location) - self.assertIn('logins="/other/1?bob";', - resp.headers['Set-Cookie'].split(' ')) - - def test_user_page(self): - resp = self.client.get(self.sources[0].bridgy_path()) - self.assertEqual(200, resp.status_code) - - def test_user_page_lookup_with_username_etc(self): - self.sources[0].username = 'FooBar' - self.sources[0].name = 'Snoøpy Barrett' - self.sources[0].domains = ['foox.com'] - self.sources[0].put() - - for id in 'FooBar', 'Snoøpy Barrett', 'foox.com': - resp = self.client.get(f'/fake/{urllib.parse.quote(id.encode())}') - self.assertEqual(301, resp.status_code) - self.assertEqual(f'http://localhost/fake/{self.sources[0].key.id()}', - resp.headers['Location']) - - resp = self.client.get('/fake/nope') - self.assertEqual(404, resp.status_code) - - def test_user_page_with_no_features_404s(self): - self.sources[0].features = [] - self.sources[0].put() - - resp = self.client.get(self.sources[0].bridgy_path()) - self.assertEqual(404, resp.status_code) - - def test_social_user_page_mf2(self): - """Check the custom mf2 we render on social user pages.""" - self.sources[0].features = ['listen', 'publish'] - self.sources[0].put() - - # test invite with missing object and content - resp = json_loads(self.responses[8].response_json) - resp['verb'] = 'invite' - resp.pop('object', None) - resp.pop('content', None) - self.responses[8].response_json = json_dumps(resp) - - # test that invites render the invitee, not the inviter - # https://github.com/snarfed/bridgy/issues/754 - self.responses[9].type = 'rsvp' - self.responses[9].response_json = json_dumps({ - 'id': 'tag:fa.ke,2013:111', - 'objectType': 'activity', - 'verb': 'invite', - 'url': 'http://fa.ke/event', - 'actor': { - 'displayName': 'Mrs. 
Host', - 'url': 'http://fa.ke/host', - }, - 'object': { - 'objectType': 'person', - 'displayName': 'Ms. Guest', - 'url': 'http://fa.ke/guest', - }, - }) - - for entity in self.responses + self.publishes + self.blogposts: - entity.put() - - user_url = self.sources[0].bridgy_path() - response = self.client.get(user_url) - self.assertEqual(200, response.status_code) - - parsed = util.parse_mf2(response.get_data(as_text=True), user_url) - hcard = parsed.get('items', [])[0] - self.assertEqual(['h-card'], hcard['type']) - self.assertEqual( - ['Fake User'], hcard['properties'].get('name')) - self.assertEqual( - ['http://fa.ke/profile/url'], hcard['properties'].get('url')) - self.assertEqual( - ['enabled'], hcard['properties'].get('bridgy-account-status')) - self.assertEqual( - ['enabled'], hcard['properties'].get('bridgy-listen-status')) - self.assertEqual( - ['enabled'], hcard['properties'].get('bridgy-publish-status')) - - expected_resps = self.responses[:10] - for item, resp in zip(hcard['children'], expected_resps): - self.assertIn('h-bridgy-response', item['type']) - props = item['properties'] - self.assertEqual([resp.status], props['bridgy-status']) - self.assertEqual([json_loads(resp.activities_json[0])['url']], - props['bridgy-original-source']) - self.assertEqual(resp.unsent, props['bridgy-target']) - - # check invite - self.assertIn('Ms. Guest is invited.', response.get_data(as_text=True)) - self.assertNotIn('Mrs. Host is invited.', response.get_data(as_text=True)) - - publish = hcard['children'][len(expected_resps)] - self.assertIn('h-bridgy-publish', publish['type']) - props = publish['properties'] - self.assertEqual([self.publishes[0].key.parent().id()], props['url']) - self.assertEqual([self.publishes[0].status], props['bridgy-status']) - - def test_user_page_private_twitter(self): - auth_entity = TwitterAuth( - id='foo', - user_json=json_dumps({'protected': True}), - token_key='', token_secret='', - ).put() - tw = twitter.Twitter(id='foo', auth_entity=auth_entity, features=['listen']) - tw.put() - - resp = self.client.get(tw.bridgy_path()) - self.assertEqual(200, resp.status_code) - self.assertIn('Your Twitter account is private!', resp.get_data(as_text=True)) - self.assertNotIn('most of your recent posts are private', resp.get_data(as_text=True)) - - def test_user_page_recent_private_posts(self): - self.sources[0].recent_private_posts = pages.RECENT_PRIVATE_POSTS_THRESHOLD - self.sources[0].put() - - resp = self.client.get(self.sources[0].bridgy_path()) - self.assertEqual(200, resp.status_code) - self.assertIn('most of your recent posts are private', resp.get_data(as_text=True)) - - def test_user_page_recent_private_posts_none(self): - self.sources[0].recent_private_posts = None - self.sources[0].put() - - resp = self.client.get(self.sources[0].bridgy_path()) - self.assertEqual(200, resp.status_code) - self.assertNotIn('most of your recent posts are private', resp.get_data(as_text=True)) - - def test_user_page_publish_url_with_unicode_char(self): - """Check the custom mf2 we render on social user pages.""" - self.sources[0].features = ['publish'] - self.sources[0].put() - - url = 'https://ptt.com/ransomw…ocks-user-access/' - Publish(parent=PublishedPage(id=url).key, - source=self.sources[0].key).put() - - user_url = self.sources[0].bridgy_path() - resp = self.client.get(user_url) - self.assertEqual(200, resp.status_code) - - parsed = util.parse_mf2(resp.get_data(as_text=True), user_url) - publish = parsed['items'][0]['children'][0] - - def test_user_page_escapes_html_chars(self): 
- html = ' a&b' - escaped = '<xyz> a&b' - - activity = json_loads(self.responses[0].activities_json[0]) - activity['object']['content'] = escaped - self.responses[0].activities_json = [json_dumps(activity)] - - resp = json_loads(self.responses[0].response_json) - resp['content'] = escaped - self.responses[0].response_json = json_dumps(resp) - self.responses[0].status = 'processing' - self.responses[0].put() - - resp = self.client.get(self.sources[0].bridgy_path()) - self.assertEqual(200, resp.status_code) - self.assertNotIn(html, resp.get_data(as_text=True)) - self.assertIn(escaped, resp.get_data(as_text=True)) - - self.assertNotIn('<span class="glyphicon glyphicon-transfer">', resp.get_data(as_text=True)) - self.assertIn('', resp.get_data(as_text=True)) - - def test_user_page_rate_limited_never_successfully_polled(self): - self.sources[0].rate_limited = True - self.sources[0].last_poll_attempt = datetime.datetime(2019, 1, 1) - self.sources[0].put() - - resp = self.client.get(self.sources[0].bridgy_path()) - self.assertEqual(200, resp.status_code) - self.assertIn('Not polled yet,', resp.get_data(as_text=True)) - - def test_blog_user_page_escapes_html_chars(self): - html = ' a&b' - escaped = '<xyz> a&b' - - source = FakeBlogSource.new() - source.features = ['webmention'] - source.put() - - self.blogposts[0].source = source.key - self.blogposts[0].feed_item['title'] = html - self.blogposts[0].put() - - resp = self.client.get(source.bridgy_path()) - self.assertEqual(200, resp.status_code) - self.assertNotIn(html, resp.get_data(as_text=True)) - self.assertIn(escaped, resp.get_data(as_text=True)) - - def test_users_page(self): - resp = self.client.get('/users') - for source in self.sources: - self.assertIn(f'foo.com.'], - get_flashed_messages()) - - source = source.key.get() - self.assertIn('foo.com', source.domains) - self.assertIn('http://foo.com/', source.domain_urls) - - def test_edit_web_sites_add_existing(self): - source = self.sources[0] - source.domain_urls = ['http://foo.com/'] - source.domains = ['foo.com'] - source.put() - - resp = self.client.post('/edit-websites', data={ - 'source_key': source.key.urlsafe().decode(), - 'add': 'http://foo.com/', - }) - self.assertEqual(302, resp.status_code) - self.assertEqual( - f'http://localhost/edit-websites?source_key={source.key.urlsafe().decode()}', - resp.headers['Location']) - self.assertEqual(['foo.com already exists.'], - get_flashed_messages()) - - source = source.key.get() - self.assertEqual(['foo.com'], source.domains) - self.assertEqual(['http://foo.com/'], source.domain_urls) - - def test_edit_web_sites_add_bad(self): - source = self.sources[0] - resp = self.client.post('/edit-websites', data={ - 'source_key': source.key.urlsafe().decode(), - 'add': 'http://facebook.com/', - }) - self.assertEqual(302, resp.status_code) - self.assertEqual( - f'http://localhost/edit-websites?source_key={source.key.urlsafe().decode()}', - resp.headers['Location']) - self.assertEqual( - ['facebook.com doesn\'t look like your web site. 
Try again?'], - get_flashed_messages()) - - source = source.key.get() - self.assertEqual([], source.domains) - self.assertEqual([], source.domain_urls) - - def test_edit_web_sites_delete(self): - source = self.sources[0] - source.domain_urls = ['http://foo/', 'https://bar'] - source.domains = ['foo', 'bar'] - source.put() - - resp = self.client.post('/edit-websites', data={ - 'source_key': source.key.urlsafe().decode(), - 'delete': 'https://bar', - }) - self.assertEqual(302, resp.status_code) - self.assertEqual( - f'http://localhost/edit-websites?source_key={source.key.urlsafe().decode()}', - resp.headers['Location']) - self.assertEqual(['Removed bar.'], - get_flashed_messages()) - - source = source.key.get() - self.assertEqual(['foo'], source.domains) - self.assertEqual(['http://foo/'], source.domain_urls) - - def test_edit_web_sites_delete_multiple_urls_same_domain(self): - source = self.sources[0] - source.domain_urls = ['http://foo.com/bar', 'https://foo.com/baz'] - source.domains = ['foo.com'] - source.put() - - resp = self.client.post('/edit-websites', data={ - 'source_key': source.key.urlsafe().decode(), - 'delete': 'https://foo.com/baz', - }) - self.assertEqual(302, resp.status_code) - self.assertEqual( - f'http://localhost/edit-websites?source_key={source.key.urlsafe().decode()}', - resp.headers['Location']) - self.assertEqual(['Removed foo.com/baz.'], - get_flashed_messages()) - - source = source.key.get() - self.assertEqual(['foo.com'], source.domains) - self.assertEqual(['http://foo.com/bar'], source.domain_urls) - - def test_edit_web_sites_errors(self): - source_key = self.sources[0].key.urlsafe().decode() - - for data in ( - {}, - {'source_key': source_key}, - {'add': 'http://foo'}, - {'delete': 'http://foo'}, - {'source_key': 'asdf', 'add': 'http://foo'}, - {'source_key': 'asdf', 'delete': 'http://foo', 'add': 'http://bar'}, - {'source_key': source_key, 'delete': 'http://missing'}, - ): - resp = self.client.post('/edit-websites', data=data) - self.assertEqual(400, resp.status_code) + def test_front_page(self): + resp = self.client.get("/") + self.assertEqual(200, resp.status_code) + + def test_poll_now(self): + key = self.sources[0].key.urlsafe().decode() + self.expect_task("poll-now", source_key=key, last_polled="1970-01-01-00-00-00") + self.mox.ReplayAll() + + resp = self.client.post("/poll-now", data={"key": key}) + self.assertEqual(302, resp.status_code) + self.assertEqual(self.source_bridgy_url, resp.headers["Location"]) + + def test_retry(self): + source = self.sources[0] + source.domain_urls = ["http://orig"] + source.last_hfeed_refetch = ( + last_hfeed_refetch + ) = testutil.NOW - datetime.timedelta(minutes=1) + source.put() + + resp = self.responses[0] + resp.status = "complete" + resp.unsent = ["http://unsent"] + resp.sent = ["http://sent"] + resp.error = ["http://error"] + resp.failed = ["http://failed"] + resp.skipped = ["https://skipped"] + + # SyndicatedPost with new target URLs + resp.activities_json = [ + json_dumps({"object": {"url": "https://fa.ke/1"}}), + json_dumps({"url": "https://fa.ke/2", "object": {"unused": "ok"}}), + json_dumps({"url": "https://fa.ke/3"}), + ] + resp.put() + SyndicatedPost.insert(source, "https://fa.ke/1", "https://orig/1") + SyndicatedPost.insert(source, "https://fa.ke/2", "http://orig/2") + SyndicatedPost.insert(source, "https://fa.ke/3", "http://orig/3") + + key = resp.key.urlsafe().decode() + self.expect_task("propagate", response_key=key) + self.mox.ReplayAll() + + # cached webmention endpoint + 
util.webmention_endpoint_cache["W https skipped /"] = "asdf" + + response = self.client.post("/retry", data={"key": key}) + self.assertEqual(302, response.status_code) + self.assertEqual(self.source_bridgy_url, response.headers["Location"]) + + # status and URLs should be refreshed + got = resp.key.get() + self.assertEqual("new", got.status) + self.assertCountEqual( + [ + "http://unsent/", + "http://sent/", + "https://skipped/", + "http://error/", + "http://failed/", + "https://orig/1", + "http://orig/2", + "http://orig/3", + ], + got.unsent, + ) + for field in got.sent, got.skipped, got.error, got.failed: + self.assertEqual([], field) + + # webmention endpoints for URL domains should be refreshed + self.assertNotIn("W https skipped /", util.webmention_endpoint_cache) + + # shouldn't have refetched h-feed + self.assertEqual(last_hfeed_refetch, source.key.get().last_hfeed_refetch) + + def test_retry_redirect_to(self): + key = self.responses[0].put().urlsafe().decode() + self.expect_task("propagate", response_key=key) + self.mox.ReplayAll() + + response = self.client.post( + "/retry", + data={ + "key": key, + "redirect_to": "/foo/bar", + }, + ) + self.assertEqual(302, response.status_code) + self.assertEqual("http://localhost/foo/bar", response.headers["Location"]) + + def test_crawl_now(self): + source = self.sources[0] + source.domain_urls = ["http://orig"] + source.last_hfeed_refetch = source.last_feed_syndication_url = testutil.NOW + source.put() + + key = source.key.urlsafe().decode() + self.expect_task("poll-now", source_key=key, last_polled="1970-01-01-00-00-00") + self.mox.ReplayAll() + + response = self.client.post("/crawl-now", data={"key": key}) + self.assertEqual(302, response.status_code) + self.assertEqual(self.source_bridgy_url, response.headers["Location"]) + + source = source.key.get() + self.assertEqual(models.REFETCH_HFEED_TRIGGER, source.last_hfeed_refetch) + self.assertIsNone(source.last_feed_syndication_url) + + def test_poll_now_and_retry_response_missing_key(self): + for endpoint in "/poll-now", "/retry": + for body in {}, { + "key": self.responses[0].key.urlsafe().decode() + }: # hasn't been stored + resp = self.client.post(endpoint, data=body) + self.assertEqual(400, resp.status_code) + + def test_delete_source_callback(self): + key = self.sources[0].key.urlsafe().decode() + + resp = self.client.post( + "/delete/start", + data={ + "feature": "listen", + "key": key, + "callback": "http://withknown.com/bridgy_callback", + }, + ) + + encoded_state = urllib.parse.quote_plus( + json_dumps( + { + "callback": "http://withknown.com/bridgy_callback", + "feature": "listen", + "operation": "delete", + "source": key, + }, + sort_keys=True, + ) + ) + + # when silo oauth is done, it should send us back to /SOURCE/delete/finish, + # which would in turn redirect to the more general /delete/finish. + expected_auth_url = "http://fake/auth/url?" + urlencode( + { + "redirect_uri": "http://localhost/fake/delete/finish?state=" + + encoded_state, + } + ) + + self.assertEqual(302, resp.status_code) + self.assertEqual(expected_auth_url, resp.headers["Location"]) + + # assume that the silo auth finishes and redirects to /delete/finish + resp = self.client.get( + "/delete/finish?" + + "auth_entity=" + + self.sources[0].auth_entity.urlsafe().decode() + + "&state=" + + encoded_state + ) + + self.assertEqual(302, resp.status_code) + self.assertEqual( + "http://withknown.com/bridgy_callback?" 
+ + urlencode( + [ + ("result", "success"), + ("user", "http://localhost/fake/0123456789"), + ("key", ndb.Key("FakeSource", "0123456789").urlsafe().decode()), + ] + ), + resp.headers["Location"], + ) + + def test_delete_source_declined(self): + key = self.sources[0].key.urlsafe().decode() + resp = self.client.post( + "/delete/start", + data={ + "feature": "listen", + "key": key, + "callback": "http://withknown.com/bridgy_callback", + }, + ) + + encoded_state = urllib.parse.quote_plus( + json_dumps( + { + "callback": "http://withknown.com/bridgy_callback", + "feature": "listen", + "operation": "delete", + "source": key, + }, + sort_keys=True, + ) + ) + + # when silo oauth is done, it should send us back to /SOURCE/delete/finish, + # which would in turn redirect to the more general /delete/finish. + expected_auth_url = "http://fake/auth/url?" + urlencode( + { + "redirect_uri": "http://localhost/fake/delete/finish?state=" + + encoded_state, + } + ) + + self.assertEqual(302, resp.status_code) + self.assertEqual(expected_auth_url, resp.headers["Location"]) + + # assume that the silo auth finishes + resp = self.client.get("/delete/finish?declined=True&state=" + encoded_state) + + self.assertEqual(302, resp.status_code) + self.assertEqual( + "http://withknown.com/bridgy_callback?" + + urlencode([("result", "declined")]), + resp.headers["Location"], + ) + + def test_delete_start_redirect_url_error(self): + self.mox.StubOutWithMock(testutil.OAuthStart, "redirect_url") + testutil.OAuthStart.redirect_url(state=mox.IgnoreArg()).AndRaise( + tweepy.TweepError("Connection closed unexpectedly...") + ) + self.mox.ReplayAll() + + resp = self.client.post( + "/delete/start", + data={ + "feature": "listen", + "key": self.sources[0].key.urlsafe().decode(), + }, + ) + self.assertEqual(302, resp.status_code) + location = urllib.parse.urlparse(resp.headers["Location"]) + self.assertEqual("/fake/0123456789", location.path) + self.assertEqual( + ["FakeSource API error 504: Connection closed unexpectedly..."], + get_flashed_messages(), + ) + + def test_delete_removes_from_logins_cookie(self): + self.client.set_cookie( + "localhost", + "logins", + f"/fake/{self.sources[0].key.id()}?Fake%20User|/other/1?bob", + ) + + with app.test_request_context(): + state = util.construct_state_param_for_add( + feature="listen", + operation="delete", + source=self.sources[0].key.urlsafe().decode(), + ) + + auth_entity_key = self.sources[0].auth_entity.urlsafe().decode() + resp = self.client.get( + f"/delete/finish?auth_entity={auth_entity_key}&state={state}" + ) + + self.assertEqual(302, resp.status_code) + location = resp.headers["Location"] + self.assertEqual("http://localhost/", location) + self.assertIn('logins="/other/1?bob";', resp.headers["Set-Cookie"].split(" ")) + + def test_user_page(self): + resp = self.client.get(self.sources[0].bridgy_path()) + self.assertEqual(200, resp.status_code) + + def test_user_page_lookup_with_username_etc(self): + self.sources[0].username = "FooBar" + self.sources[0].name = "Snoøpy Barrett" + self.sources[0].domains = ["foox.com"] + self.sources[0].put() + + for id in "FooBar", "Snoøpy Barrett", "foox.com": + resp = self.client.get(f"/fake/{urllib.parse.quote(id.encode())}") + self.assertEqual(301, resp.status_code) + self.assertEqual( + f"http://localhost/fake/{self.sources[0].key.id()}", + resp.headers["Location"], + ) + + resp = self.client.get("/fake/nope") + self.assertEqual(404, resp.status_code) + + def test_user_page_with_no_features_404s(self): + self.sources[0].features = [] + 
self.sources[0].put() + + resp = self.client.get(self.sources[0].bridgy_path()) + self.assertEqual(404, resp.status_code) + + def test_social_user_page_mf2(self): + """Check the custom mf2 we render on social user pages.""" + self.sources[0].features = ["listen", "publish"] + self.sources[0].put() + + # test invite with missing object and content + resp = json_loads(self.responses[8].response_json) + resp["verb"] = "invite" + resp.pop("object", None) + resp.pop("content", None) + self.responses[8].response_json = json_dumps(resp) + + # test that invites render the invitee, not the inviter + # https://github.com/snarfed/bridgy/issues/754 + self.responses[9].type = "rsvp" + self.responses[9].response_json = json_dumps( + { + "id": "tag:fa.ke,2013:111", + "objectType": "activity", + "verb": "invite", + "url": "http://fa.ke/event", + "actor": { + "displayName": "Mrs. Host", + "url": "http://fa.ke/host", + }, + "object": { + "objectType": "person", + "displayName": "Ms. Guest", + "url": "http://fa.ke/guest", + }, + } + ) + + for entity in self.responses + self.publishes + self.blogposts: + entity.put() + + user_url = self.sources[0].bridgy_path() + response = self.client.get(user_url) + self.assertEqual(200, response.status_code) + + parsed = util.parse_mf2(response.get_data(as_text=True), user_url) + hcard = parsed.get("items", [])[0] + self.assertEqual(["h-card"], hcard["type"]) + self.assertEqual(["Fake User"], hcard["properties"].get("name")) + self.assertEqual(["http://fa.ke/profile/url"], hcard["properties"].get("url")) + self.assertEqual(["enabled"], hcard["properties"].get("bridgy-account-status")) + self.assertEqual(["enabled"], hcard["properties"].get("bridgy-listen-status")) + self.assertEqual(["enabled"], hcard["properties"].get("bridgy-publish-status")) + + expected_resps = self.responses[:10] + for item, resp in zip(hcard["children"], expected_resps): + self.assertIn("h-bridgy-response", item["type"]) + props = item["properties"] + self.assertEqual([resp.status], props["bridgy-status"]) + self.assertEqual( + [json_loads(resp.activities_json[0])["url"]], + props["bridgy-original-source"], + ) + self.assertEqual(resp.unsent, props["bridgy-target"]) + + # check invite + self.assertIn("Ms. Guest is invited.", response.get_data(as_text=True)) + self.assertNotIn("Mrs. 
Host is invited.", response.get_data(as_text=True)) + + publish = hcard["children"][len(expected_resps)] + self.assertIn("h-bridgy-publish", publish["type"]) + props = publish["properties"] + self.assertEqual([self.publishes[0].key.parent().id()], props["url"]) + self.assertEqual([self.publishes[0].status], props["bridgy-status"]) + + def test_user_page_private_twitter(self): + auth_entity = TwitterAuth( + id="foo", + user_json=json_dumps({"protected": True}), + token_key="", + token_secret="", + ).put() + tw = twitter.Twitter(id="foo", auth_entity=auth_entity, features=["listen"]) + tw.put() + + resp = self.client.get(tw.bridgy_path()) + self.assertEqual(200, resp.status_code) + self.assertIn("Your Twitter account is private!", resp.get_data(as_text=True)) + self.assertNotIn( + "most of your recent posts are private", resp.get_data(as_text=True) + ) + + def test_user_page_recent_private_posts(self): + self.sources[0].recent_private_posts = pages.RECENT_PRIVATE_POSTS_THRESHOLD + self.sources[0].put() + + resp = self.client.get(self.sources[0].bridgy_path()) + self.assertEqual(200, resp.status_code) + self.assertIn( + "most of your recent posts are private", resp.get_data(as_text=True) + ) + + def test_user_page_recent_private_posts_none(self): + self.sources[0].recent_private_posts = None + self.sources[0].put() + + resp = self.client.get(self.sources[0].bridgy_path()) + self.assertEqual(200, resp.status_code) + self.assertNotIn( + "most of your recent posts are private", resp.get_data(as_text=True) + ) + + def test_user_page_publish_url_with_unicode_char(self): + """Check the custom mf2 we render on social user pages.""" + self.sources[0].features = ["publish"] + self.sources[0].put() + + url = "https://ptt.com/ransomw…ocks-user-access/" + Publish(parent=PublishedPage(id=url).key, source=self.sources[0].key).put() + + user_url = self.sources[0].bridgy_path() + resp = self.client.get(user_url) + self.assertEqual(200, resp.status_code) + + parsed = util.parse_mf2(resp.get_data(as_text=True), user_url) + publish = parsed["items"][0]["children"][0] + + def test_user_page_escapes_html_chars(self): + html = " a&b" + escaped = "<xyz> a&b" + + activity = json_loads(self.responses[0].activities_json[0]) + activity["object"]["content"] = escaped + self.responses[0].activities_json = [json_dumps(activity)] + + resp = json_loads(self.responses[0].response_json) + resp["content"] = escaped + self.responses[0].response_json = json_dumps(resp) + self.responses[0].status = "processing" + self.responses[0].put() + + resp = self.client.get(self.sources[0].bridgy_path()) + self.assertEqual(200, resp.status_code) + self.assertNotIn(html, resp.get_data(as_text=True)) + self.assertIn(escaped, resp.get_data(as_text=True)) + + self.assertNotIn( + '<span class="glyphicon glyphicon-transfer">', + resp.get_data(as_text=True), + ) + self.assertIn( + '', resp.get_data(as_text=True) + ) + + def test_user_page_rate_limited_never_successfully_polled(self): + self.sources[0].rate_limited = True + self.sources[0].last_poll_attempt = datetime.datetime(2019, 1, 1) + self.sources[0].put() + + resp = self.client.get(self.sources[0].bridgy_path()) + self.assertEqual(200, resp.status_code) + self.assertIn("Not polled yet,", resp.get_data(as_text=True)) + + def test_blog_user_page_escapes_html_chars(self): + html = " a&b" + escaped = "<xyz> a&b" + + source = FakeBlogSource.new() + source.features = ["webmention"] + source.put() + + self.blogposts[0].source = source.key + self.blogposts[0].feed_item["title"] = html + 
self.blogposts[0].put() + + resp = self.client.get(source.bridgy_path()) + self.assertEqual(200, resp.status_code) + self.assertNotIn(html, resp.get_data(as_text=True)) + self.assertIn(escaped, resp.get_data(as_text=True)) + + def test_users_page(self): + resp = self.client.get("/users") + for source in self.sources: + self.assertIn( + f'foo.com.'], get_flashed_messages() + ) + + source = source.key.get() + self.assertIn("foo.com", source.domains) + self.assertIn("http://foo.com/", source.domain_urls) + + def test_edit_web_sites_add_existing(self): + source = self.sources[0] + source.domain_urls = ["http://foo.com/"] + source.domains = ["foo.com"] + source.put() + + resp = self.client.post( + "/edit-websites", + data={ + "source_key": source.key.urlsafe().decode(), + "add": "http://foo.com/", + }, + ) + self.assertEqual(302, resp.status_code) + self.assertEqual( + f"http://localhost/edit-websites?source_key={source.key.urlsafe().decode()}", + resp.headers["Location"], + ) + self.assertEqual( + ['foo.com already exists.'], + get_flashed_messages(), + ) + + source = source.key.get() + self.assertEqual(["foo.com"], source.domains) + self.assertEqual(["http://foo.com/"], source.domain_urls) + + def test_edit_web_sites_add_bad(self): + source = self.sources[0] + resp = self.client.post( + "/edit-websites", + data={ + "source_key": source.key.urlsafe().decode(), + "add": "http://facebook.com/", + }, + ) + self.assertEqual(302, resp.status_code) + self.assertEqual( + f"http://localhost/edit-websites?source_key={source.key.urlsafe().decode()}", + resp.headers["Location"], + ) + self.assertEqual( + [ + 'facebook.com doesn\'t look like your web site. Try again?' + ], + get_flashed_messages(), + ) + + source = source.key.get() + self.assertEqual([], source.domains) + self.assertEqual([], source.domain_urls) + + def test_edit_web_sites_delete(self): + source = self.sources[0] + source.domain_urls = ["http://foo/", "https://bar"] + source.domains = ["foo", "bar"] + source.put() + + resp = self.client.post( + "/edit-websites", + data={ + "source_key": source.key.urlsafe().decode(), + "delete": "https://bar", + }, + ) + self.assertEqual(302, resp.status_code) + self.assertEqual( + f"http://localhost/edit-websites?source_key={source.key.urlsafe().decode()}", + resp.headers["Location"], + ) + self.assertEqual( + ['Removed bar.'], get_flashed_messages() + ) + + source = source.key.get() + self.assertEqual(["foo"], source.domains) + self.assertEqual(["http://foo/"], source.domain_urls) + + def test_edit_web_sites_delete_multiple_urls_same_domain(self): + source = self.sources[0] + source.domain_urls = ["http://foo.com/bar", "https://foo.com/baz"] + source.domains = ["foo.com"] + source.put() + + resp = self.client.post( + "/edit-websites", + data={ + "source_key": source.key.urlsafe().decode(), + "delete": "https://foo.com/baz", + }, + ) + self.assertEqual(302, resp.status_code) + self.assertEqual( + f"http://localhost/edit-websites?source_key={source.key.urlsafe().decode()}", + resp.headers["Location"], + ) + self.assertEqual( + ['Removed foo.com/baz.'], + get_flashed_messages(), + ) + + source = source.key.get() + self.assertEqual(["foo.com"], source.domains) + self.assertEqual(["http://foo.com/bar"], source.domain_urls) + + def test_edit_web_sites_errors(self): + source_key = self.sources[0].key.urlsafe().decode() + + for data in ( + {}, + {"source_key": source_key}, + {"add": "http://foo"}, + {"delete": "http://foo"}, + {"source_key": "asdf", "add": "http://foo"}, + {"source_key": "asdf", "delete": 
"http://foo", "add": "http://bar"}, + {"source_key": source_key, "delete": "http://missing"}, + ): + resp = self.client.post("/edit-websites", data=data) + self.assertEqual(400, resp.status_code) class DiscoverTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.source = self.sources[0] - self.source.domains = ['si.te'] - self.source.put() - - def check_discover(self, url, expected_message): - resp = self.client.post('/discover', data={ - 'source_key': self.source.key.urlsafe().decode(), - 'url': url, - }) - self.assertEqual(302, resp.status_code) - location = urllib.parse.urlparse(resp.headers['Location']) - detail = ' '.join((url, str(resp.status_code), repr(location), repr(resp.get_data(as_text=True)))) - self.assertEqual(self.source.bridgy_path(), location.path, detail) - self.assertEqual([expected_message], get_flashed_messages()) - - def check_fail(self, body, **kwargs): - self.expect_requests_get('http://si.te/123', body, **kwargs) - self.mox.ReplayAll() - - self.check_discover('http://si.te/123', - 'Failed to fetch si.te/123 or ' - 'find a FakeSource syndication link.') - - # tasks_client.create_task() is stubbed out, so if any calls to it were - # made, mox would notice that and fail. - - def test_discover_param_errors(self): - for url in ('/discover', - '/discover?key=bad', - f'/discover?key={self.source.key.urlsafe().decode()}', - '/discover?url=bad', - '/discover?url=http://foo/bar', - ): - resp = self.client.post(url) - self.assertEqual(400, resp.status_code) - - def test_discover_url_not_site_or_silo_error(self): - self.check_discover('http://not/site/or/silo', - 'Please enter a URL on either your web site or FakeSource.') - - def test_discover_url_silo_post(self): - self.expect_task('discover', source_key=self.source, post_id='123') - self.mox.ReplayAll() - - self.check_discover('http://fa.ke/123', - 'Discovering now. Refresh in a minute to see the results!') - - def test_discover_url_silo_event(self): - self.expect_task('discover', source_key=self.source, post_id='123', - type='event') - self.mox.ReplayAll() - - self.check_discover('http://fa.ke/events/123', - 'Discovering now. Refresh in a minute to see the results!') - - def test_discover_url_silo_not_post_url(self): - self.check_discover('http://fa.ke/', - "Sorry, that doesn't look like a FakeSource post URL.") - - def test_discover_twitter_profile_url_error(self): - """https://console.cloud.google.com/errors/7553065641439031622""" - auth_entity = TwitterAuth(id='foo', user_json='', - token_key='', token_secret='').put() - self.source = twitter.Twitter(id='foo', features=['listen'], - auth_entity=auth_entity) - self.source.put() - self.check_discover('https://twitter.com/bltavares', - "Sorry, that doesn't look like a Twitter post URL.") - - def test_discover_url_site_post_fetch_fails(self): - self.check_fail('fooey', status_code=404) - - def test_discover_url_site_post_no_mf2(self): - self.check_fail('foo') - - def test_discover_url_site_post_no_hentry(self): - self.check_fail('
foo
')
-
-  def test_discover_url_site_post_no_syndication_links(self):
-    self.check_fail('
foo
') - - def test_discover_url_site_post_syndication_link_to_other_silo(self): - self.check_fail(""" + def setUp(self): + super().setUp() + self.source = self.sources[0] + self.source.domains = ["si.te"] + self.source.put() + + def check_discover(self, url, expected_message): + resp = self.client.post( + "/discover", + data={ + "source_key": self.source.key.urlsafe().decode(), + "url": url, + }, + ) + self.assertEqual(302, resp.status_code) + location = urllib.parse.urlparse(resp.headers["Location"]) + detail = " ".join( + ( + url, + str(resp.status_code), + repr(location), + repr(resp.get_data(as_text=True)), + ) + ) + self.assertEqual(self.source.bridgy_path(), location.path, detail) + self.assertEqual([expected_message], get_flashed_messages()) + + def check_fail(self, body, **kwargs): + self.expect_requests_get("http://si.te/123", body, **kwargs) + self.mox.ReplayAll() + + self.check_discover( + "http://si.te/123", + 'Failed to fetch si.te/123 or ' + "find a FakeSource syndication link.", + ) + + # tasks_client.create_task() is stubbed out, so if any calls to it were + # made, mox would notice that and fail. + + def test_discover_param_errors(self): + for url in ( + "/discover", + "/discover?key=bad", + f"/discover?key={self.source.key.urlsafe().decode()}", + "/discover?url=bad", + "/discover?url=http://foo/bar", + ): + resp = self.client.post(url) + self.assertEqual(400, resp.status_code) + + def test_discover_url_not_site_or_silo_error(self): + self.check_discover( + "http://not/site/or/silo", + "Please enter a URL on either your web site or FakeSource.", + ) + + def test_discover_url_silo_post(self): + self.expect_task("discover", source_key=self.source, post_id="123") + self.mox.ReplayAll() + + self.check_discover( + "http://fa.ke/123", + "Discovering now. Refresh in a minute to see the results!", + ) + + def test_discover_url_silo_event(self): + self.expect_task( + "discover", source_key=self.source, post_id="123", type="event" + ) + self.mox.ReplayAll() + + self.check_discover( + "http://fa.ke/events/123", + "Discovering now. Refresh in a minute to see the results!", + ) + + def test_discover_url_silo_not_post_url(self): + self.check_discover( + "http://fa.ke/", "Sorry, that doesn't look like a FakeSource post URL." + ) + + def test_discover_twitter_profile_url_error(self): + """https://console.cloud.google.com/errors/7553065641439031622""" + auth_entity = TwitterAuth( + id="foo", user_json="", token_key="", token_secret="" + ).put() + self.source = twitter.Twitter( + id="foo", features=["listen"], auth_entity=auth_entity + ) + self.source.put() + self.check_discover( + "https://twitter.com/bltavares", + "Sorry, that doesn't look like a Twitter post URL.", + ) + + def test_discover_url_site_post_fetch_fails(self): + self.check_fail("fooey", status_code=404) + + def test_discover_url_site_post_no_mf2(self): + self.check_fail("foo") + + def test_discover_url_site_post_no_hentry(self): + self.check_fail('
foo
')
+
+    def test_discover_url_site_post_no_syndication_links(self):
+        self.check_fail('
foo
')
+
+    def test_discover_url_site_post_syndication_link_to_other_silo(self):
+        self.check_fail(
+            """
 foo
-""")
+"""
+        )

-  def test_discover_url_site_post_syndication_links(self):
-    self.expect_requests_get('http://si.te/123', """
+    def test_discover_url_site_post_syndication_links(self):
+        self.expect_requests_get(
+            "http://si.te/123",
+            """
foo -
""") - - self.expect_task('discover', source_key=self.source, post_id='222') - self.expect_task('discover', source_key=self.source, post_id='444') - self.mox.ReplayAll() - - self.assertEqual(0, SyndicatedPost.query().count()) - self.check_discover('http://si.te/123', - 'Discovering now. Refresh in a minute to see the results!') - - self.assertCountEqual([ - {'https://fa.ke/222': 'http://si.te/123'}, - {'https://fa.ke/post/444': 'http://si.te/123'}, - ], [{sp.syndication: sp.original} for sp in models.SyndicatedPost.query()]) - - now = util.now_fn() - source = self.source.key.get() - self.assertEqual(now, source.last_syndication_url) - - def test_discover_url_site_post_last_feed_syndication_url(self): - now = util.now_fn() - self.source.last_feed_syndication_url = now - self.source.put() - - self.expect_requests_get('http://si.te/123', """ +
""", + ) + + self.expect_task("discover", source_key=self.source, post_id="222") + self.expect_task("discover", source_key=self.source, post_id="444") + self.mox.ReplayAll() + + self.assertEqual(0, SyndicatedPost.query().count()) + self.check_discover( + "http://si.te/123", + "Discovering now. Refresh in a minute to see the results!", + ) + + self.assertCountEqual( + [ + {"https://fa.ke/222": "http://si.te/123"}, + {"https://fa.ke/post/444": "http://si.te/123"}, + ], + [{sp.syndication: sp.original} for sp in models.SyndicatedPost.query()], + ) + + now = util.now_fn() + source = self.source.key.get() + self.assertEqual(now, source.last_syndication_url) + + def test_discover_url_site_post_last_feed_syndication_url(self): + now = util.now_fn() + self.source.last_feed_syndication_url = now + self.source.put() + + self.expect_requests_get( + "http://si.te/123", + """
-
""") +
""", + ) - self.expect_task('discover', source_key=self.source, post_id='222') - self.mox.ReplayAll() + self.expect_task("discover", source_key=self.source, post_id="222") + self.mox.ReplayAll() - self.check_discover('http://si.te/123', - 'Discovering now. Refresh in a minute to see the results!') + self.check_discover( + "http://si.te/123", + "Discovering now. Refresh in a minute to see the results!", + ) - source = self.source.key.get() - self.assertEqual(now, source.last_syndication_url) + source = self.source.key.get() + self.assertEqual(now, source.last_syndication_url) diff --git a/tests/test_publish.py b/tests/test_publish.py index 1e981f8e..7cc871b6 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -23,509 +23,619 @@ class FakeSend(publish.Send): - # populated in setUp() - auth_entity = None - oauth_state = None + # populated in setUp() + auth_entity = None + oauth_state = None - def dispatch_request(self): - state = (util.encode_oauth_state(self.oauth_state) - if self.oauth_state else None) - return self.finish(self.auth_entity, state) + def dispatch_request(self): + state = util.encode_oauth_state(self.oauth_state) if self.oauth_state else None + return self.finish(self.auth_entity, state) -app.add_url_rule('/publish/fake', view_func=FakeSend.as_view('test_publish_fake')) +app.add_url_rule("/publish/fake", view_func=FakeSend.as_view("test_publish_fake")) -class PublishTest(testutil.AppTest): - def setUp(self): - super().setUp() - publish.SOURCE_NAMES['fake'] = testutil.FakeSource - publish.SOURCE_DOMAINS['fa.ke'] = testutil.FakeSource - - self.auth_entity = FakeSend.auth_entity = testutil.FakeAuthEntity(id='0123456789') - self.source = testutil.FakeSource( - id='foo.com', features=['publish'], domains=['foo.com'], - domain_urls=['http://foo.com/'], auth_entity=self.auth_entity.key) - self.source.put() - - FakeSend.oauth_state = { - 'source_url': 'http://foo.com/bar', - 'target_url': 'https://brid.gy/publish/fake', - 'source_key': self.source.key.urlsafe().decode(), - 'include_link': gr_source.INCLUDE_LINK, - } - self.post_html = '

%s

' - self.backlink = '\n' - - def get_response(self, source=None, target=None, preview=False, - interactive=False, params=None): - if params is None: - params = {} - params.update({ - 'source': source or 'http://foo.com/bar', - 'target': target or 'https://brid.gy/publish/fake', - 'source_key': self.source.key.urlsafe().decode(), - }) - - assert not (preview and interactive) - if interactive: - return self.client.get('/publish/fake', data=params) - elif preview: - return self.client.post('/publish/preview', data=params) - else: - return self.client.post('/publish/webmention', data=params) - - def expect_requests_get(self, url, body='', backlink=None, **kwargs): - body += backlink or self.backlink - resp = super().expect_requests_get(url, body, **kwargs) - return resp - - def assert_response(self, expected, status=None, preview=False, **kwargs): - resp = self.get_response(preview=preview, **kwargs) - body = html.unescape(resp.get_data(as_text=True)) - self.assertEqual(status, resp.status_code, - '%s != %s: %s' % (status, resp.status_code, body)) - if preview: - self.assertIn(expected, body, - '%r\n\n=== vs ===\n\n%r' % (expected, body)) - else: - if resp.headers['Content-Type'].startswith('application/json'): - body = json_loads(body)['content' if status < 300 else 'error'] - self.assertIn(expected, body) - - return resp - - def assert_success(self, expected, **kwargs): - return self.assert_response(expected, status=200, **kwargs) - - def assert_created(self, expected, **kwargs): - return self.assert_response(expected, status=201, **kwargs) - - def assert_error(self, expected, status=400, **kwargs): - return self.assert_response(expected, status=status, **kwargs) - - def _check_entity(self, url='http://foo.com/bar', content='foo', - html_content=None, expected_html=None): - if html_content is None: - html_content = content - self.assertTrue(PublishedPage.get_by_id(url)) - publish = Publish.query().get() - self.assertEqual(self.source.key, publish.source) - self.assertEqual('complete', publish.status) - self.assertEqual('post', publish.type) - self.assertEqual('FakeSource post label', publish.type_label()) - if expected_html is None: - expected_html = (self.post_html % html_content) - self.assertEqual(expected_html + self.backlink, publish.html) - self.assertEqual({ - 'id': 'fake id', - 'url': 'http://fake/url', - 'content': '%s - %s' % (content, url), - 'granary_message': 'granary message', - }, publish.published) - - def test_webmention_success(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - resp = self.assert_created('foo - http://foo.com/bar', interactive=False) - self.assertEqual('http://fake/url', resp.headers['Location']) - self._check_entity() - - def test_interactive_success(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - - resp = self.get_response(interactive=True) - self.assertEqual(302, resp.status_code) - - loc = urllib.parse.unquote_plus(resp.headers['Location']) - self.assertEqual('http://localhost/fake/foo.com', loc) - self.assertEqual(['Done! 
Click here to view.', - 'granary message'], - get_flashed_messages()) - - self._check_entity() - - def test_interactive_from_wrong_user_page(self): - other_source = testutil.FakeSource.new().put() - FakeSend.oauth_state['source_key'] = other_source.urlsafe().decode() - - resp = self.get_response(interactive=True) - self.assertEqual(302, resp.status_code) - self.assertEqual(f'http://localhost/fake/{other_source.id()}', - resp.headers['Location']) - self.assertEqual(['Please log into FakeSource as fake to publish that page.'], - get_flashed_messages()) - - self.assertIsNone(Publish.query().get()) - - def test_interactive_oauth_decline(self): - FakeSend.auth_entity = None - resp = self.get_response(interactive=True) - self.assertEqual(302, resp.status_code) - self.assertEqual('http://localhost/fake/foo.com', resp.headers['Location']) - self.assertEqual( - ['If you want to publish or preview, please approve the prompt.'], - get_flashed_messages()) - - self.assertIsNone(Publish.query().get()) - - def test_interactive_no_state(self): - """https://github.com/snarfed/bridgy/issues/449""" - FakeSend.oauth_state = None - resp = self.get_response(interactive=True) - self.assertEqual(302, resp.status_code) - self.assertEqual('http://localhost/', resp.headers['Location']) - self.assertEqual( - ['If you want to publish or preview, please approve the prompt.'], - get_flashed_messages()) - - self.assertIsNone(Publish.query().get()) - - def test_success_domain_translates_to_lowercase(self): - self.expect_requests_get('http://FoO.cOm/Bar', self.post_html % 'foo') - self.mox.ReplayAll() - self.assert_created('foo - http://FoO.cOm/Bar', source='http://FoO.cOm/Bar') - - def test_success_domain_http_vs_https(self): - self.expect_requests_get('https://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - self.assert_created('foo - https://foo.com/bar', source='https://foo.com/bar') - - def test_already_published(self): - """We shouldn't allow duplicating an existing, *completed* publish.""" - page = PublishedPage(id='http://foo.com/bar') - - # these are all fine - Publish(parent=page.key, source=self.source.key, status='failed').put() - Publish(parent=page.key, source=self.source.key, status='complete', - type='preview', published={'content': 'foo'}).put() - - for i in range(5): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - - # first attempt should work - self.assert_success('preview of foo - http://foo.com/bar', preview=True) - created = self.assert_created('foo - http://foo.com/bar') - self.assertEqual(4, Publish.query().count()) - self.assertEqual(3, Publish.query(Publish.status == 'complete').count()) - - completed = list(Publish.query(Publish.status == 'complete', - Publish.type == 'post')) - self.assertEqual(1, len(completed)) - completed = completed[0] - orig_published = completed.published - - # now that there's a complete Publish entity, more attempts should fail - resp = self.assert_error("Sorry, you've already published that page") - self.assertEqual(json_loads(created.get_data(as_text=True)), json_loads(resp.get_data(as_text=True))['original']) - self.assertEqual('complete', completed.key.get().status) - - # try again to test for a bug we had where a second try would succeed - self.assert_error("Sorry, you've already published that page") - - # should still be able to preview, but the preview shouldn't modify the - # Publish entity. 
- self.assert_success('preview of foo', preview=True) - self.assertEqual(orig_published, completed.key.get().published) - - def test_already_published_interactive(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - - page = PublishedPage(id='http://foo.com/bar') - Publish(parent=page.key, source=self.source.key, status='complete', - type='post', published={'content': 'foo'}).put() - - resp = self.assert_response('', status=302, interactive=True) - self.assertIn("Sorry, you've already published that page", - get_flashed_messages()[0]) - - def test_publish_entity_collision(self): - page = PublishedPage(id='http://foo.com/bar') - Publish(parent=page.key, source=self.source.key, status='new').put() - self.assert_error("You're already publishing that post in another request.", - status=429) - - def test_publish_entity_too_much_contention(self): - self.mox.StubOutWithMock(publish.PublishBase, '_get_or_add_publish_entity', - use_mock_anything=True) - - class GrpcError(Exception): - def code(self): - return grpc.StatusCode.ABORTED - def details(self): - return 'too much contention on these datastore entities...' - - publish.PublishBase._get_or_add_publish_entity('http://foo.com/bar' - ).AndRaise(GrpcError()) - - self.mox.ReplayAll() - self.assert_error("You're already publishing that post in another request.", - status=429) - - def test_already_published_then_preview_feed_with_no_items(self): - page = PublishedPage(id='http://foo.com/bar') - Publish(parent=page.key, source=self.source.key, status='complete', - type='post', published={'content': 'foo'}).put() - - self.expect_requests_get('http://foo.com/bar', '
') - self.mox.ReplayAll() - self.assert_success('', preview=True) - - def test_more_than_one_silo(self): - """POSSE to more than one silo should not trip the already published check""" - class FauxSource(testutil.FakeSource): - SHORT_NAME = 'faux' - - publish.SOURCE_NAMES['faux'] = FauxSource - FauxSource( - id='foo.com', features=['publish'], domains=['foo.com'], - domain_urls=['http://foo.com/']).put() - - html = self.post_html % 'foo' - self.expect_requests_get('http://foo.com/bar', html) - self.expect_requests_get('http://foo.com/bar', html, - backlink='\n') - - self.mox.ReplayAll() - - self.assert_created('') - self.assert_created('', target='https://brid.gy/publish/faux') - - def test_bad_target_url(self): - for target in ( - 'foo', - 'https://brid.gy/publish/googleplus', - 'https://brid.gy/publish/instagram', +class PublishTest(testutil.AppTest): + def setUp(self): + super().setUp() + publish.SOURCE_NAMES["fake"] = testutil.FakeSource + publish.SOURCE_DOMAINS["fa.ke"] = testutil.FakeSource + + self.auth_entity = FakeSend.auth_entity = testutil.FakeAuthEntity( + id="0123456789" + ) + self.source = testutil.FakeSource( + id="foo.com", + features=["publish"], + domains=["foo.com"], + domain_urls=["http://foo.com/"], + auth_entity=self.auth_entity.key, + ) + self.source.put() + + FakeSend.oauth_state = { + "source_url": "http://foo.com/bar", + "target_url": "https://brid.gy/publish/fake", + "source_key": self.source.key.urlsafe().decode(), + "include_link": gr_source.INCLUDE_LINK, + } + self.post_html = ( + '

%s

' + ) + self.backlink = '\n' + + def get_response( + self, source=None, target=None, preview=False, interactive=False, params=None + ): + if params is None: + params = {} + params.update( + { + "source": source or "http://foo.com/bar", + "target": target or "https://brid.gy/publish/fake", + "source_key": self.source.key.urlsafe().decode(), + } + ) + + assert not (preview and interactive) + if interactive: + return self.client.get("/publish/fake", data=params) + elif preview: + return self.client.post("/publish/preview", data=params) + else: + return self.client.post("/publish/webmention", data=params) + + def expect_requests_get(self, url, body="", backlink=None, **kwargs): + body += backlink or self.backlink + resp = super().expect_requests_get(url, body, **kwargs) + return resp + + def assert_response(self, expected, status=None, preview=False, **kwargs): + resp = self.get_response(preview=preview, **kwargs) + body = html.unescape(resp.get_data(as_text=True)) + self.assertEqual( + status, resp.status_code, "%s != %s: %s" % (status, resp.status_code, body) + ) + if preview: + self.assertIn(expected, body, "%r\n\n=== vs ===\n\n%r" % (expected, body)) + else: + if resp.headers["Content-Type"].startswith("application/json"): + body = json_loads(body)["content" if status < 300 else "error"] + self.assertIn(expected, body) + + return resp + + def assert_success(self, expected, **kwargs): + return self.assert_response(expected, status=200, **kwargs) + + def assert_created(self, expected, **kwargs): + return self.assert_response(expected, status=201, **kwargs) + + def assert_error(self, expected, status=400, **kwargs): + return self.assert_response(expected, status=status, **kwargs) + + def _check_entity( + self, + url="http://foo.com/bar", + content="foo", + html_content=None, + expected_html=None, ): - self.assert_error( - 'Target must be brid.gy/publish/{flickr,github,mastodon,meetup,twitter}', - target=target) - - def test_source_url_redirects(self): - self.expect_requests_head('http://will/redirect', redirected_url='http://foo.com/1') - - self.expect_requests_get('http://foo.com/1', self.post_html % 'foo') - self.mox.ReplayAll() - # check that we include the original link, not the resolved one - self.assert_created('foo - http://will/redirect', source='http://will/redirect') - - def test_source_url_redirects_with_refresh_header(self): - self.expect_requests_head('http://will/redirect', - response_headers={'refresh': '0; url=http://foo.com/1'}) - self.expect_requests_head('http://foo.com/1') - - self.expect_requests_get('http://foo.com/1', self.post_html % 'foo') - self.mox.ReplayAll() - # check that we include the original link, not the resolved one - self.assert_created('foo - http://will/redirect', source='http://will/redirect') - - def test_link_rel_shortlink(self): - self._test_shortlink("""\ + if html_content is None: + html_content = content + self.assertTrue(PublishedPage.get_by_id(url)) + publish = Publish.query().get() + self.assertEqual(self.source.key, publish.source) + self.assertEqual("complete", publish.status) + self.assertEqual("post", publish.type) + self.assertEqual("FakeSource post label", publish.type_label()) + if expected_html is None: + expected_html = self.post_html % html_content + self.assertEqual(expected_html + self.backlink, publish.html) + self.assertEqual( + { + "id": "fake id", + "url": "http://fake/url", + "content": "%s - %s" % (content, url), + "granary_message": "granary message", + }, + publish.published, + ) + + def test_webmention_success(self): + 
self.expect_requests_get("http://foo.com/bar", self.post_html % "foo") + self.mox.ReplayAll() + resp = self.assert_created("foo - http://foo.com/bar", interactive=False) + self.assertEqual("http://fake/url", resp.headers["Location"]) + self._check_entity() + + def test_interactive_success(self): + self.expect_requests_get("http://foo.com/bar", self.post_html % "foo") + self.mox.ReplayAll() + + resp = self.get_response(interactive=True) + self.assertEqual(302, resp.status_code) + + loc = urllib.parse.unquote_plus(resp.headers["Location"]) + self.assertEqual("http://localhost/fake/foo.com", loc) + self.assertEqual( + [ + 'Done! Click here to view.', + "granary message", + ], + get_flashed_messages(), + ) + + self._check_entity() + + def test_interactive_from_wrong_user_page(self): + other_source = testutil.FakeSource.new().put() + FakeSend.oauth_state["source_key"] = other_source.urlsafe().decode() + + resp = self.get_response(interactive=True) + self.assertEqual(302, resp.status_code) + self.assertEqual( + f"http://localhost/fake/{other_source.id()}", resp.headers["Location"] + ) + self.assertEqual( + ["Please log into FakeSource as fake to publish that page."], + get_flashed_messages(), + ) + + self.assertIsNone(Publish.query().get()) + + def test_interactive_oauth_decline(self): + FakeSend.auth_entity = None + resp = self.get_response(interactive=True) + self.assertEqual(302, resp.status_code) + self.assertEqual("http://localhost/fake/foo.com", resp.headers["Location"]) + self.assertEqual( + ["If you want to publish or preview, please approve the prompt."], + get_flashed_messages(), + ) + + self.assertIsNone(Publish.query().get()) + + def test_interactive_no_state(self): + """https://github.com/snarfed/bridgy/issues/449""" + FakeSend.oauth_state = None + resp = self.get_response(interactive=True) + self.assertEqual(302, resp.status_code) + self.assertEqual("http://localhost/", resp.headers["Location"]) + self.assertEqual( + ["If you want to publish or preview, please approve the prompt."], + get_flashed_messages(), + ) + + self.assertIsNone(Publish.query().get()) + + def test_success_domain_translates_to_lowercase(self): + self.expect_requests_get("http://FoO.cOm/Bar", self.post_html % "foo") + self.mox.ReplayAll() + self.assert_created("foo - http://FoO.cOm/Bar", source="http://FoO.cOm/Bar") + + def test_success_domain_http_vs_https(self): + self.expect_requests_get("https://foo.com/bar", self.post_html % "foo") + self.mox.ReplayAll() + self.assert_created("foo - https://foo.com/bar", source="https://foo.com/bar") + + def test_already_published(self): + """We shouldn't allow duplicating an existing, *completed* publish.""" + page = PublishedPage(id="http://foo.com/bar") + + # these are all fine + Publish(parent=page.key, source=self.source.key, status="failed").put() + Publish( + parent=page.key, + source=self.source.key, + status="complete", + type="preview", + published={"content": "foo"}, + ).put() + + for i in range(5): + self.expect_requests_get("http://foo.com/bar", self.post_html % "foo") + self.mox.ReplayAll() + + # first attempt should work + self.assert_success("preview of foo - http://foo.com/bar", preview=True) + created = self.assert_created("foo - http://foo.com/bar") + self.assertEqual(4, Publish.query().count()) + self.assertEqual(3, Publish.query(Publish.status == "complete").count()) + + completed = list( + Publish.query(Publish.status == "complete", Publish.type == "post") + ) + self.assertEqual(1, len(completed)) + completed = completed[0] + orig_published = 
completed.published + + # now that there's a complete Publish entity, more attempts should fail + resp = self.assert_error("Sorry, you've already published that page") + self.assertEqual( + json_loads(created.get_data(as_text=True)), + json_loads(resp.get_data(as_text=True))["original"], + ) + self.assertEqual("complete", completed.key.get().status) + + # try again to test for a bug we had where a second try would succeed + self.assert_error("Sorry, you've already published that page") + + # should still be able to preview, but the preview shouldn't modify the + # Publish entity. + self.assert_success("preview of foo", preview=True) + self.assertEqual(orig_published, completed.key.get().published) + + def test_already_published_interactive(self): + self.expect_requests_get("http://foo.com/bar", self.post_html % "foo") + self.mox.ReplayAll() + + page = PublishedPage(id="http://foo.com/bar") + Publish( + parent=page.key, + source=self.source.key, + status="complete", + type="post", + published={"content": "foo"}, + ).put() + + resp = self.assert_response("", status=302, interactive=True) + self.assertIn( + "Sorry, you've already published that page", get_flashed_messages()[0] + ) + + def test_publish_entity_collision(self): + page = PublishedPage(id="http://foo.com/bar") + Publish(parent=page.key, source=self.source.key, status="new").put() + self.assert_error( + "You're already publishing that post in another request.", status=429 + ) + + def test_publish_entity_too_much_contention(self): + self.mox.StubOutWithMock( + publish.PublishBase, "_get_or_add_publish_entity", use_mock_anything=True + ) + + class GrpcError(Exception): + def code(self): + return grpc.StatusCode.ABORTED + + def details(self): + return "too much contention on these datastore entities..." + + publish.PublishBase._get_or_add_publish_entity("http://foo.com/bar").AndRaise( + GrpcError() + ) + + self.mox.ReplayAll() + self.assert_error( + "You're already publishing that post in another request.", status=429 + ) + + def test_already_published_then_preview_feed_with_no_items(self): + page = PublishedPage(id="http://foo.com/bar") + Publish( + parent=page.key, + source=self.source.key, + status="complete", + type="post", + published={"content": "foo"}, + ).put() + + self.expect_requests_get("http://foo.com/bar", '
') + self.mox.ReplayAll() + self.assert_success("", preview=True) + + def test_more_than_one_silo(self): + """POSSE to more than one silo should not trip the already published check""" + + class FauxSource(testutil.FakeSource): + SHORT_NAME = "faux" + + publish.SOURCE_NAMES["faux"] = FauxSource + FauxSource( + id="foo.com", + features=["publish"], + domains=["foo.com"], + domain_urls=["http://foo.com/"], + ).put() + + html = self.post_html % "foo" + self.expect_requests_get("http://foo.com/bar", html) + self.expect_requests_get( + "http://foo.com/bar", + html, + backlink='\n', + ) + + self.mox.ReplayAll() + + self.assert_created("") + self.assert_created("", target="https://brid.gy/publish/faux") + + def test_bad_target_url(self): + for target in ( + "foo", + "https://brid.gy/publish/googleplus", + "https://brid.gy/publish/instagram", + ): + self.assert_error( + "Target must be brid.gy/publish/{flickr,github,mastodon,meetup,twitter}", + target=target, + ) + + def test_source_url_redirects(self): + self.expect_requests_head( + "http://will/redirect", redirected_url="http://foo.com/1" + ) + + self.expect_requests_get("http://foo.com/1", self.post_html % "foo") + self.mox.ReplayAll() + # check that we include the original link, not the resolved one + self.assert_created("foo - http://will/redirect", source="http://will/redirect") + + def test_source_url_redirects_with_refresh_header(self): + self.expect_requests_head( + "http://will/redirect", + response_headers={"refresh": "0; url=http://foo.com/1"}, + ) + self.expect_requests_head("http://foo.com/1") + + self.expect_requests_get("http://foo.com/1", self.post_html % "foo") + self.mox.ReplayAll() + # check that we include the original link, not the resolved one + self.assert_created("foo - http://will/redirect", source="http://will/redirect") + + def test_link_rel_shortlink(self): + self._test_shortlink( + """\ -""" + self.post_html % 'foo' + """\ +""" + + self.post_html % "foo" + + """\ -""") +""" + ) - def test_expand_link_rel_shortlink(self): - self._test_shortlink("""\ + def test_expand_link_rel_shortlink(self): + self._test_shortlink( + """\ -""" + self.post_html % 'foo' + """\ +""" + + self.post_html % "foo" + + """\ -""") +""" + ) - def test_a_rel_shortlink(self): - self._test_shortlink(self.post_html % """\ + def test_a_rel_shortlink(self): + self._test_shortlink( + self.post_html + % """\ foo -""") - - def _test_shortlink(self, html): - self.expect_requests_get('http://foo.com/bar', html) - self.mox.ReplayAll() - self.assert_created('foo - http://foo.com/short') - - def test_rel_shortlink_overrides_redirect(self): - self.expect_requests_head('http://will/redirect', redirected_url='http://foo.com/1') - self.expect_requests_get('http://foo.com/1', self.post_html % """\ +""" + ) + + def _test_shortlink(self, html): + self.expect_requests_get("http://foo.com/bar", html) + self.mox.ReplayAll() + self.assert_created("foo - http://foo.com/short") + + def test_rel_shortlink_overrides_redirect(self): + self.expect_requests_head( + "http://will/redirect", redirected_url="http://foo.com/1" + ) + self.expect_requests_get( + "http://foo.com/1", + self.post_html + % """\ foo -""") - self.mox.ReplayAll() - self.assert_created('foo - http://foo.com/short', source='http://will/redirect') - - def test_bad_source(self): - # no source - self.source.key.delete() - self.assert_error('Could not find FakeSource account for foo.com.') - - # source without publish feature - self.source.features = ['listen'] - self.source.put() - msg = 'Publish is not enabled' 
- self.assert_error(msg) - - # status disabled - self.source.features = ['publish'] - self.source.status = 'disabled' - self.source.put() - self.assert_error(msg) - - # two bad sources with same domain - source_2 = self.source = testutil.FakeSource(id='z', **self.source.to_dict()) - source_2.status = 'enabled' - source_2.features = ['listen'] - source_2.put() - self.assert_error(msg) - - # one bad source, one good source, same domain. should automatically use the - # good source. - source_2.features.append('publish') - source_2.put() - self.expect_requests_get('http://foo.com/bar', self.post_html % 'xyz') - self.mox.ReplayAll() - self.assert_created('xyz - http://foo.com/bar') - self.assertEqual(source_2.key, Publish.query().get().source) - - def test_source_with_multiple_domains(self): - """Publish domain is second in source's domains list.""" - self.source.domains = ['baj.com', 'foo.com'] - self.source.domain_urls = ['http://baj.com/', 'http://foo.com/'] - self.source.put() - self.expect_requests_get('http://foo.com/bar', self.post_html % 'xyz') - self.mox.ReplayAll() - self.assert_created('xyz - http://foo.com/bar') - self.assertEqual(self.source.key, Publish.query().get().source) - - def test_source_missing_mf2(self): - self.expect_requests_get('http://foo.com/bar', '') - self.mox.ReplayAll() - self.assert_error('No microformats2 data found in http://foo.com/') - - self.assertTrue(PublishedPage.get_by_id('http://foo.com/bar')) - publish = Publish.query().get() - self.assertEqual('failed', publish.status) - self.assertEqual(self.source.key, publish.source) - - def test_h_feed_no_items(self): - self.expect_requests_get('http://foo.com/bar', '
') - self.mox.ReplayAll() - self.assert_error('Could not find content') - self.assertEqual('failed', Publish.query().get().status) - - def test_no_content(self): - self.expect_requests_get('http://foo.com/bar', - '
') - self.mox.ReplayAll() - - self.assert_error('Could not find content') - self.assertEqual('failed', Publish.query().get().status) - - def test_no_content_ignore_formatting(self): - self.expect_requests_get('http://foo.com/bar', - '
') - self.mox.ReplayAll() - - self.assert_error('Could not find content', - params={'bridgy_ignore_formatting': ''}) - self.assertEqual('failed', Publish.query().get().status) - - def test_multiple_items_chooses_first_that_works(self): - html = ('Mic Lim\n' + - self.post_html % 'foo') - self.expect_requests_get('http://foo.com/bar', html) - self.mox.ReplayAll() - self.assert_created('foo - http://foo.com/bar') - - def test_unpublishable_type(self): - html = ('

not publishable

\n' + - self.post_html % 'foo') - self.expect_requests_get('http://foo.com/bar', html) - self.mox.ReplayAll() - self.assert_created('foo - http://foo.com/bar') - - def test_type_not_implemented(self): - self.expect_requests_get('http://foo.com/bar', """ -""") - self.expect_requests_get('http://foo.com/xyz', '') - self.mox.ReplayAll() - - # FakeSource.create() raises NotImplementedError on likes - self.assert_error('Cannot publish likes') - self.assertEqual('failed', Publish.query().get().status) - - def test_source_url_is_domain_url(self): - self.source.put() - self.assert_error("Looks like that's your home page.", source='http://foo.com#') - - # query params alone shouldn't trigger this - self.expect_requests_get('http://foo.com/?p=123', self.post_html % 'foo') - self.mox.ReplayAll() - self.assert_created('foo - http://foo.com/?p=123', - source='http://foo.com/?p=123') - - def test_source_url_redirects_to_domain_url(self): - self.expect_requests_head('http://will/redirect', redirected_url='http://foo.com') - self.mox.ReplayAll() - self.source.put() - self.assert_error("Looks like that's your home page.", - source='http://will/redirect') - - def test_source_url_is_silo(self): - self.source.put() - self.assert_error( - "Looks like that's a FakeSource URL. Try one from your web site instead!", - source='http://fa.ke/post/123') - self.assert_error( - "Looks like that's a Twitter URL. Try one from your web site instead!", - source='http://twitter.com/post/123') - - def test_embedded_type_not_implemented(self): - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + self.assert_created("foo - http://foo.com/short", source="http://will/redirect") + + def test_bad_source(self): + # no source + self.source.key.delete() + self.assert_error( + "Could not find FakeSource account for foo.com." + ) + + # source without publish feature + self.source.features = ["listen"] + self.source.put() + msg = "Publish is not enabled" + self.assert_error(msg) + + # status disabled + self.source.features = ["publish"] + self.source.status = "disabled" + self.source.put() + self.assert_error(msg) + + # two bad sources with same domain + source_2 = self.source = testutil.FakeSource(id="z", **self.source.to_dict()) + source_2.status = "enabled" + source_2.features = ["listen"] + source_2.put() + self.assert_error(msg) + + # one bad source, one good source, same domain. should automatically use the + # good source. 
+ source_2.features.append("publish") + source_2.put() + self.expect_requests_get("http://foo.com/bar", self.post_html % "xyz") + self.mox.ReplayAll() + self.assert_created("xyz - http://foo.com/bar") + self.assertEqual(source_2.key, Publish.query().get().source) + + def test_source_with_multiple_domains(self): + """Publish domain is second in source's domains list.""" + self.source.domains = ["baj.com", "foo.com"] + self.source.domain_urls = ["http://baj.com/", "http://foo.com/"] + self.source.put() + self.expect_requests_get("http://foo.com/bar", self.post_html % "xyz") + self.mox.ReplayAll() + self.assert_created("xyz - http://foo.com/bar") + self.assertEqual(self.source.key, Publish.query().get().source) + + def test_source_missing_mf2(self): + self.expect_requests_get("http://foo.com/bar", "") + self.mox.ReplayAll() + self.assert_error("No microformats2 data found in http://foo.com/") + + self.assertTrue(PublishedPage.get_by_id("http://foo.com/bar")) + publish = Publish.query().get() + self.assertEqual("failed", publish.status) + self.assertEqual(self.source.key, publish.source) + + def test_h_feed_no_items(self): + self.expect_requests_get("http://foo.com/bar", '
')
+ self.mox.ReplayAll()
+ self.assert_error("Could not find content")
+ self.assertEqual("failed", Publish.query().get().status)
+
+ def test_no_content(self):
+ self.expect_requests_get(
+ "http://foo.com/bar", '
'
+ )
+ self.mox.ReplayAll()
+
+ self.assert_error("Could not find content")
+ self.assertEqual("failed", Publish.query().get().status)
+
+ def test_no_content_ignore_formatting(self):
+ self.expect_requests_get(
+ "http://foo.com/bar", '
' + ) + self.mox.ReplayAll() + + self.assert_error( + "Could not find content", params={"bridgy_ignore_formatting": ""} + ) + self.assertEqual("failed", Publish.query().get().status) + + def test_multiple_items_chooses_first_that_works(self): + html = ( + 'Mic Lim\n' + + self.post_html % "foo" + ) + self.expect_requests_get("http://foo.com/bar", html) + self.mox.ReplayAll() + self.assert_created("foo - http://foo.com/bar") + + def test_unpublishable_type(self): + html = ( + '

not publishable

\n' + + self.post_html % "foo" + ) + self.expect_requests_get("http://foo.com/bar", html) + self.mox.ReplayAll() + self.assert_created("foo - http://foo.com/bar") + + def test_type_not_implemented(self): + self.expect_requests_get( + "http://foo.com/bar", + """ +""", + ) + self.expect_requests_get("http://foo.com/xyz", "") + self.mox.ReplayAll() + + # FakeSource.create() raises NotImplementedError on likes + self.assert_error("Cannot publish likes") + self.assertEqual("failed", Publish.query().get().status) + + def test_source_url_is_domain_url(self): + self.source.put() + self.assert_error("Looks like that's your home page.", source="http://foo.com#") + + # query params alone shouldn't trigger this + self.expect_requests_get("http://foo.com/?p=123", self.post_html % "foo") + self.mox.ReplayAll() + self.assert_created( + "foo - http://foo.com/?p=123", source="http://foo.com/?p=123" + ) + + def test_source_url_redirects_to_domain_url(self): + self.expect_requests_head( + "http://will/redirect", redirected_url="http://foo.com" + ) + self.mox.ReplayAll() + self.source.put() + self.assert_error( + "Looks like that's your home page.", source="http://will/redirect" + ) + + def test_source_url_is_silo(self): + self.source.put() + self.assert_error( + "Looks like that's a FakeSource URL. Try one from your web site instead!", + source="http://fa.ke/post/123", + ) + self.assert_error( + "Looks like that's a Twitter URL. Try one from your web site instead!", + source="http://twitter.com/post/123", + ) + + def test_embedded_type_not_implemented(self): + self.expect_requests_get( + "http://foo.com/bar", + """ """) - self.mox.ReplayAll() - - # FakeSource.create() returns an error message for verb='like' - self.assert_error("Cannot publish likes") - self.assertEqual('failed', Publish.query().get().status) - - def test_mf1_backward_compatibility_inside_hfeed(self): - """This is based on Blogger's default markup, e.g. - http://daisystanton.blogspot.com/2014/06/so-elections.html - """ - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + + # FakeSource.create() returns an error message for verb='like' + self.assert_error("Cannot publish likes") + self.assertEqual("failed", Publish.query().get().status) + + def test_mf1_backward_compatibility_inside_hfeed(self): + """This is based on Blogger's default markup, e.g. + http://daisystanton.blogspot.com/2014/06/so-elections.html + """ + self.expect_requests_get( + "http://foo.com/bar", + """
this is my article -
""") - self.mox.ReplayAll() - self.assert_created('this is my article - http://foo.com/bar') - - def test_ignore_hfeed_contents(self): - """Background in https://github.com/snarfed/bridgy/issues/219""" - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + self.assert_created("this is my article - http://foo.com/bar") + + def test_ignore_hfeed_contents(self): + """Background in https://github.com/snarfed/bridgy/issues/219""" + self.expect_requests_get( + "http://foo.com/bar", + """
my feed
my article
-
""") - self.mox.ReplayAll() - self.assert_created('my article - http://foo.com/bar') - - def test_tumblr_markup(self): - """This is based on Tumblr's default markup, e.g. - http://snarfed.tumblr.com/post/84623272717/stray-cat - """ - self.expect_requests_get('http://foo.com/bar', """ +
""", + ) + self.mox.ReplayAll() + self.assert_created("my article - http://foo.com/bar") + + def test_tumblr_markup(self): + """This is based on Tumblr's default markup, e.g. + http://snarfed.tumblr.com/post/84623272717/stray-cat + """ + self.expect_requests_get( + "http://foo.com/bar", + """
@@ -534,14 +644,17 @@ def test_tumblr_markup(self):
-""") - self.mox.ReplayAll() - self.assert_created('this is my article - http://foo.com/bar') - - def test_tumblr_markup_with_photo(self): - """A tumblr post with a picture but no text. - Based on http://require.aorcsik.com/post/98159554316/whitenoisegirl-the-clayprofessor-chris """ - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + self.assert_created("this is my article - http://foo.com/bar") + + def test_tumblr_markup_with_photo(self): + """A tumblr post with a picture but no text. + Based on http://require.aorcsik.com/post/98159554316/whitenoisegirl-the-clayprofessor-chris""" + self.expect_requests_get( + "http://foo.com/bar", + """
@@ -557,16 +670,19 @@ def test_tumblr_markup_with_photo(self):
-""") - self.mox.ReplayAll() - self.assert_error('Could not find content') - - def test_tumblr_special_case_does_not_override_mf1(self): - """Tumblr's special case should not add "h-entry" on a class - that already has mf1 microformats on it (or it will cause the parser - to ignore the mf2 properties). - """ - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + self.assert_error("Could not find content") + + def test_tumblr_special_case_does_not_override_mf1(self): + """Tumblr's special case should not add "h-entry" on a class + that already has mf1 microformats on it (or it will cause the parser + to ignore the mf2 properties). + """ + self.expect_requests_get( + "http://foo.com/bar", + """ @@ -580,527 +696,680 @@ def test_tumblr_special_case_does_not_override_mf1(self): -""") - self.mox.ReplayAll() - self.assert_created('blah - http://foo.com/bar') - - def test_tumblr_backlink_in_t_umblr_com_url(self): - """Tumblr now rewrites links in t.umblr.com wrapper. Handle that. - - https://github.com/snarfed/bridgy/issues/609""" - link = '' - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo', - backlink=link) - self.mox.ReplayAll() - self.assert_created('foo - http://foo.com/bar', interactive=False) - - def test_returned_type_overrides(self): - # FakeSource returns type 'post' when it sees 'rsvp' - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + self.assert_created("blah - http://foo.com/bar") + + def test_tumblr_backlink_in_t_umblr_com_url(self): + """Tumblr now rewrites links in t.umblr.com wrapper. Handle that. + + https://github.com/snarfed/bridgy/issues/609""" + link = '' + self.expect_requests_get( + "http://foo.com/bar", self.post_html % "foo", backlink=link + ) + self.mox.ReplayAll() + self.assert_created("foo - http://foo.com/bar", interactive=False) + + def test_returned_type_overrides(self): + # FakeSource returns type 'post' when it sees 'rsvp' + self.expect_requests_get( + "http://foo.com/bar", + """

-

""") - self.mox.ReplayAll() - self.assert_created('') - self.assertEqual('post', Publish.query().get().type) - - def test_in_reply_to_domain_allows_subdomains(self): - """(The code that handles this is in granary.Source.base_object.)""" - subdomains = 'www.', 'mobile.', '' - for i, subdomain in enumerate(subdomains): - self.expect_requests_get('http://foo.com/%d' % i, -"""

+

""", + ) + self.mox.ReplayAll() + self.assert_created("") + self.assertEqual("post", Publish.query().get().type) + + def test_in_reply_to_domain_allows_subdomains(self): + """(The code that handles this is in granary.Source.base_object.)""" + subdomains = "www.", "mobile.", "" + for i, subdomain in enumerate(subdomains): + self.expect_requests_get( + "http://foo.com/%d" % i, + """

foo -

""" % subdomain) - self.mox.ReplayAll() - - for i in range(len(subdomains)): - resp = self.get_response(source='http://foo.com/%d' % i) - self.assertEqual(201, resp.status_code, resp.get_data(as_text=True)) - - def test_relative_u_url(self): - """mf2py expands urls; this just check that we give it the source URL.""" - html = """
+

""" + % subdomain, + ) + self.mox.ReplayAll() + + for i in range(len(subdomains)): + resp = self.get_response(source="http://foo.com/%d" % i) + self.assertEqual(201, resp.status_code, resp.get_data(as_text=True)) + + def test_relative_u_url(self): + """mf2py expands urls; this just check that we give it the source URL.""" + html = """

foo

""" - self.expect_requests_get('http://foo.com/bar', html) - self.mox.ReplayAll() - self.assert_created('foo - http://foo.com/foo/bar') - - def test_report_error(self): - """Should report most errors from create() or preview_create().""" - for i in range(2): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - - self.mox.StubOutWithMock(error_reporting_client, 'report', - use_mock_anything=True) - for subject in 'Webmention None failed', 'Preview preview new': - error_reporting_client.report(subject, http_context=mox.IgnoreArg(), - user=u'http://localhost/fake/foo.com') - - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - err = requests.HTTPError(response=util.Struct(status_code='429', text='fooey')) - self.source.gr_source.create(mox.IgnoreArg(), - include_link=gr_source.INCLUDE_LINK, - ignore_formatting=False - ).AndRaise(err) - - self.mox.StubOutWithMock(self.source.gr_source, 'preview_create', - use_mock_anything=True) - self.source.gr_source.preview_create(mox.IgnoreArg(), - include_link=gr_source.INCLUDE_LINK, - ignore_formatting=False - ).AndRaise(err) - - self.mox.ReplayAll() - self.assert_error('fooey', status=429) - self.assertEqual(429, self.get_response(preview=True).status_code) - - def test_silo_500_returns_502(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'xyz') - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - err = requests.HTTPError(response=util.Struct(status_code='500', text='foooey bar')) - self.source.gr_source.create(mox.IgnoreArg(), - include_link=gr_source.INCLUDE_LINK, - ignore_formatting=False - ).AndRaise(err) - self.mox.ReplayAll() - self.assert_error('Error: foooey bar', status=502) - - def test_connection_error_returns_504(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'xyz') - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - self.source.gr_source.create(mox.IgnoreArg(), - include_link=gr_source.INCLUDE_LINK, - ignore_formatting=False - ).AndRaise(socket.timeout('foooey bar')) - self.mox.ReplayAll() - self.assert_error('Error: foooey bar', status=504) - - def test_auth_error_disables_source(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'xyz') - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - err = requests.HTTPError(response=requests_response('orig', status=401)) - self.source.gr_source.create(mox.IgnoreArg(), - include_link=gr_source.INCLUDE_LINK, - ignore_formatting=False - ).AndRaise(err) - self.mox.ReplayAll() - - self.assert_error('orig', status=401) - self.assertEqual('disabled', self.source.key.get().status) - - def test_non_http_exception(self): - """If we crash, we shouldn't blame the silo or the user's site.""" - self.expect_requests_get('http://foo.com/bar', self.post_html % 'xyz') - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - self.source.gr_source.create(mox.IgnoreArg(), - include_link=gr_source.INCLUDE_LINK, - ignore_formatting=False - ).AndRaise(RuntimeError('baz')) - self.mox.ReplayAll() - self.assert_error('Internal Server Error', status=500) - - def test_value_error(self): - """For example, Twitter raises ValueError on invalid in-reply-to URL.... - - ...eg https:/twitter.com/, which matches domain but isn't a tweet. 
- """ - self.expect_requests_get('http://foo.com/bar', self.post_html % 'xyz') - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - self.source.gr_source.create(mox.IgnoreArg(), - include_link=gr_source.INCLUDE_LINK, - ignore_formatting=False - ).AndRaise(ValueError('baz')) - self.mox.ReplayAll() - self.assert_error('baz', status=400) - - def test_preview(self): - html = self.post_html % 'foo' - self.expect_requests_get('http://foo.com/bar', html) - # make sure create() isn't called - self.mox.StubOutWithMock(self.source.gr_source, 'create', use_mock_anything=True) - self.mox.ReplayAll() - self.assert_success('preview of foo', preview=True) - - publish = Publish.query().get() - self.assertEqual(self.source.key, publish.source) - self.assertEqual('complete', publish.status) - self.assertEqual('preview', publish.type) - self.assertEqual(html + self.backlink, publish.html) - - def test_bridgy_omit_link_query_param(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - resp = self.assert_created('foo', params={'bridgy_omit_link': 'True'}) - self.assertEqual('foo', json_loads(resp.get_data(as_text=True))['content']) - - def test_bridgy_omit_link_target_query_param(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - - target = 'https://brid.gy/publish/fake?bridgy_omit_link=true' - resp = self.assert_created('foo', target=target) - self.assertEqual('foo', json_loads(resp.get_data(as_text=True))['content']) - - def test_bridgy_omit_link_mf2(self): - html = self.post_html % 'foo ' - self.expect_requests_get('http://foo.com/bar', html) - self.mox.ReplayAll() - resp = self.assert_created('foo') - self.assertEqual('foo', json_loads(resp.get_data(as_text=True))['content']) - - def test_preview_omit_link_no_query_param_overrides_mf2(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - - resp = self.assert_success('preview of foo', preview=True) - self.assertIn( - '' + self.expect_requests_get("http://foo.com/bar", html) + self.mox.ReplayAll() + resp = self.assert_created("foo") + self.assertEqual("foo", json_loads(resp.get_data(as_text=True))["content"]) + + def test_preview_omit_link_no_query_param_overrides_mf2(self): + self.expect_requests_get("http://foo.com/bar", self.post_html % "foo") + self.mox.ReplayAll() + + resp = self.assert_success("preview of foo", preview=True) + self.assertIn( + '
foo
""" - self.expect_requests_get('http://foo.com/bar', html) - self.mox.ReplayAll() - - resp = self.assert_success('preview of foo - http://foo.com/bar', - preview=True, - params={'bridgy_omit_link': 'false'}) - self.assertIn( - '
foo
bar
-
""") - self.mox.ReplayAll() - self.assert_created('foo bar', params={'bridgy_ignore_formatting': ''}) - - def test_bridgy_ignore_formatting_target_query_param(self): - self.expect_requests_get('http://foo.com/bar', """\ +""", + ) + self.mox.ReplayAll() + self.assert_created("foo bar", params={"bridgy_ignore_formatting": ""}) + + def test_bridgy_ignore_formatting_target_query_param(self): + self.expect_requests_get( + "http://foo.com/bar", + """\
foo
bar
-
""") - self.mox.ReplayAll() - target = 'https://brid.gy/publish/fake?bridgy_ignore_formatting=true' - self.assert_created('foo bar', target=target) - - def test_bridgy_ignore_formatting_mf2(self): - self.expect_requests_get('http://foo.com/bar', """\ +""", + ) + self.mox.ReplayAll() + target = "https://brid.gy/publish/fake?bridgy_ignore_formatting=true" + self.assert_created("foo bar", target=target) + + def test_bridgy_ignore_formatting_mf2(self): + self.expect_requests_get( + "http://foo.com/bar", + """\
foo
bar
-
""") - self.mox.ReplayAll() - self.assert_created('foo bar') - - def test_bridgy_content_query_param_unsupported(self): - """We originally supported this, then disabled it since it's a security hole. - - https://github.com/snarfed/bridgy/issues/560#issuecomment-161691819 - """ - params = {'bridgy_fake_content': 'use this'} - self.assert_error('bridgy_fake_content parameter is not supported', - params=params) - self.assert_error('bridgy_fake_content parameter is not supported', - preview=True, params=params) - - def test_bridgy_content_mf2(self): - for i in range(2): - self.expect_requests_get('http://foo.com/bar', """\ +""", + ) + self.mox.ReplayAll() + self.assert_created("foo bar") + + def test_bridgy_content_query_param_unsupported(self): + """We originally supported this, then disabled it since it's a security hole. + + https://github.com/snarfed/bridgy/issues/560#issuecomment-161691819 + """ + params = {"bridgy_fake_content": "use this"} + self.assert_error( + "bridgy_fake_content parameter is not supported", params=params + ) + self.assert_error( + "bridgy_fake_content parameter is not supported", + preview=True, + params=params, + ) + + def test_bridgy_content_mf2(self): + for i in range(2): + self.expect_requests_get( + "http://foo.com/bar", + """\
unused
use this
-
""") - self.mox.ReplayAll() - - params = {'bridgy_omit_link': 'false', - 'bridgy_ignore_formatting': 'true'} - self.assert_success('use this - http://foo.com/bar', preview=True, params=params) - self.assert_created('use this - http://foo.com/bar', params=params) - - def test_expand_target_urls_u_syndication(self): - """Comment on a post with a u-syndication value""" - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + + params = {"bridgy_omit_link": "false", "bridgy_ignore_formatting": "true"} + self.assert_success( + "use this - http://foo.com/bar", preview=True, params=params + ) + self.assert_created("use this - http://foo.com/bar", params=params) + + def test_expand_target_urls_u_syndication(self): + """Comment on a post with a u-syndication value""" + self.mox.StubOutWithMock( + self.source.gr_source, "create", use_mock_anything=True + ) + + self.expect_requests_get( + "http://foo.com/bar", + """ - """) + """, + ) - self.expect_requests_get('http://orig.domain/baz', """ + self.expect_requests_get( + "http://orig.domain/baz", + """ - """) - - self.source.gr_source.create({ - 'inReplyTo': [{'url': 'http://orig.domain/baz'}, - {'url': 'https://fa.ke/a/b'}], - 'displayName': 'In reply to', - 'url': 'http://foo.com/bar', - 'objectType': 'comment', - }, include_link=gr_source.INCLUDE_LINK, ignore_formatting=False). \ - AndReturn(gr_source.creation_result({ - 'url': 'http://fake/url', - 'id': 'http://fake/url', - 'content': 'This is a reply', - })) - - self.mox.ReplayAll() - self.assert_created('') - - def test_expand_target_urls_rel_syndication(self): - """Publishing a like of a post with two rel=syndication values""" - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - - self.expect_requests_get('http://foo.com/bar', """ + """, + ) + + self.source.gr_source.create( + { + "inReplyTo": [ + {"url": "http://orig.domain/baz"}, + {"url": "https://fa.ke/a/b"}, + ], + "displayName": "In reply to", + "url": "http://foo.com/bar", + "objectType": "comment", + }, + include_link=gr_source.INCLUDE_LINK, + ignore_formatting=False, + ).AndReturn( + gr_source.creation_result( + { + "url": "http://fake/url", + "id": "http://fake/url", + "content": "This is a reply", + } + ) + ) + + self.mox.ReplayAll() + self.assert_created("") + + def test_expand_target_urls_rel_syndication(self): + """Publishing a like of a post with two rel=syndication values""" + self.mox.StubOutWithMock( + self.source.gr_source, "create", use_mock_anything=True + ) + + self.expect_requests_get( + "http://foo.com/bar", + """ - """) + """, + ) - self.expect_requests_get('http://orig.domain/baz', """ + self.expect_requests_get( + "http://orig.domain/baz", + """
Original post
- """) - - self.source.gr_source.create({ - 'verb': 'like', - 'displayName': 'liked this', - 'url': 'http://foo.com/bar', - 'object': [{'url': 'http://orig.domain/baz'}, - {'url': 'https://fa.ke/a/b'}, - {'url': 'https://flic.kr/c/d'}], - 'objectType': 'activity', - }, include_link=gr_source.INCLUDE_LINK, ignore_formatting=False). \ - AndReturn(gr_source.creation_result({ - 'url': 'http://fake/url', - 'id': 'http://fake/url', - 'content': 'liked this', - })) - - self.mox.ReplayAll() - self.assert_created('') - - def test_expand_target_urls_h_cite(self): - """Repost a post with a p-syndication h-cite value (syndication - property is a dict rather than a string) - """ - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - - self.expect_requests_get('http://foo.com/bar', """ + """, + ) + + self.source.gr_source.create( + { + "verb": "like", + "displayName": "liked this", + "url": "http://foo.com/bar", + "object": [ + {"url": "http://orig.domain/baz"}, + {"url": "https://fa.ke/a/b"}, + {"url": "https://flic.kr/c/d"}, + ], + "objectType": "activity", + }, + include_link=gr_source.INCLUDE_LINK, + ignore_formatting=False, + ).AndReturn( + gr_source.creation_result( + { + "url": "http://fake/url", + "id": "http://fake/url", + "content": "liked this", + } + ) + ) + + self.mox.ReplayAll() + self.assert_created("") + + def test_expand_target_urls_h_cite(self): + """Repost a post with a p-syndication h-cite value (syndication + property is a dict rather than a string) + """ + self.mox.StubOutWithMock( + self.source.gr_source, "create", use_mock_anything=True + ) + + self.expect_requests_get( + "http://foo.com/bar", + """ - """) + """, + ) - self.expect_requests_get('http://orig.domain/baz', """ + self.expect_requests_get( + "http://orig.domain/baz", + """ - """) - - self.source.gr_source.create({ - 'verb': 'share', - 'displayName': 'reposted this', - 'url': 'http://foo.com/bar', - 'object': [{'url': 'http://orig.domain/baz'}, - {'url': 'https://fa.ke/a/b'}], - 'objectType': 'activity', - }, include_link=gr_source.INCLUDE_LINK, ignore_formatting=False). \ - AndReturn(gr_source.creation_result({ - 'url': 'http://fake/url', - 'id': 'http://fake/url', - 'content': 'reposted this', - })) - - self.mox.ReplayAll() - self.assert_created('') - - def test_expand_target_urls_h_event_in_h_feed(self): - """RSVP to an event is a single element inside an h-feed; we should handle - it just like a normal post permalink page. - """ - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - - self.expect_requests_get('http://foo.com/bar', """ + """, + ) + + self.source.gr_source.create( + { + "verb": "share", + "displayName": "reposted this", + "url": "http://foo.com/bar", + "object": [ + {"url": "http://orig.domain/baz"}, + {"url": "https://fa.ke/a/b"}, + ], + "objectType": "activity", + }, + include_link=gr_source.INCLUDE_LINK, + ignore_formatting=False, + ).AndReturn( + gr_source.creation_result( + { + "url": "http://fake/url", + "id": "http://fake/url", + "content": "reposted this", + } + ) + ) + + self.mox.ReplayAll() + self.assert_created("") + + def test_expand_target_urls_h_event_in_h_feed(self): + """RSVP to an event is a single element inside an h-feed; we should handle + it just like a normal post permalink page. + """ + self.mox.StubOutWithMock( + self.source.gr_source, "create", use_mock_anything=True + ) + + self.expect_requests_get( + "http://foo.com/bar", + """
yes
- """) + """, + ) - self.expect_requests_get('http://orig.domain/baz', """ + self.expect_requests_get( + "http://orig.domain/baz", + """ - """) - - self.source.gr_source.create({ - 'url': 'http://foo.com/bar', - 'verb': 'rsvp-yes', - 'object': [{'url': 'http://orig.domain/baz'}, - {'url': 'https://fa.ke/a/b'}], - 'objectType': 'activity', - }, include_link=gr_source.INCLUDE_LINK, ignore_formatting=False). \ - AndReturn(gr_source.creation_result({ - 'url': 'http://fake/url', - 'id': 'http://fake/url', - 'content': 'RSVPd yes', - })) - - self.mox.ReplayAll() - self.assert_created('') - - def test_expand_target_urls_fetch_failure(self): - """Fetching the in-reply-to URL fails, but that shouldn't prevent us - from publishing the post itself. - """ - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - - self.expect_requests_get('http://foo.com/bar', """ + """, + ) + + self.source.gr_source.create( + { + "url": "http://foo.com/bar", + "verb": "rsvp-yes", + "object": [ + {"url": "http://orig.domain/baz"}, + {"url": "https://fa.ke/a/b"}, + ], + "objectType": "activity", + }, + include_link=gr_source.INCLUDE_LINK, + ignore_formatting=False, + ).AndReturn( + gr_source.creation_result( + { + "url": "http://fake/url", + "id": "http://fake/url", + "content": "RSVPd yes", + } + ) + ) + + self.mox.ReplayAll() + self.assert_created("") + + def test_expand_target_urls_fetch_failure(self): + """Fetching the in-reply-to URL fails, but that shouldn't prevent us + from publishing the post itself. + """ + self.mox.StubOutWithMock( + self.source.gr_source, "create", use_mock_anything=True + ) + + self.expect_requests_get( + "http://foo.com/bar", + """ - """) - - self.expect_requests_get('http://orig.domain/baz', '', status_code=404) - - self.source.gr_source.create({ - 'inReplyTo': [{'url': 'http://orig.domain/baz'}], - 'displayName': 'In reply to', - 'url': 'http://foo.com/bar', - 'objectType': 'comment', - }, include_link=gr_source.INCLUDE_LINK, ignore_formatting=False). \ - AndReturn(gr_source.creation_result({ - 'url': 'http://fake/url', - 'id': 'http://fake/url', - 'content': 'This is a reply', - })) - - self.mox.ReplayAll() - self.assert_created('') - - def test_expand_target_urls_no_microformats(self): - """Publishing a like of a post that has no microformats; should have no - problems posting the like anyway. - """ - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - - self.expect_requests_get('http://foo.com/bar', """ + """, + ) + + self.expect_requests_get("http://orig.domain/baz", "", status_code=404) + + self.source.gr_source.create( + { + "inReplyTo": [{"url": "http://orig.domain/baz"}], + "displayName": "In reply to", + "url": "http://foo.com/bar", + "objectType": "comment", + }, + include_link=gr_source.INCLUDE_LINK, + ignore_formatting=False, + ).AndReturn( + gr_source.creation_result( + { + "url": "http://fake/url", + "id": "http://fake/url", + "content": "This is a reply", + } + ) + ) + + self.mox.ReplayAll() + self.assert_created("") + + def test_expand_target_urls_no_microformats(self): + """Publishing a like of a post that has no microformats; should have no + problems posting the like anyway. + """ + self.mox.StubOutWithMock( + self.source.gr_source, "create", use_mock_anything=True + ) + + self.expect_requests_get( + "http://foo.com/bar", + """ - """) + """, + ) - self.expect_requests_get('http://orig.domain/baz', """ + self.expect_requests_get( + "http://orig.domain/baz", + """
A fantastically well-written article
- """) - - self.source.gr_source.create({ - 'verb': 'like', - 'displayName': 'liked this', - 'url': 'http://foo.com/bar', - 'object': [{'url': 'http://orig.domain/baz'}], - 'objectType': 'activity', - }, include_link=gr_source.INCLUDE_LINK, ignore_formatting=False). \ - AndReturn(gr_source.creation_result({ - 'url': 'http://fake/url', - 'id': 'http://fake/url', - 'content': 'liked this', - })) - - self.mox.ReplayAll() - self.assert_created('') - - def test_expand_target_urls_blocklisted_target(self): - """RSVP to a domain in the webmention blocklist should not trigger a fetch. - """ - self.mox.StubOutWithMock(self.source.gr_source, 'create', - use_mock_anything=True) - - self.expect_requests_get('http://foo.com/bar', """ + """, + ) + + self.source.gr_source.create( + { + "verb": "like", + "displayName": "liked this", + "url": "http://foo.com/bar", + "object": [{"url": "http://orig.domain/baz"}], + "objectType": "activity", + }, + include_link=gr_source.INCLUDE_LINK, + ignore_formatting=False, + ).AndReturn( + gr_source.creation_result( + { + "url": "http://fake/url", + "id": "http://fake/url", + "content": "liked this", + } + ) + ) + + self.mox.ReplayAll() + self.assert_created("") + + def test_expand_target_urls_blocklisted_target(self): + """RSVP to a domain in the webmention blocklist should not trigger a fetch.""" + self.mox.StubOutWithMock( + self.source.gr_source, "create", use_mock_anything=True + ) + + self.expect_requests_get( + "http://foo.com/bar", + """
yes
@@ -1108,30 +1377,40 @@ def test_expand_target_urls_blocklisted_target(self):
- """) - - self.source.gr_source.create({ - 'url': 'http://foo.com/bar', - 'verb': 'rsvp-yes', - 'object': [{'url': 'http://fa.ke/homebrew-website-club'}], - 'objectType': 'activity', - 'content': 'yes\n', - }, include_link=gr_source.INCLUDE_LINK, ignore_formatting=False). \ - AndReturn(gr_source.creation_result({ - 'url': 'http://fake/url', - 'id': 'http://fake/url', - 'content': 'RSVPd yes', - })) - - self.mox.ReplayAll() - self.assert_created('') - - def test_in_reply_to_no_target(self): - """in-reply-to an original that does not syndicate to the silo should - fail with a helpful error message. The error message is generated by - granary. - """ - self.expect_requests_get('http://foo.com/bar', """ + """, + ) + + self.source.gr_source.create( + { + "url": "http://foo.com/bar", + "verb": "rsvp-yes", + "object": [{"url": "http://fa.ke/homebrew-website-club"}], + "objectType": "activity", + "content": 'yes\n', + }, + include_link=gr_source.INCLUDE_LINK, + ignore_formatting=False, + ).AndReturn( + gr_source.creation_result( + { + "url": "http://fake/url", + "id": "http://fake/url", + "content": "RSVPd yes", + } + ) + ) + + self.mox.ReplayAll() + self.assert_created("") + + def test_in_reply_to_no_target(self): + """in-reply-to an original that does not syndicate to the silo should + fail with a helpful error message. The error message is generated by + granary. + """ + self.expect_requests_get( + "http://foo.com/bar", + """
In reply to a post on original
@@ -1139,40 +1418,49 @@ def test_in_reply_to_no_target(self):
Great post about an important subject
- """) + """, + ) - self.expect_requests_get('http://original.domain/baz', """ + self.expect_requests_get( + "http://original.domain/baz", + """ - """) + """, + ) - self.mox.ReplayAll() + self.mox.ReplayAll() - self.assert_error('no fa.ke url to reply to') + self.assert_error("no fa.ke url to reply to") - def test_dont_expand_home_page_target_url(self): - """Replying to a home page shouldn't expand syndication etc. URLs.""" - self.expect_requests_get('http://foo.com/bar', """ + def test_dont_expand_home_page_target_url(self): + """Replying to a home page shouldn't expand syndication etc. URLs.""" + self.expect_requests_get( + "http://foo.com/bar", + """ - """) - # shouldn't fetch http://ho.me/ - self.mox.ReplayAll() - - self.assert_error('no fa.ke url to reply to') - - def test_html2text(self): - """Test that using html2text renders whitespace ok in publish content.""" - # based on https://snarfed.org/2014-01-15_homebrew-website-club-tonight - for i in range(2): - self.expect_requests_get('http://foo.com/bar', """\ + """, + ) + # shouldn't fetch http://ho.me/ + self.mox.ReplayAll() + + self.assert_error("no fa.ke url to reply to") + + def test_html2text(self): + """Test that using html2text renders whitespace ok in publish content.""" + # based on https://snarfed.org/2014-01-15_homebrew-website-club-tonight + for i in range(2): + self.expect_requests_get( + "http://foo.com/bar", + """\ - """) + """, + ) - self.mox.ReplayAll() - expected = """\ + self.mox.ReplayAll() + expected = """\ Homebrew Website Club is _tonight_! 6:30pm PST at Mozilla SF and Esri Portland. Join us!""" - self.assert_success(expected, preview=True) - expected += ' - http://foo.com/bar' - resp = self.assert_created(expected, preview=False) - self.assertEqual(expected, json_loads(resp.get_data(as_text=True))['content']) - - def test_unicode(self): - """Test that we pass through unicode chars correctly.""" - text = 'Démo pour les développeur. Je suis navrée de ce problème.' - for i in range(2): - self.expect_requests_get('http://foo.com/bår', self.post_html % text, - content_type='text/html; charset=utf-8') - self.mox.ReplayAll() - - url = 'http://foo.com/bår'.encode() - self.assert_created(text, preview=False, source=url, params={'bridgy_omit_link': ''}) - self.assert_success(text, preview=True, source=url, params={'bridgy_omit_link': ''}) - - def test_utf8_meta_tag(self): - self._test_charset_in_meta_tag('utf-8') - - def test_iso8859_meta_tag(self): - """https://github.com/snarfed/bridgy/issues/385""" - self._test_charset_in_meta_tag('iso-8859-1') - - def _test_charset_in_meta_tag(self, charset): - """Test that we support charset in meta tag as well as HTTP header.""" - text = 'Démo pour les développeur. Je suis navrée de ce problème.' - - resp = requests.Response() - resp._content = (u""" + self.assert_success(expected, preview=True) + expected += " - http://foo.com/bar" + resp = self.assert_created(expected, preview=False) + self.assertEqual(expected, json_loads(resp.get_data(as_text=True))["content"]) + + def test_unicode(self): + """Test that we pass through unicode chars correctly.""" + text = "Démo pour les développeur. Je suis navrée de ce problème." 
+ for i in range(2): + self.expect_requests_get( + "http://foo.com/bår", + self.post_html % text, + content_type="text/html; charset=utf-8", + ) + self.mox.ReplayAll() + + url = "http://foo.com/bår".encode() + self.assert_created( + text, preview=False, source=url, params={"bridgy_omit_link": ""} + ) + self.assert_success( + text, preview=True, source=url, params={"bridgy_omit_link": ""} + ) + + def test_utf8_meta_tag(self): + self._test_charset_in_meta_tag("utf-8") + + def test_iso8859_meta_tag(self): + """https://github.com/snarfed/bridgy/issues/385""" + self._test_charset_in_meta_tag("iso-8859-1") + + def _test_charset_in_meta_tag(self, charset): + """Test that we support charset in meta tag as well as HTTP header.""" + text = "Démo pour les développeur. Je suis navrée de ce problème." + + resp = requests.Response() + resp._content = ( + """

%s

-""" % (charset, text)).encode(charset) - resp._text = "shouldn't use this! " + text - resp.url = 'http://foo.com/bar' - resp.status_code = 200 - requests.get(resp.url, timeout=util.HTTP_TIMEOUT, - headers=util.REQUEST_HEADERS, stream=True).AndReturn(resp) - self.mox.ReplayAll() - - self.assert_created(text, params={'bridgy_omit_link': ''}) - - def test_missing_backlink(self): - # use super to avoid this class's override that adds backlink - super().expect_requests_get( - 'http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - self.assert_error("Couldn't find link to localhost/publish/fake") - - def test_require_like_of_repost_of(self): - """We only trigger on like-of and repost-of, not like or repost.""" - for prop in 'like', 'repost': - url = 'http://foo.com/%s' % prop - self.expect_requests_get(url, """ +""" + % (charset, text) + ).encode(charset) + resp._text = "shouldn't use this! " + text + resp.url = "http://foo.com/bar" + resp.status_code = 200 + requests.get( + resp.url, + timeout=util.HTTP_TIMEOUT, + headers=util.REQUEST_HEADERS, + stream=True, + ).AndReturn(resp) + self.mox.ReplayAll() + + self.assert_created(text, params={"bridgy_omit_link": ""}) + + def test_missing_backlink(self): + # use super to avoid this class's override that adds backlink + super().expect_requests_get("http://foo.com/bar", self.post_html % "foo") + self.mox.ReplayAll() + self.assert_error("Couldn't find link to localhost/publish/fake") + + def test_require_like_of_repost_of(self): + """We only trigger on like-of and repost-of, not like or repost.""" + for prop in "like", "repost": + url = "http://foo.com/%s" % prop + self.expect_requests_get( + url, + """

foo

- """ % (url, prop)) - - self.mox.ReplayAll() - for prop in 'like', 'repost': - url = 'http://foo.com/%s' % prop - self.assert_created('foo - %s' % url, source=url) - - def test_unescape(self): - self.expect_requests_get('http://foo.com/bar', self.post_html % 'abc & xyz') - self.mox.ReplayAll() - self.assert_created('abc & xyz - http://foo.com/bar') - - def test_multi_rsvp(self): - """Test RSVP that replies to multiple event URLs like - http://tantek.com/2015/308/t1/homebrew-website-club-mozsf - """ - html = """
+ """ + % (url, prop), + ) + + self.mox.ReplayAll() + for prop in "like", "repost": + url = "http://foo.com/%s" % prop + self.assert_created("foo - %s" % url, source=url) + + def test_unescape(self): + self.expect_requests_get("http://foo.com/bar", self.post_html % "abc & xyz") + self.mox.ReplayAll() + self.assert_created("abc & xyz - http://foo.com/bar") + + def test_multi_rsvp(self): + """Test RSVP that replies to multiple event URLs like + http://tantek.com/2015/308/t1/homebrew-website-club-mozsf + """ + html = """
RSVP yes to:
@@ -1286,77 +1592,103 @@ def test_multi_rsvp(self):
value="http://tantek.com/2015/308/t1/homebrew-website-club-mozsf" />
""" - self.expect_requests_get('http://foo.com/bar', html) - self.expect_requests_get('https://kylewm.com/2015/11/sf-homebrew-website-club', '') - - # make sure create() isn't called - self.mox.StubOutWithMock(self.source.gr_source, 'create', use_mock_anything=True) - self.mox.ReplayAll() - self.assert_success('going to Homebrew', preview=True) - - def test_multiple_users_on_domain(self): - source_2 = testutil.FakeSource( - id='foo.com/b', features=['publish'], domains=['foo.com'], - domain_urls=['http://foo.com/b'], auth_entity=self.auth_entity.key) - source_2.put() - source_3 = testutil.FakeSource( - id='foo.com/c', features=['publish'], domains=['foo.com'], - domain_urls=['http://foo.com/c'], auth_entity=self.auth_entity.key) - source_3.put() - - self.expect_requests_get('http://foo.com/bar', self.post_html % 'foo') - self.mox.ReplayAll() - self.assert_created('foo - http://foo.com/bar', interactive=False) - self.assertEqual(source_2.key, Publish.query().get().source) - - def test_multiple_users_on_domain_no_path_matches(self): - self.source.domain_urls = ['http://foo.com/a'] - self.source.put() - source_2 = testutil.FakeSource( - id='foo.com/c', features=['publish'], domains=['foo.com'], - domain_urls=['http://foo.com/c'], auth_entity=self.auth_entity.key) - source_2.put() - - self.assert_error('No account found that matches') - - def test_multiple_users_only_one_registered(self): - self.source.key.delete() - source_2 = testutil.FakeSource( - id='foo.com/b', features=['publish'], domains=['foo.com'], - auth_entity=self.auth_entity.key) - source_2.put() - source_3 = testutil.FakeSource( - id='foo.com/c', features=['publish'], domains=['foo.com'], - domain_urls=['http://foo.com/c'], auth_entity=self.auth_entity.key) - source_3.put() - - self.assert_error('No account found that matches') - - def test_single_user_on_domain_with_wrong_path(self): - self.source.domain_urls = ['http://foo.com/x'] - self.source.put() - self.assert_error('No account found that matches') - - def test_dont_escape_period_in_content(self): - """Odd bug triggered by specific combination of leading and trailing #. - - Root cause was html2text escaping markdown sequences it emits. - - https://github.com/snarfed/bridgy/issues/656 - """ - self.expect_requests_get('http://foo.com/bar', - self.post_html % ' 2016. #') - self.mox.ReplayAll() - self.assert_created('2016. # - http://foo.com/bar', interactive=False) - self._check_entity(content='2016. #', html_content=' 2016. #') - - def test_ignore_nested_uphoto(self): - """We should only use u-photo directly inside the published item. - - ...not u-photos in children, e.g. h-cards. 
- """ - for i in range(2): - self.expect_requests_get('http://foo.com/bar', """ + self.expect_requests_get("http://foo.com/bar", html) + self.expect_requests_get( + "https://kylewm.com/2015/11/sf-homebrew-website-club", "" + ) + + # make sure create() isn't called + self.mox.StubOutWithMock( + self.source.gr_source, "create", use_mock_anything=True + ) + self.mox.ReplayAll() + self.assert_success("going to Homebrew", preview=True) + + def test_multiple_users_on_domain(self): + source_2 = testutil.FakeSource( + id="foo.com/b", + features=["publish"], + domains=["foo.com"], + domain_urls=["http://foo.com/b"], + auth_entity=self.auth_entity.key, + ) + source_2.put() + source_3 = testutil.FakeSource( + id="foo.com/c", + features=["publish"], + domains=["foo.com"], + domain_urls=["http://foo.com/c"], + auth_entity=self.auth_entity.key, + ) + source_3.put() + + self.expect_requests_get("http://foo.com/bar", self.post_html % "foo") + self.mox.ReplayAll() + self.assert_created("foo - http://foo.com/bar", interactive=False) + self.assertEqual(source_2.key, Publish.query().get().source) + + def test_multiple_users_on_domain_no_path_matches(self): + self.source.domain_urls = ["http://foo.com/a"] + self.source.put() + source_2 = testutil.FakeSource( + id="foo.com/c", + features=["publish"], + domains=["foo.com"], + domain_urls=["http://foo.com/c"], + auth_entity=self.auth_entity.key, + ) + source_2.put() + + self.assert_error("No account found that matches") + + def test_multiple_users_only_one_registered(self): + self.source.key.delete() + source_2 = testutil.FakeSource( + id="foo.com/b", + features=["publish"], + domains=["foo.com"], + auth_entity=self.auth_entity.key, + ) + source_2.put() + source_3 = testutil.FakeSource( + id="foo.com/c", + features=["publish"], + domains=["foo.com"], + domain_urls=["http://foo.com/c"], + auth_entity=self.auth_entity.key, + ) + source_3.put() + + self.assert_error("No account found that matches") + + def test_single_user_on_domain_with_wrong_path(self): + self.source.domain_urls = ["http://foo.com/x"] + self.source.put() + self.assert_error("No account found that matches") + + def test_dont_escape_period_in_content(self): + """Odd bug triggered by specific combination of leading and trailing #. + + Root cause was html2text escaping markdown sequences it emits. + + https://github.com/snarfed/bridgy/issues/656 + """ + self.expect_requests_get( + "http://foo.com/bar", self.post_html % " 2016. #" + ) + self.mox.ReplayAll() + self.assert_created("2016. # - http://foo.com/bar", interactive=False) + self._check_entity(content="2016. #", html_content=" 2016. #") + + def test_ignore_nested_uphoto(self): + """We should only use u-photo directly inside the published item. + + ...not u-photos in children, e.g. h-cards. + """ + for i in range(2): + self.expect_requests_get( + "http://foo.com/bar", + """
blah
@@ -1365,19 +1697,22 @@ def test_ignore_nested_uphoto(self):
-""") - self.mox.ReplayAll() - - resp = self.assert_created('blah - http://foo.com/bar') - self.assertNotIn('images', json_loads(resp.get_data(as_text=True))) - - resp = self.assert_success('blah - http://foo.com/bar', preview=True) - self.assertNotIn('with images', resp.get_data(as_text=True)) - - def test_ignore_jetpack_lazy_loaded_imgs(self): - """https://github.com/snarfed/bridgy/issues/798""" - for i in range(2): - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + + resp = self.assert_created("blah - http://foo.com/bar") + self.assertNotIn("images", json_loads(resp.get_data(as_text=True))) + + resp = self.assert_success("blah - http://foo.com/bar", preview=True) + self.assertNotIn("with images", resp.get_data(as_text=True)) + + def test_ignore_jetpack_lazy_loaded_imgs(self): + """https://github.com/snarfed/bridgy/issues/798""" + for i in range(2): + self.expect_requests_get( + "http://foo.com/bar", + """
@@ -1386,32 +1721,43 @@ def test_ignore_jetpack_lazy_loaded_imgs(self):
blah
-""") - self.mox.ReplayAll() - - resp = self.assert_created("blah - http://foo.com/bar") - self.assertEqual(['http://example.com/real'], json_loads(resp.get_data(as_text=True))['images']) - - resp = self.assert_success('blah - http://foo.com/bar', preview=True) - self.assertIn('with images http://example.com/real', resp.get_data(as_text=True)) - - def test_nested_h_as_entry(self): - """https://github.com/snarfed/bridgy/issues/735""" - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + + resp = self.assert_created("blah - http://foo.com/bar") + self.assertEqual( + ["http://example.com/real"], + json_loads(resp.get_data(as_text=True))["images"], + ) + + resp = self.assert_success("blah - http://foo.com/bar", preview=True) + self.assertIn( + "with images http://example.com/real", resp.get_data(as_text=True) + ) + + def test_nested_h_as_entry(self): + """https://github.com/snarfed/bridgy/issues/735""" + self.expect_requests_get( + "http://foo.com/bar", + """

I'M CONTENT

-""") - self.mox.ReplayAll() - self.assert_error("doesn't support type(s) h-as-entry") - self.assertEqual('failed', Publish.query().get().status) - - def test_nested_object_without_url(self): - """p-repost-of creates an inner object, this one without a u-url. - - From https://dougbeal.com/2017/09/23/instagram-post-by-murbers-%e2%80%a2-sep-23-2017-at-107am-utc/""" - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + self.assert_error("doesn't support type(s) h-as-entry") + self.assertEqual("failed", Publish.query().get().status) + + def test_nested_object_without_url(self): + """p-repost-of creates an inner object, this one without a u-url. + + From https://dougbeal.com/2017/09/23/instagram-post-by-murbers-%e2%80%a2-sep-23-2017-at-107am-utc/""" + self.expect_requests_get( + "http://foo.com/bar", + """
@@ -1422,86 +1768,116 @@ def test_nested_object_without_url(self):
-""") - self.mox.ReplayAll() - self.assert_created('Doug (@murderofcro.ws) is SOOPER excited about #pelikanhubs2017') - - def test_fragment(self): - """If we get a fragment, just publish the element with that id.""" - html = """ +""", + ) + self.mox.ReplayAll() + self.assert_created( + "Doug (@murderofcro.ws) is SOOPER excited about #pelikanhubs2017" + ) + + def test_fragment(self): + """If we get a fragment, just publish the element with that id.""" + html = """
foo
""" - self.expect_requests_get('http://foo.com/bar#baz', html) - self.mox.ReplayAll() - resp = self.assert_created('foo - http://foo.com/bar#baz', - source='http://foo.com/bar#baz') - self._check_entity(url='http://foo.com/bar#baz', expected_html=html) - - def test_fragment_not_found(self): - """If we get a fragment but there's no element with that id, return error.""" - self.expect_requests_get('http://foo.com/bar#baz', """ + self.expect_requests_get("http://foo.com/bar#baz", html) + self.mox.ReplayAll() + resp = self.assert_created( + "foo - http://foo.com/bar#baz", source="http://foo.com/bar#baz" + ) + self._check_entity(url="http://foo.com/bar#baz", expected_html=html) + + def test_fragment_not_found(self): + """If we get a fragment but there's no element with that id, return error.""" + self.expect_requests_get( + "http://foo.com/bar#baz", + """
-""") - self.mox.ReplayAll() - self.assert_error('Got fragment baz but no element found with that id.', - source='http://foo.com/bar#baz') - - def test_not_implemented_error(self): - """https://github.com/snarfed/bridgy/issues/832""" - self.expect_requests_get('http://foo.com/bar', """ +""", + ) + self.mox.ReplayAll() + self.assert_error( + "Got fragment baz but no element found with that id.", + source="http://foo.com/bar#baz", + ) + + def test_not_implemented_error(self): + """https://github.com/snarfed/bridgy/issues/832""" + self.expect_requests_get( + "http://foo.com/bar", + """
-""") - self.mox.ReplayAll() - self.assert_error('Combined in-reply-to and tag-of is not yet supported.') - - def test_delete_not_published_error(self): - self.expect_requests_get('http://foo.com/bar', status_code=410) - self.mox.ReplayAll() - self.assert_error("Can't delete this post from FakeSource because Bridgy Publish didn't originally POSSE it there") - - def test_delete(self): - page = PublishedPage(id='http://foo.com/bar') - Publish(parent=page.key, source=self.source.key, status='complete', - published={'id': 'the_id'}).put() - - for i in range(2): - self.expect_requests_get('http://foo.com/bar', status_code=410) - self.mox.ReplayAll() - - resp = self.assert_success('delete the_id', preview=True) - resp = self.assert_response('', status=302, interactive=True) - self.assertEqual('http://localhost/fake/foo.com', resp.headers['Location']) - self.assertEqual(['Done! Click here to view.'], - get_flashed_messages()) - - delete = list(Publish.query())[-1] - self.assertEqual(delete.key.parent(), page.key) - self.assertEqual('deleted', delete.status) - self.assertEqual('delete', delete.type) - self.assertEqual({ - 'id': 'the_id', - 'url': 'http://fake/url', - 'msg': 'delete the_id', - }, delete.published) - - def test_preview_delete_unsupported_silo(self): - page = PublishedPage(id='http://foo.com/bar') - Publish(parent=page.key, source=self.source.key, status='complete', - published={'id': 'the_id'}).put() - - self.expect_requests_get('http://foo.com/bar', status_code=410) - self.mox.StubOutWithMock(self.source.gr_source, 'preview_delete', - use_mock_anything=True) - self.source.gr_source.preview_delete( - mox.IgnoreArg()).AndRaise(NotImplementedError()) - self.mox.ReplayAll() - - self.assert_error("Sorry, deleting isn't supported for FakeSource yet", - preview=True) +""", + ) + self.mox.ReplayAll() + self.assert_error("Combined in-reply-to and tag-of is not yet supported.") + + def test_delete_not_published_error(self): + self.expect_requests_get("http://foo.com/bar", status_code=410) + self.mox.ReplayAll() + self.assert_error( + "Can't delete this post from FakeSource because Bridgy Publish didn't originally POSSE it there" + ) + + def test_delete(self): + page = PublishedPage(id="http://foo.com/bar") + Publish( + parent=page.key, + source=self.source.key, + status="complete", + published={"id": "the_id"}, + ).put() + + for i in range(2): + self.expect_requests_get("http://foo.com/bar", status_code=410) + self.mox.ReplayAll() + + resp = self.assert_success("delete the_id", preview=True) + resp = self.assert_response("", status=302, interactive=True) + self.assertEqual("http://localhost/fake/foo.com", resp.headers["Location"]) + self.assertEqual( + ['Done! 
Click here to view.'], + get_flashed_messages(), + ) + + delete = list(Publish.query())[-1] + self.assertEqual(delete.key.parent(), page.key) + self.assertEqual("deleted", delete.status) + self.assertEqual("delete", delete.type) + self.assertEqual( + { + "id": "the_id", + "url": "http://fake/url", + "msg": "delete the_id", + }, + delete.published, + ) + + def test_preview_delete_unsupported_silo(self): + page = PublishedPage(id="http://foo.com/bar") + Publish( + parent=page.key, + source=self.source.key, + status="complete", + published={"id": "the_id"}, + ).put() + + self.expect_requests_get("http://foo.com/bar", status_code=410) + self.mox.StubOutWithMock( + self.source.gr_source, "preview_delete", use_mock_anything=True + ) + self.source.gr_source.preview_delete(mox.IgnoreArg()).AndRaise( + NotImplementedError() + ) + self.mox.ReplayAll() + + self.assert_error( + "Sorry, deleting isn't supported for FakeSource yet", preview=True + ) diff --git a/tests/test_reddit.py b/tests/test_reddit.py index b76ebc16..114c353e 100644 --- a/tests/test_reddit.py +++ b/tests/test_reddit.py @@ -10,32 +10,33 @@ class RedditTest(testutil.AppTest): + def setUp(self): + super().setUp() + oauth_dropins.reddit.REDDIT_APP_KEY = "my_app_key" + oauth_dropins.reddit.REDDIT_APP_SECRET = "my_app_secret" + user = oauth_dropins.reddit.praw_to_user(gr_reddit_test.FakeRedditor()) + self.auth_entity = oauth_dropins.reddit.RedditAuth( + id="my_string_id", refresh_token="silly_token", user_json=json_dumps(user) + ) + self.auth_entity.put() + self.r = Reddit.new(auth_entity=self.auth_entity) - def setUp(self): - super().setUp() - oauth_dropins.reddit.REDDIT_APP_KEY = 'my_app_key' - oauth_dropins.reddit.REDDIT_APP_SECRET = 'my_app_secret' - user = oauth_dropins.reddit.praw_to_user(gr_reddit_test.FakeRedditor()) - self.auth_entity = oauth_dropins.reddit.RedditAuth( - id='my_string_id', - refresh_token='silly_token', - user_json=json_dumps(user)) - self.auth_entity.put() - self.r = Reddit.new(auth_entity=self.auth_entity) + def test_new(self): + self.assertEqual(self.auth_entity, self.r.auth_entity.get()) + self.assertEqual("silly_token", self.r.gr_source.refresh_token) + self.assertEqual("bonkerfield", self.r.key.string_id()) + self.assertEqual( + "https://styles.redditmedia.com/t5_2az095/styles/profileIcon_ek6onop1xbf41.png", + self.r.picture, + ) + self.assertEqual("bonkerfield", self.r.name) + self.assertEqual("https://reddit.com/user/bonkerfield", self.r.url) + self.assertEqual("https://reddit.com/user/bonkerfield", self.r.silo_url()) + self.assertEqual("tag:reddit.com,2013:bonkerfield", self.r.user_tag_id()) + self.assertEqual("bonkerfield (Reddit)", self.r.label()) - def test_new(self): - self.assertEqual(self.auth_entity, self.r.auth_entity.get()) - self.assertEqual('silly_token', self.r.gr_source.refresh_token) - self.assertEqual('bonkerfield', self.r.key.string_id()) - self.assertEqual('https://styles.redditmedia.com/t5_2az095/styles/profileIcon_ek6onop1xbf41.png', self.r.picture) - self.assertEqual('bonkerfield', self.r.name) - self.assertEqual('https://reddit.com/user/bonkerfield', self.r.url) - self.assertEqual('https://reddit.com/user/bonkerfield', self.r.silo_url()) - self.assertEqual('tag:reddit.com,2013:bonkerfield', self.r.user_tag_id()) - self.assertEqual('bonkerfield (Reddit)', self.r.label()) - - def test_search_for_links_no_urls(self): - # only a blocklisted domain - self.r.domain_urls = ['https://t.co/xyz'] - self.r.put() - self.assert_equals([], self.r.search_for_links()) + def 
test_search_for_links_no_urls(self): + # only a blocklisted domain + self.r.domain_urls = ["https://t.co/xyz"] + self.r.put() + self.assert_equals([], self.r.search_for_links()) diff --git a/tests/test_superfeedr.py b/tests/test_superfeedr.py index 145fce5e..e5b0e8fc 100644 --- a/tests/test_superfeedr.py +++ b/tests/test_superfeedr.py @@ -12,175 +12,214 @@ class FakeNotify(superfeedr.Notify): - SOURCE_CLS = testutil.FakeSource + SOURCE_CLS = testutil.FakeSource class SuperfeedrTest(testutil.AppTest): - - def setUp(self): - super().setUp() - - self.app = Flask('test_superfeedr') - self.app.add_url_rule('/notify/', methods=['POST'], - view_func=FakeNotify.as_view('test_superfeedr')) - self.app.config['ENV'] = 'development' - self.client = self.app.test_client() - - self.source = testutil.FakeSource(id='foo.com', domains=['foo.com'], - features=['webmention']) - self.source.put() - self.item = {'id': 'A', 'content': 'B'} - self.feed = {'items': [self.item]} - - def assert_blogposts(self, expected): - got = list(BlogPost.query()) - self.assert_entities_equal(expected, got, ignore=('created', 'updated')) - - def test_subscribe(self): - expected = { - 'hub.mode': 'subscribe', - 'hub.topic': 'fake feed url', - 'hub.callback': 'http://localhost/fake/notify/foo.com', - 'format': 'json', - 'retrieve': 'true', - } - item_a = {'permalinkUrl': 'A', 'content': 'a http://a.com a'} - item_b = {'permalinkUrl': 'B', 'summary': 'b http://b.com b'} - feed = {'items': [item_a, {}, item_b]} - self.expect_requests_post(superfeedr.PUSH_API_URL, feed, - data=expected, auth=mox.IgnoreArg()) - - post_a = BlogPost(id='A', source=self.source.key, feed_item=item_a, - unsent=['http://a.com/']) - post_b = BlogPost(id='B', source=self.source.key, feed_item=item_b, - unsent=['http://b.com/']) - self.expect_task('propagate-blogpost', key=post_a) - self.expect_task('propagate-blogpost', key=post_b) - self.mox.ReplayAll() - - with self.app.test_request_context(): - superfeedr.subscribe(self.source) - self.assert_blogposts([post_a, post_b]) - - def test_handle_feed(self): - item_a = {'permalinkUrl': 'A', - 'content': 'a http://a.com http://foo.com/self/link b'} - post_a = BlogPost(id='A', source=self.source.key, feed_item=item_a, - # self link should be discarded - unsent=['http://a.com/']) - self.expect_task('propagate-blogpost', key=post_a) - self.mox.ReplayAll() - - superfeedr.handle_feed({'items': [item_a]}, self.source) - self.assert_blogposts([post_a]) - - def test_handle_feed_no_items(self): - superfeedr.handle_feed({}, self.source) - self.assert_blogposts([]) - - superfeedr.handle_feed(None, self.source) - self.assert_blogposts([]) - - def test_handle_feed_disabled_source(self): - self.source.status = 'disabled' - self.source.put() - superfeedr.handle_feed(self.feed, self.source) - self.assert_blogposts([]) - - def test_handle_feed_source_missing_webmention_feature(self): - self.source.features = ['listen'] - self.source.put() - superfeedr.handle_feed(self.feed, self.source) - self.assert_blogposts([]) - - def test_handle_feed_allows_bridgy_publish_links(self): - item = {'permalinkUrl': 'A', 'content': 'a https://brid.gy/publish/twitter b'} - self.expect_task('propagate-blogpost', key=BlogPost(id='A')) - self.mox.ReplayAll() - - superfeedr.handle_feed({'items': [item]}, self.source) - self.assert_equals(['https://brid.gy/publish/twitter'], - BlogPost.get_by_id('A').unsent) - - def test_handle_feed_unwraps_t_umblr_com_links(self): - item = { - 'permalinkUrl': 'A', - 'id': 'A', - 'content': 'x y', - } - post = 
BlogPost(id='A', source=self.source.key, feed_item=item, - unsent=['http://wrap/ped']) - self.expect_task('propagate-blogpost', key=post) - self.mox.ReplayAll() - - superfeedr.handle_feed({'items': [item]}, self.source) - self.assert_blogposts([post]) - - def test_handle_feed_cleans_links(self): - item = { - 'permalinkUrl': 'A', - 'id': 'A', - 'content': 'x ", + methods=["POST"], + view_func=FakeNotify.as_view("test_superfeedr"), + ) + self.app.config["ENV"] = "development" + self.client = self.app.test_client() + + self.source = testutil.FakeSource( + id="foo.com", domains=["foo.com"], features=["webmention"] + ) + self.source.put() + self.item = {"id": "A", "content": "B"} + self.feed = {"items": [self.item]} + + def assert_blogposts(self, expected): + got = list(BlogPost.query()) + self.assert_entities_equal(expected, got, ignore=("created", "updated")) + + def test_subscribe(self): + expected = { + "hub.mode": "subscribe", + "hub.topic": "fake feed url", + "hub.callback": "http://localhost/fake/notify/foo.com", + "format": "json", + "retrieve": "true", + } + item_a = {"permalinkUrl": "A", "content": "a http://a.com a"} + item_b = {"permalinkUrl": "B", "summary": "b http://b.com b"} + feed = {"items": [item_a, {}, item_b]} + self.expect_requests_post( + superfeedr.PUSH_API_URL, feed, data=expected, auth=mox.IgnoreArg() + ) + + post_a = BlogPost( + id="A", source=self.source.key, feed_item=item_a, unsent=["http://a.com/"] + ) + post_b = BlogPost( + id="B", source=self.source.key, feed_item=item_b, unsent=["http://b.com/"] + ) + self.expect_task("propagate-blogpost", key=post_a) + self.expect_task("propagate-blogpost", key=post_b) + self.mox.ReplayAll() + + with self.app.test_request_context(): + superfeedr.subscribe(self.source) + self.assert_blogposts([post_a, post_b]) + + def test_handle_feed(self): + item_a = { + "permalinkUrl": "A", + "content": "a http://a.com http://foo.com/self/link b", + } + post_a = BlogPost( + id="A", + source=self.source.key, + feed_item=item_a, + # self link should be discarded + unsent=["http://a.com/"], + ) + self.expect_task("propagate-blogpost", key=post_a) + self.mox.ReplayAll() + + superfeedr.handle_feed({"items": [item_a]}, self.source) + self.assert_blogposts([post_a]) + + def test_handle_feed_no_items(self): + superfeedr.handle_feed({}, self.source) + self.assert_blogposts([]) + + superfeedr.handle_feed(None, self.source) + self.assert_blogposts([]) + + def test_handle_feed_disabled_source(self): + self.source.status = "disabled" + self.source.put() + superfeedr.handle_feed(self.feed, self.source) + self.assert_blogposts([]) + + def test_handle_feed_source_missing_webmention_feature(self): + self.source.features = ["listen"] + self.source.put() + superfeedr.handle_feed(self.feed, self.source) + self.assert_blogposts([]) + + def test_handle_feed_allows_bridgy_publish_links(self): + item = {"permalinkUrl": "A", "content": "a https://brid.gy/publish/twitter b"} + self.expect_task("propagate-blogpost", key=BlogPost(id="A")) + self.mox.ReplayAll() + + superfeedr.handle_feed({"items": [item]}, self.source) + self.assert_equals( + ["https://brid.gy/publish/twitter"], BlogPost.get_by_id("A").unsent + ) + + def test_handle_feed_unwraps_t_umblr_com_links(self): + item = { + "permalinkUrl": "A", + "id": "A", + "content": 'x y', + } + post = BlogPost( + id="A", source=self.source.key, feed_item=item, unsent=["http://wrap/ped"] + ) + self.expect_task("propagate-blogpost", key=post) + self.mox.ReplayAll() + + superfeedr.handle_feed({"items": [item]}, self.source) 
+ self.assert_blogposts([post]) + + def test_handle_feed_cleans_links(self): + item = { + "permalinkUrl": "A", + "id": "A", + "content": 'x
- """) + """, + ) + + def test_set_last_syndication_url(self): + """A successful posse-post-discovery round should set + last_syndication_url to approximately the current time. + """ + self.sources[0].domain_urls = ["http://author"] + FakeGrSource.DOMAIN = "source" + self.sources[0].last_syndication_url = None + self.sources[0].put() + + # leave at least one new response to trigger PPD + for r in self.responses[:-1]: + r.status = "complete" + r.put() + + self._expect_fetch_hfeed() + self.mox.ReplayAll() + self.post_task() - def test_set_last_syndication_url(self): - """A successful posse-post-discovery round should set - last_syndication_url to approximately the current time. - """ - self.sources[0].domain_urls = ['http://author'] - FakeGrSource.DOMAIN = 'source' - self.sources[0].last_syndication_url = None - self.sources[0].put() - - # leave at least one new response to trigger PPD - for r in self.responses[:-1]: - r.status = 'complete' - r.put() - - self._expect_fetch_hfeed() - self.mox.ReplayAll() - self.post_task() - - # query source - source = self.sources[0].key.get() - self.assertEqual(NOW, source.last_syndication_url) - - def test_multiple_activities_fetch_hfeed_once(self): - """Make sure that multiple activities only fetch the author's h-feed once. - """ - self.sources[0].domain_urls = ['http://author'] - self.sources[0].put() + # query source + source = self.sources[0].key.get() + self.assertEqual(NOW, source.last_syndication_url) - FakeGrSource.activities = self.activities + def test_multiple_activities_fetch_hfeed_once(self): + """Make sure that multiple activities only fetch the author's h-feed once.""" + self.sources[0].domain_urls = ["http://author"] + self.sources[0].put() - # syndicated urls need to be unique for this to be interesting - for letter, activity in zip(string.ascii_letters, FakeGrSource.activities): - activity['url'] = activity['object']['url'] = 'http://fa.ke/post/' + letter - activity['object']['content'] = 'foo bar' + FakeGrSource.activities = self.activities - self._expect_fetch_hfeed() - self.mox.ReplayAll() - self.post_task() + # syndicated urls need to be unique for this to be interesting + for letter, activity in zip(string.ascii_letters, FakeGrSource.activities): + activity["url"] = activity["object"]["url"] = "http://fa.ke/post/" + letter + activity["object"]["content"] = "foo bar" - def test_syndicated_post_does_not_prevent_fetch_hfeed(self): - """The original fix to fetch the source's h-feed only once per task - had a bug that prevented us from fetching the h-feed *at all* if - there was already a SyndicatedPost for the first activity. 
+ self._expect_fetch_hfeed() + self.mox.ReplayAll() + self.post_task() - https://github.com/snarfed/bridgy/issues/597#issuecomment-214079860 - """ - self.sources[0].domain_urls = ['http://author'] - self.sources[0].put() - - FakeGrSource.activities = self.activities - - # syndicated urls need to be unique for this to be interesting - for letter, activity in zip(string.ascii_letters, FakeGrSource.activities): - activity['url'] = activity['object']['url'] = 'http://fa.ke/post/' + letter - activity['object']['content'] = 'foo bar' - - # set up a blank, which will short-circuit fetch for the first activity - SyndicatedPost.insert_syndication_blank( - self.sources[0], - self.sources[0].canonicalize_url(self.activities[0].get('url'))) - - self._expect_fetch_hfeed() - self.mox.ReplayAll() - self.post_task() - - def _setup_refetch_hfeed(self): - self.sources[0].domain_urls = ['http://author'] - ten_min = datetime.timedelta(minutes=10) - self.sources[0].last_syndication_url = NOW - ten_min - self.sources[0].last_hfeed_refetch = NOW - models.Source.FAST_REFETCH - ten_min - self.sources[0].put() - - # pretend we've already done posse-post-discovery for the source - # and checked this permalink and found no back-links - SyndicatedPost(parent=self.sources[0].key, original=None, - syndication='https://fa.ke/post/url').put() - SyndicatedPost(parent=self.sources[0].key, - original='http://author/permalink', - syndication=None).put() - - # and all the status have already been sent - for r in self.responses: - r.status = 'complete' - r.put() - - def test_do_not_refetch_hfeed(self): - """Only 1 hour has passed since we last re-fetched the user's h-feed. Make - sure it is not fetched again.""" - self._setup_refetch_hfeed() - # too recent to fetch again - self.sources[0].last_hfeed_refetch = hour_ago = NOW - datetime.timedelta(hours=1) - self.sources[0].put() - - self.post_task(expect_poll=FakeSource.FAST_POLL) - self.assertEqual(hour_ago, self.sources[0].key.get().last_hfeed_refetch) - - # should still be a blank SyndicatedPost - relationships = SyndicatedPost.query( - SyndicatedPost.original == 'http://author/permalink', - ancestor=self.sources[0].key).fetch() - self.assertEqual(1, len(relationships)) - self.assertIsNone(relationships[0].syndication) - - # should not have repropagated any responses. tasks_client is stubbed - # out in tests, mox will complain if it gets called. - - def test_dont_repropagate_posses(self): - """If we find a syndication URL for a POSSE post, we shouldn't repropagate it. - """ - self.sources[0].domain_urls = ['http://author'] - self.sources[0].last_syndication_url = NOW - datetime.timedelta(minutes=10) - FakeGrSource.activities = [] - self.sources[0].put() - - # the one existing response is a POSSE of that post - resp = Response( - id='tag:or.ig,2013:9', - response_json='{}', - activities_json=['{"url": "http://fa.ke/post/url"}'], - source=self.sources[0].key, - status='complete', - original_posts=['http://author/permalink'], - ) - resp.put() - self.responses = [resp] - - self._expect_fetch_hfeed() - self.post_task(expect_poll=FakeSource.FAST_POLL) - - # shouldn't repropagate it - self.assertEqual('complete', resp.key.get().status) - - def test_do_refetch_hfeed(self): - """Emulate a situation where we've done posse-post-discovery earlier and - found no rel=syndication relationships for a particular silo URL. Every - two hours or so, we should refetch the author's page and check to see if - any new syndication links have been added or updated. 
- """ - self._setup_refetch_hfeed() - self._expect_fetch_hfeed() - # should repropagate all 12 responses - for resp in self.responses: - self.expect_task('propagate', response_key=resp) - - self.post_task(expect_poll=FakeSource.FAST_POLL) - - # should have a new SyndicatedPost - relationships = SyndicatedPost.query( - SyndicatedPost.original == 'http://author/permalink', - ancestor=self.sources[0].key).fetch() - self.assertEqual(1, len(relationships)) - self.assertEqual('https://fa.ke/post/url', relationships[0].syndication) - - source = self.sources[0].key.get() - self.assertEqual(NOW, source.last_syndication_url) - self.assertEqual(NOW, source.last_hfeed_refetch) - - def test_refetch_hfeed_trigger(self): - self.sources[0].domain_urls = ['http://author'] - FakeGrSource.DOMAIN = 'source' - self.sources[0].last_syndication_url = None - self.sources[0].last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER - self.sources[0].put() - - FakeGrSource.activities = [] - - self._expect_fetch_hfeed() - self.mox.ReplayAll() - self.post_task() - - def test_refetch_hfeed_repropagate_responses_query_expired(self): - """https://github.com/snarfed/bridgy/issues/515""" - class BadRequestError(BaseException): - pass - - self._test_refetch_hfeed_repropagate_responses_exception( - BadRequestError('The requested query has expired. Please restart it with the last cursor to read more results.')) - - def test_refetch_hfeed_repropagate_responses_timeout(self): - """https://github.com/snarfed/bridgy/issues/514""" - class Timeout(BaseException): - pass - - self._test_refetch_hfeed_repropagate_responses_exception( - Timeout('The datastore operation timed out, or the data was temporarily unavailable.')) - - def test_refetch_hfeed_repropagate_responses_http_exception_deadline(self): - self._test_refetch_hfeed_repropagate_responses_exception( - http.client.HTTPException('Deadline exceeded foo bar')) - - def _test_refetch_hfeed_repropagate_responses_exception(self, exception): - self._setup_refetch_hfeed() - self._expect_fetch_hfeed() - - self.mox.StubOutWithMock(Response, 'query') - Response.query(Response.source == self.sources[0].key).AndRaise(exception) - self.mox.ReplayAll() - - # should 200 - self.post_task() - self.assertEqual(NOW, self.sources[0].key.get().last_hfeed_refetch) - - def test_response_changed(self): - """If a response changes, we should repropagate it from scratch. - """ - source = self.sources[0] - activity = self.activities[0] - - # just one response: self.responses[0] - tags = activity['object']['tags'] - del activity['object']['tags'] - FakeGrSource.activities = [activity] - - # first change to response - self._change_response_and_poll() - - # second change to response - self._change_response_and_poll() - - # return new response *and* existing response. both should be stored in - # Source.seen_responses_cache_json - replies = activity['object']['replies']['items'] - replies.append(self.activities[1]['object']['replies']['items'][0]) - - self.expect_task('propagate', response_key=self.responses[4]) - - self.post_task(reset=True, expect_poll=FakeSource.FAST_POLL) - self.assert_equals(replies, json_loads(source.key.get().seen_responses_cache_json)) - self.responses[4].key.delete() - - # new responses that don't include existing response. cache will have - # existing response. 
- del activity['object']['replies'] - activity['object']['tags'] = tags - - self.mox.VerifyAll() - self.mox.UnsetStubs() - self.mox.StubOutWithMock(tasks_client, 'create_task') - for resp in self.responses[1:4]: - self.expect_task('propagate', response_key=resp) - - self.post_task(reset=True, expect_poll=FakeSource.FAST_POLL) - self.assert_equals([r.key for r in self.responses[:4]], - list(Response.query().iter(keys_only=True))) - self.assert_equals(tags, json_loads(source.key.get().seen_responses_cache_json)) - - def _change_response_and_poll(self): - resp = self.responses[0].key.get() or self.responses[0] - old_resp_jsons = resp.old_response_jsons + [resp.response_json] - targets = resp.sent = resp.unsent - resp.unsent = [] - resp.status = 'complete' - resp.put() - - reply = self.activities[0]['object']['replies']['items'][0] - reply['content'] += ' xyz' - new_resp_json = json_dumps(reply) - - self.expect_task('propagate', response_key=resp) - self.post_task(reset=True, expect_poll=FakeSource.FAST_POLL) - - resp = resp.key.get() - self.assertEqual(new_resp_json, resp.response_json) - self.assertEqual(old_resp_jsons, resp.old_response_jsons) - self.assertEqual('new', resp.status) - self.assertEqual(targets, resp.unsent) - self.assertEqual([], resp.sent) - - source = self.sources[0].key.get() - self.assert_equals([reply], json_loads(source.seen_responses_cache_json)) - - self.mox.VerifyAll() - self.mox.UnsetStubs() - self.mox.StubOutWithMock(tasks_client, 'create_task') - - def test_in_blocklist(self): - """Responses from blocked users should be ignored.""" - self.mox.StubOutWithMock(FakeSource, 'is_blocked') - FakeSource.is_blocked(mox.IgnoreArg()).AndReturn(False) - FakeSource.is_blocked(mox.IgnoreArg()).AndReturn(True) # block second response - FakeSource.is_blocked(mox.IgnoreArg()).MultipleTimes(10).AndReturn(False) - - expected = [self.responses[0]] + self.responses[2:] - for resp in expected: - self.expect_task('propagate', response_key=resp) - - self.post_task(expect_poll=FakeSource.FAST_POLL) - self.assertEqual(11, Response.query().count()) - self.assert_responses(expected) + def test_syndicated_post_does_not_prevent_fetch_hfeed(self): + """The original fix to fetch the source's h-feed only once per task + had a bug that prevented us from fetching the h-feed *at all* if + there was already a SyndicatedPost for the first activity. 
+ + https://github.com/snarfed/bridgy/issues/597#issuecomment-214079860 + """ + self.sources[0].domain_urls = ["http://author"] + self.sources[0].put() + + FakeGrSource.activities = self.activities + + # syndicated urls need to be unique for this to be interesting + for letter, activity in zip(string.ascii_letters, FakeGrSource.activities): + activity["url"] = activity["object"]["url"] = "http://fa.ke/post/" + letter + activity["object"]["content"] = "foo bar" + + # set up a blank, which will short-circuit fetch for the first activity + SyndicatedPost.insert_syndication_blank( + self.sources[0], + self.sources[0].canonicalize_url(self.activities[0].get("url")), + ) + + self._expect_fetch_hfeed() + self.mox.ReplayAll() + self.post_task() + + def _setup_refetch_hfeed(self): + self.sources[0].domain_urls = ["http://author"] + ten_min = datetime.timedelta(minutes=10) + self.sources[0].last_syndication_url = NOW - ten_min + self.sources[0].last_hfeed_refetch = NOW - models.Source.FAST_REFETCH - ten_min + self.sources[0].put() + + # pretend we've already done posse-post-discovery for the source + # and checked this permalink and found no back-links + SyndicatedPost( + parent=self.sources[0].key, + original=None, + syndication="https://fa.ke/post/url", + ).put() + SyndicatedPost( + parent=self.sources[0].key, + original="http://author/permalink", + syndication=None, + ).put() + + # and all the status have already been sent + for r in self.responses: + r.status = "complete" + r.put() + + def test_do_not_refetch_hfeed(self): + """Only 1 hour has passed since we last re-fetched the user's h-feed. Make + sure it is not fetched again.""" + self._setup_refetch_hfeed() + # too recent to fetch again + self.sources[0].last_hfeed_refetch = hour_ago = NOW - datetime.timedelta( + hours=1 + ) + self.sources[0].put() + + self.post_task(expect_poll=FakeSource.FAST_POLL) + self.assertEqual(hour_ago, self.sources[0].key.get().last_hfeed_refetch) + + # should still be a blank SyndicatedPost + relationships = SyndicatedPost.query( + SyndicatedPost.original == "http://author/permalink", + ancestor=self.sources[0].key, + ).fetch() + self.assertEqual(1, len(relationships)) + self.assertIsNone(relationships[0].syndication) + + # should not have repropagated any responses. tasks_client is stubbed + # out in tests, mox will complain if it gets called. + + def test_dont_repropagate_posses(self): + """If we find a syndication URL for a POSSE post, we shouldn't repropagate it.""" + self.sources[0].domain_urls = ["http://author"] + self.sources[0].last_syndication_url = NOW - datetime.timedelta(minutes=10) + FakeGrSource.activities = [] + self.sources[0].put() + + # the one existing response is a POSSE of that post + resp = Response( + id="tag:or.ig,2013:9", + response_json="{}", + activities_json=['{"url": "http://fa.ke/post/url"}'], + source=self.sources[0].key, + status="complete", + original_posts=["http://author/permalink"], + ) + resp.put() + self.responses = [resp] + + self._expect_fetch_hfeed() + self.post_task(expect_poll=FakeSource.FAST_POLL) + + # shouldn't repropagate it + self.assertEqual("complete", resp.key.get().status) + + def test_do_refetch_hfeed(self): + """Emulate a situation where we've done posse-post-discovery earlier and + found no rel=syndication relationships for a particular silo URL. Every + two hours or so, we should refetch the author's page and check to see if + any new syndication links have been added or updated. 
+ """ + self._setup_refetch_hfeed() + self._expect_fetch_hfeed() + # should repropagate all 12 responses + for resp in self.responses: + self.expect_task("propagate", response_key=resp) + + self.post_task(expect_poll=FakeSource.FAST_POLL) + + # should have a new SyndicatedPost + relationships = SyndicatedPost.query( + SyndicatedPost.original == "http://author/permalink", + ancestor=self.sources[0].key, + ).fetch() + self.assertEqual(1, len(relationships)) + self.assertEqual("https://fa.ke/post/url", relationships[0].syndication) + + source = self.sources[0].key.get() + self.assertEqual(NOW, source.last_syndication_url) + self.assertEqual(NOW, source.last_hfeed_refetch) + + def test_refetch_hfeed_trigger(self): + self.sources[0].domain_urls = ["http://author"] + FakeGrSource.DOMAIN = "source" + self.sources[0].last_syndication_url = None + self.sources[0].last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER + self.sources[0].put() + + FakeGrSource.activities = [] + + self._expect_fetch_hfeed() + self.mox.ReplayAll() + self.post_task() + + def test_refetch_hfeed_repropagate_responses_query_expired(self): + """https://github.com/snarfed/bridgy/issues/515""" + + class BadRequestError(BaseException): + pass + + self._test_refetch_hfeed_repropagate_responses_exception( + BadRequestError( + "The requested query has expired. Please restart it with the last cursor to read more results." + ) + ) + + def test_refetch_hfeed_repropagate_responses_timeout(self): + """https://github.com/snarfed/bridgy/issues/514""" + + class Timeout(BaseException): + pass + + self._test_refetch_hfeed_repropagate_responses_exception( + Timeout( + "The datastore operation timed out, or the data was temporarily unavailable." + ) + ) + + def test_refetch_hfeed_repropagate_responses_http_exception_deadline(self): + self._test_refetch_hfeed_repropagate_responses_exception( + http.client.HTTPException("Deadline exceeded foo bar") + ) + + def _test_refetch_hfeed_repropagate_responses_exception(self, exception): + self._setup_refetch_hfeed() + self._expect_fetch_hfeed() + + self.mox.StubOutWithMock(Response, "query") + Response.query(Response.source == self.sources[0].key).AndRaise(exception) + self.mox.ReplayAll() + + # should 200 + self.post_task() + self.assertEqual(NOW, self.sources[0].key.get().last_hfeed_refetch) + + def test_response_changed(self): + """If a response changes, we should repropagate it from scratch.""" + source = self.sources[0] + activity = self.activities[0] + + # just one response: self.responses[0] + tags = activity["object"]["tags"] + del activity["object"]["tags"] + FakeGrSource.activities = [activity] + + # first change to response + self._change_response_and_poll() + + # second change to response + self._change_response_and_poll() + + # return new response *and* existing response. both should be stored in + # Source.seen_responses_cache_json + replies = activity["object"]["replies"]["items"] + replies.append(self.activities[1]["object"]["replies"]["items"][0]) + + self.expect_task("propagate", response_key=self.responses[4]) + + self.post_task(reset=True, expect_poll=FakeSource.FAST_POLL) + self.assert_equals( + replies, json_loads(source.key.get().seen_responses_cache_json) + ) + self.responses[4].key.delete() + + # new responses that don't include existing response. cache will have + # existing response. 
+ del activity["object"]["replies"] + activity["object"]["tags"] = tags + + self.mox.VerifyAll() + self.mox.UnsetStubs() + self.mox.StubOutWithMock(tasks_client, "create_task") + for resp in self.responses[1:4]: + self.expect_task("propagate", response_key=resp) + + self.post_task(reset=True, expect_poll=FakeSource.FAST_POLL) + self.assert_equals( + [r.key for r in self.responses[:4]], + list(Response.query().iter(keys_only=True)), + ) + self.assert_equals(tags, json_loads(source.key.get().seen_responses_cache_json)) + + def _change_response_and_poll(self): + resp = self.responses[0].key.get() or self.responses[0] + old_resp_jsons = resp.old_response_jsons + [resp.response_json] + targets = resp.sent = resp.unsent + resp.unsent = [] + resp.status = "complete" + resp.put() + + reply = self.activities[0]["object"]["replies"]["items"][0] + reply["content"] += " xyz" + new_resp_json = json_dumps(reply) + + self.expect_task("propagate", response_key=resp) + self.post_task(reset=True, expect_poll=FakeSource.FAST_POLL) + + resp = resp.key.get() + self.assertEqual(new_resp_json, resp.response_json) + self.assertEqual(old_resp_jsons, resp.old_response_jsons) + self.assertEqual("new", resp.status) + self.assertEqual(targets, resp.unsent) + self.assertEqual([], resp.sent) + + source = self.sources[0].key.get() + self.assert_equals([reply], json_loads(source.seen_responses_cache_json)) + + self.mox.VerifyAll() + self.mox.UnsetStubs() + self.mox.StubOutWithMock(tasks_client, "create_task") + + def test_in_blocklist(self): + """Responses from blocked users should be ignored.""" + self.mox.StubOutWithMock(FakeSource, "is_blocked") + FakeSource.is_blocked(mox.IgnoreArg()).AndReturn(False) + FakeSource.is_blocked(mox.IgnoreArg()).AndReturn(True) # block second response + FakeSource.is_blocked(mox.IgnoreArg()).MultipleTimes(10).AndReturn(False) + + expected = [self.responses[0]] + self.responses[2:] + for resp in expected: + self.expect_task("propagate", response_key=resp) + + self.post_task(expect_poll=FakeSource.FAST_POLL) + self.assertEqual(11, Response.query().count()) + self.assert_responses(expected) class DiscoverTest(TaskTest): - post_url = '/_ah/queue/discover' - - def setUp(self): - super().setUp() - appengine_info.LOCAL = True - - def tearDown(self): - appengine_info.LOCAL = False - super().tearDown() - - def discover(self, **kwargs): - super().post_task(params={ - 'source_key': self.sources[0].key.urlsafe().decode(), - 'post_id': 'b', - }, **kwargs) - - def test_new(self): - """A new silo post we haven't seen before.""" - self.mox.StubOutWithMock(FakeSource, 'get_activities') - FakeSource.get_activities( - activity_id='b', fetch_replies=True, fetch_likes=True, fetch_shares=True, - user_id=self.sources[0].key.id()).AndReturn([self.activities[1]]) - for resp in self.responses[4:8]: - self.expect_task('propagate', response_key=resp) - self.mox.ReplayAll() - - self.assertEqual(0, Response.query().count()) - self.discover() - self.assert_responses(self.responses[4:8] + [Response( - id=self.activities[1]['id'], - type='post', - source=self.sources[0].key, - status='complete', - )], ignore=('activities_json', 'response_json', 'original_posts')) - - def test_no_post(self): - """Silo post not found.""" - self.mox.StubOutWithMock(tasks_client, 'create_task') - FakeGrSource.activities = [] - self.discover() - self.assert_responses([]) - - def test_restart_existing_tasks(self): - FakeGrSource.activities = [self.activities[1]] - - resps = self.responses[4:8] - resps[0].status = 'new' - resps[1].status = 
'processing' - resps[2].status = 'complete' - resps[3].status = 'error' - resps[0].sent = resps[1].error = resps[2].failed = resps[3].skipped = \ - ['http://target/2'] - for resp in resps: - resp.put() - for resp in resps: - self.expect_task('propagate', response_key=resp) - self.mox.ReplayAll() - - self.discover() - - for resp in Response.query(): - if resp.key.id() == self.activities[1]['id']: - continue - self.assert_equals('new', resp.status) - self.assert_equals(['http://target1/post/url', 'http://target/2'], - resp.unsent, resp.key) - - def test_reply(self): - """If the activity is a reply, we should also enqueue the in-reply-to post.""" - self.mox.StubOutWithMock(FakeSource, 'get_activities') - FakeSource.get_activities( - activity_id='b', fetch_replies=True, fetch_likes=True, fetch_shares=True, - user_id=self.sources[0].key.id()).AndReturn([{ - 'id': 'tag:fake.com:123', - 'object': { - 'id': 'tag:fake.com:123', - 'url': 'https://twitter.com/_/status/123', - 'inReplyTo': [{'id': 'tag:fake.com:456'}], - }, - }]) - self.expect_task('discover', source_key=self.sources[0], post_id='456') - self.mox.ReplayAll() - self.discover() - - def test_link_to_post(self): - """If the activity links to a post, we should enqueue it itself.""" - source = self.sources[0] - source.domain_urls = ['http://foo/'] - source.domains = ['foo'] - source.put() - - self.mox.StubOutWithMock(FakeSource, 'get_activities') - FakeSource.get_activities( - activity_id='b', fetch_replies=True, fetch_likes=True, fetch_shares=True, - user_id=self.sources[0].key.id()).AndReturn([{ - 'id': 'tag:fake.com:123', - 'object': { - 'author': {'id': 'tag:not-source'}, - 'id': 'tag:fake.com:123', - 'url': 'https://fake.com/_/status/123', - 'content': 'i like https://foo/post a lot', - }, - }]) - resp_key = ndb.Key('Response', 'tag:fake.com:123') - self.expect_task('propagate', response_key=resp_key) - self.mox.ReplayAll() - - self.discover() - resp = resp_key.get() - self.assert_equals('new', resp.status) - self.assert_equals(['https://foo/post'], resp.unsent) - - def test_get_activities_error(self): - self._test_get_activities_error(400) - - def test_get_activities_rate_limited(self): - self._test_get_activities_error(429) - - def _test_get_activities_error(self, status): - self.expect_get_activities(activity_id='b', user_id=self.sources[0].key.id() - ).AndRaise(urllib.error.HTTPError('url', status, 'Rate limited', {}, None)) - self.mox.StubOutWithMock(tasks_client, 'create_task') - self.mox.ReplayAll() - - self.discover(expected_status=ERROR_HTTP_RETURN_CODE) - self.assert_responses([]) - - def test_event_type(self): - self.mox.StubOutWithMock(FakeGrSource, 'get_event') - FakeGrSource.get_event('321').AndReturn(self.activities[0]) - for resp in self.responses[:4]: - self.expect_task('propagate', response_key=resp) - self.mox.ReplayAll() - - self.post_task(params={ - 'source_key': self.sources[0].key.urlsafe().decode(), - 'post_id': '321', - 'type': 'event', - }) - self.assert_responses(self.responses[:4] + [Response( - id=self.activities[0]['id'], - type='post', - source=self.sources[0].key, - status='complete', - )], ignore=('activities_json', 'response_json', 'original_posts')) + post_url = "/_ah/queue/discover" + + def setUp(self): + super().setUp() + appengine_info.LOCAL = True + + def tearDown(self): + appengine_info.LOCAL = False + super().tearDown() + + def discover(self, **kwargs): + super().post_task( + params={ + "source_key": self.sources[0].key.urlsafe().decode(), + "post_id": "b", + }, + **kwargs, + ) + + def 
test_new(self): + """A new silo post we haven't seen before.""" + self.mox.StubOutWithMock(FakeSource, "get_activities") + FakeSource.get_activities( + activity_id="b", + fetch_replies=True, + fetch_likes=True, + fetch_shares=True, + user_id=self.sources[0].key.id(), + ).AndReturn([self.activities[1]]) + for resp in self.responses[4:8]: + self.expect_task("propagate", response_key=resp) + self.mox.ReplayAll() + + self.assertEqual(0, Response.query().count()) + self.discover() + self.assert_responses( + self.responses[4:8] + + [ + Response( + id=self.activities[1]["id"], + type="post", + source=self.sources[0].key, + status="complete", + ) + ], + ignore=("activities_json", "response_json", "original_posts"), + ) + + def test_no_post(self): + """Silo post not found.""" + self.mox.StubOutWithMock(tasks_client, "create_task") + FakeGrSource.activities = [] + self.discover() + self.assert_responses([]) + + def test_restart_existing_tasks(self): + FakeGrSource.activities = [self.activities[1]] + + resps = self.responses[4:8] + resps[0].status = "new" + resps[1].status = "processing" + resps[2].status = "complete" + resps[3].status = "error" + resps[0].sent = resps[1].error = resps[2].failed = resps[3].skipped = [ + "http://target/2" + ] + for resp in resps: + resp.put() + for resp in resps: + self.expect_task("propagate", response_key=resp) + self.mox.ReplayAll() + self.discover() + + for resp in Response.query(): + if resp.key.id() == self.activities[1]["id"]: + continue + self.assert_equals("new", resp.status) + self.assert_equals( + ["http://target1/post/url", "http://target/2"], resp.unsent, resp.key + ) + + def test_reply(self): + """If the activity is a reply, we should also enqueue the in-reply-to post.""" + self.mox.StubOutWithMock(FakeSource, "get_activities") + FakeSource.get_activities( + activity_id="b", + fetch_replies=True, + fetch_likes=True, + fetch_shares=True, + user_id=self.sources[0].key.id(), + ).AndReturn( + [ + { + "id": "tag:fake.com:123", + "object": { + "id": "tag:fake.com:123", + "url": "https://twitter.com/_/status/123", + "inReplyTo": [{"id": "tag:fake.com:456"}], + }, + } + ] + ) + self.expect_task("discover", source_key=self.sources[0], post_id="456") + self.mox.ReplayAll() + self.discover() + + def test_link_to_post(self): + """If the activity links to a post, we should enqueue it itself.""" + source = self.sources[0] + source.domain_urls = ["http://foo/"] + source.domains = ["foo"] + source.put() + + self.mox.StubOutWithMock(FakeSource, "get_activities") + FakeSource.get_activities( + activity_id="b", + fetch_replies=True, + fetch_likes=True, + fetch_shares=True, + user_id=self.sources[0].key.id(), + ).AndReturn( + [ + { + "id": "tag:fake.com:123", + "object": { + "author": {"id": "tag:not-source"}, + "id": "tag:fake.com:123", + "url": "https://fake.com/_/status/123", + "content": "i like https://foo/post a lot", + }, + } + ] + ) + resp_key = ndb.Key("Response", "tag:fake.com:123") + self.expect_task("propagate", response_key=resp_key) + self.mox.ReplayAll() -class PropagateTest(TaskTest): + self.discover() + resp = resp_key.get() + self.assert_equals("new", resp.status) + self.assert_equals(["https://foo/post"], resp.unsent) - post_url = '/_ah/queue/propagate' + def test_get_activities_error(self): + self._test_get_activities_error(400) - def setUp(self): - super().setUp() - for r in self.responses[:4]: - r.put() + def test_get_activities_rate_limited(self): + self._test_get_activities_error(429) - def post_task(self, expected_status=200, response=None, 
**kwargs): - if response is None: - response = self.responses[0] - super().post_task( - expected_status=expected_status, - params={'response_key': response.key.urlsafe().decode()}, - **kwargs) + def _test_get_activities_error(self, status): + self.expect_get_activities( + activity_id="b", user_id=self.sources[0].key.id() + ).AndRaise(urllib.error.HTTPError("url", status, "Rate limited", {}, None)) + self.mox.StubOutWithMock(tasks_client, "create_task") + self.mox.ReplayAll() - def assert_response_is(self, status, leased_until=False, sent=[], error=[], - unsent=[], skipped=[], failed=[], response=None): - """Asserts that responses[0] has the given values in the datastore. - """ - if response is None: - response = self.responses[0] - response = response.key.get() - self.assertEqual(status, response.status) - if leased_until is not False: - self.assertEqual(leased_until, response.leased_until) - self.assert_equals(unsent, response.unsent) - self.assert_equals(sent, response.sent) - self.assert_equals(error, response.error) - self.assert_equals(skipped, response.skipped) - self.assert_equals(failed, response.failed) - - def expect_webmention(self, source_url=None, target='http://target1/post/url', - endpoint='http://webmention/endpoint', - discover=True, send=None, headers=util.REQUEST_HEADERS, - discover_status=200, send_status=200, **kwargs): - if source_url is None: - source_url = 'http://localhost/comment/fake/%s/a/1_2_a' % \ - self.sources[0].key.string_id() - - # discover - if discover: - html = f'' - call = self.expect_requests_get(target, html, headers=headers, - status_code=discover_status, **kwargs - ).InAnyOrder() - - # send - if send: - assert endpoint - if send or (send is None and endpoint): - call = self.expect_requests_post(endpoint, data={ - 'source': source_url, - 'target': target, - }, status_code=send_status, headers=headers, allow_redirects=False, - timeout=999, **kwargs).InAnyOrder() - - return call - - def test_propagate(self): - """Normal propagate tasks.""" - self.assertEqual('new', self.responses[0].status) - - id = self.sources[0].key.string_id() - for url in ( - 'http://localhost/comment/fake/%s/a/1_2_a' % id, - 'http://localhost/like/fake/%s/a/alice' % id, - 'http://localhost/repost/fake/%s/a/bob' % id, - 'http://localhost/react/fake/%s/a/bob/a_scissors_by_bob' % id, + self.discover(expected_status=ERROR_HTTP_RETURN_CODE) + self.assert_responses([]) + + def test_event_type(self): + self.mox.StubOutWithMock(FakeGrSource, "get_event") + FakeGrSource.get_event("321").AndReturn(self.activities[0]) + for resp in self.responses[:4]: + self.expect_task("propagate", response_key=resp) + self.mox.ReplayAll() + + self.post_task( + params={ + "source_key": self.sources[0].key.urlsafe().decode(), + "post_id": "321", + "type": "event", + } + ) + self.assert_responses( + self.responses[:4] + + [ + Response( + id=self.activities[0]["id"], + type="post", + source=self.sources[0].key, + status="complete", + ) + ], + ignore=("activities_json", "response_json", "original_posts"), + ) + + +class PropagateTest(TaskTest): + + post_url = "/_ah/queue/propagate" + + def setUp(self): + super().setUp() + for r in self.responses[:4]: + r.put() + + def post_task(self, expected_status=200, response=None, **kwargs): + if response is None: + response = self.responses[0] + super().post_task( + expected_status=expected_status, + params={"response_key": response.key.urlsafe().decode()}, + **kwargs, + ) + + def assert_response_is( + self, + status, + leased_until=False, + sent=[], + error=[], + 
unsent=[], + skipped=[], + failed=[], + response=None, ): - self.expect_webmention(source_url=url) - self.mox.ReplayAll() - - now = NOW - util.now_fn = lambda: now - - for r in self.responses[:4]: - now += datetime.timedelta(hours=1) - self.post_task(response=r) - self.assert_response_is('complete', now + LEASE_LENGTH, - sent=['http://target1/post/url'], response=r) - self.assert_equals(now, self.sources[0].key.get().last_webmention_sent) - util.webmention_endpoint_cache.clear() - - def test_propagate_from_error(self): - """A normal propagate task, with a response starting as 'error'.""" - self.responses[0].status = 'error' - self.responses[0].put() - - self.expect_webmention() - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', NOW + LEASE_LENGTH, - sent=['http://target1/post/url']) - self.assert_equals(NOW, self.sources[0].key.get().last_webmention_sent) - - def test_success_and_errors(self): - """We should send webmentions to the unsent and error targets.""" - self.responses[0].unsent = ['http://1', 'http://2', 'http://3', 'http://8'] - self.responses[0].error = ['http://4', 'http://5', 'http://6'] - self.responses[0].sent = ['http://7'] - self.responses[0].put() - - self.expect_webmention(target='http://1') - self.expect_webmention(target='http://8', discover_status=204) - self.expect_webmention(target='http://2', endpoint=None) - self.expect_webmention(target='http://3', send_status=500) - # 4XX should go into 'failed' - self.expect_webmention(target='http://4', send_status=404) - self.expect_webmention(target='http://5', send_status=403) - # 5XX should go into 'error' - self.expect_webmention(target='http://6', send_status=500) - - self.mox.ReplayAll() - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - self.assert_response_is('error', - sent=['http://7', 'http://1', 'http://8'], - error=['http://3', 'http://6'], - failed=['http://4', 'http://5'], - skipped=['http://2']) - self.assertEqual(NOW, self.sources[0].key.get().last_webmention_sent) - - def test_cached_webmention_discovery(self): - """Webmention endpoints should be cached.""" - self.expect_webmention() - # second webmention should use the cached endpoint - self.expect_webmention(discover=False) - - self.mox.ReplayAll() - self.post_task() - - self.responses[0].status = 'new' - self.responses[0].put() - self.post_task() - - def test_cached_webmention_discovery_error(self): - """Failed webmention discovery should be cached too.""" - self.expect_webmention(endpoint=None) - # second time shouldn't try to send a webmention - - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', skipped=['http://target1/post/url']) - - self.responses[0].status = 'new' - self.responses[0].put() - self.post_task() - self.assert_response_is('complete', skipped=['http://target1/post/url']) - - def test_errors_and_caching_endpoint(self): - """Only cache on wm endpoint failures, not discovery failures.""" - self.expect_webmention(send=False).AndRaise(requests.ConnectionError()) - # shouldn't have a cached endpoint - self.expect_webmention(send_status=500) - # should have and use a cached endpoint - self.expect_webmention(discover=False) - self.mox.ReplayAll() - - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - self.assert_response_is('error', error=['http://target1/post/url']) - - self.responses[0].status = 'new' - self.responses[0].put() - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - self.assert_response_is('error', error=['http://target1/post/url']) - - 
self.responses[0].status = 'new' - self.responses[0].put() - self.post_task() - self.assert_response_is('complete', sent=['http://target1/post/url']) - - def test_cached_webmention_discovery_shouldnt_refresh_cache(self): - """A cached webmention discovery shouldn't be written back to the cache.""" - # first wm discovers and finds no endpoint, second uses cache, third rediscovers - self.expect_webmention(endpoint=None) - self.expect_webmention() - self.mox.ReplayAll() - - # inject a fake time.time into the cache - now = time.time() - util.webmention_endpoint_cache = TTLCache(500, 2, timer=lambda: now) - - self.post_task() - self.assert_response_is('complete', skipped=['http://target1/post/url']) - - now += 1 - self.responses[0].status = 'new' - self.responses[0].put() - self.post_task() - self.assert_response_is('complete', skipped=['http://target1/post/url']) - - now += 2 - self.responses[0].status = 'new' - self.responses[0].put() - self.post_task() - self.assert_response_is('complete', sent=['http://target1/post/url']) - - def test_webmention_blocklist(self): - """Target URLs with domains in the blocklist should be ignored. - - TODO: also invalid URLs that can't be parsed by urlparse? - """ - self.responses[0].unsent = ['http://t.co/bad', 'http://foo/good'] - self.responses[0].error = ['http://instagr.am/bad', - # urlparse raises ValueError: Invalid IPv6 URL - 'http://foo]'] - self.responses[0].put() - - self.expect_webmention(target='http://foo/good') - self.mox.ReplayAll() - - self.post_task() - self.assert_response_is('complete', sent=['http://foo/good']) - - def test_non_html_url(self): - """Target URLs that aren't HTML should be ignored.""" - self.expect_requests_head('http://target1/post/url', - content_type='application/mpeg') - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete') - - def test_non_html_file(self): - """If our HEAD fails, we should still require content-type text/html.""" - self.expect_requests_head('http://target1/post/url', status_code=405) - self.expect_webmention(content_type='image/gif', send=False) - - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', skipped=['http://target1/post/url']) - - def test_non_html_file_extension(self): - """If our HEAD fails, we should infer type from file extension.""" - self.responses[0].unsent = ['http://this/is/a.pdf'] - self.responses[0].put() - - self.expect_webmention(target='http://this/is/a.pdf', send_status=405, - # we should ignore an error response's content type - content_type='text/html') - - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', failed=['http://this/is/a.pdf']) - - def test_content_type_html_with_charset(self): - """We should handle Content-Type: text/html; charset=... 
ok.""" - self.expect_webmention(content_type='text/html; charset=utf-8') - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', sent=['http://target1/post/url']) - - def test_no_content_type_header(self): - """If the Content-Type header is missing, we should assume text/html.""" - self.expect_webmention(content_type=None) - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', sent=['http://target1/post/url']) - - def test_link_header_rel_webmention_unquoted(self): - """We should support rel=webmention (no quotes) in the Link header.""" - self.expect_webmention( - response_headers={'Link': '; rel=webmention'}) - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', sent=['http://target1/post/url']) - - def test_webmention_post_accept_header(self): - """The webmention POST request should send Accept: */*.""" - self.responses[0].source = Twitter(id='rhiaro').put() - self.responses[0].put() - self.expect_requests_get( - 'http://target1/post/url', timeout=15, headers=util.REQUEST_HEADERS_CONNEG, - response_headers={'Link': '; rel=webmention'}) - - self.expect_requests_post( - 'http://my/endpoint', timeout=999, - data={'source': 'http://localhost/comment/twitter/rhiaro/a/1_2_a', - 'target': 'http://target1/post/url'}, - allow_redirects=False, headers={'Accept': '*/*'}) - - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', sent=['http://target1/post/url']) - - def test_no_targets(self): - """No target URLs.""" - self.responses[0].unsent = [] - self.responses[0].put() - - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', NOW + LEASE_LENGTH) - - def test_unicode_in_target_url(self): - """Target URLs with escaped unicode chars should work ok. 
- Background: https://github.com/snarfed/bridgy/issues/248 - """ - url = 'https://maps/?q=' + urllib.parse.quote_plus('3 Cours de la République'.encode()) - self.responses[0].unsent = [url] - self.responses[0].put() - - self.expect_webmention(target=url) - self.mox.ReplayAll() - - self.post_task() - self.assert_response_is('complete', sent=[url]) - - def test_already_complete(self): - """If the response has already been propagated, do nothing.""" - self.responses[0].status = 'complete' - self.responses[0].put() - - self.post_task() - self.assert_response_is('complete', unsent=['http://target1/post/url']) - - def test_set_webmention_endpoint(self): - """Should set Source.webmention_endpoint if it's unset.""" - self.responses[0].unsent = ['http://bar/1', 'http://foo/2'] - self.responses[0].put() - - self.assertIsNone(self.sources[0].webmention_endpoint) - self.sources[0].domains = ['foo'] - self.sources[0].put() - - # target isn't in source.domains - self.expect_webmention(target='http://bar/1', endpoint='http://no') - # target is in source.domains - self.expect_webmention(target='http://foo/2', endpoint='http://yes') - - self.mox.ReplayAll() - self.post_task() - self.assert_equals('http://yes', self.sources[0].key.get().webmention_endpoint) - - def test_leased(self): - """If the response is processing and the lease hasn't expired, do nothing.""" - self.responses[0].status = 'processing' - leased_until = NOW + datetime.timedelta(minutes=1) - self.responses[0].leased_until = leased_until - self.responses[0].put() - - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - self.assert_response_is('processing', leased_until, - unsent=['http://target1/post/url']) - - response = self.responses[0].key.get() - self.assertEqual('processing', response.status) - self.assertEqual(leased_until, response.leased_until) - - def test_lease_expired(self): - """If the response is processing but the lease has expired, process it.""" - self.responses[0].status = 'processing' - self.responses[0].leased_until = NOW - datetime.timedelta(minutes=1) - self.responses[0].put() - - self.expect_webmention() - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', NOW + LEASE_LENGTH, - sent=['http://target1/post/url']) - - def test_no_response(self): - """If the response doesn't exist, the request should fail.""" - self.responses[0].key.delete() - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - - def test_no_source(self): - """If the source doesn't exist, the request should give up.""" - self.sources[0].key.delete() - self.post_task(expected_status=200) - - def test_non_public_activity(self): - """If the activity is non-public, we should give up.""" - activity = json_loads(self.responses[0].activities_json[0]) - activity['to'] = [{'objectType':'group', 'alias':'@private'}] - self.responses[0].activities_json = [json_dumps(activity)] - self.responses[0].put() - - self.post_task() - self.assert_response_is('complete', unsent=['http://target1/post/url'], sent=[]) - - def test_non_public_response(self): - """If the response is non-public, we should give up.""" - resp = json_loads(self.responses[0].response_json) - resp['to'] = [{'objectType':'group', 'alias':'@private'}] - self.responses[0].response_json = json_dumps(resp) - self.responses[0].put() - - self.post_task() - self.assert_response_is('complete', unsent=['http://target1/post/url'], sent=[]) - - def test_webmention_no_endpoint(self): - self.expect_webmention(endpoint=None) - self.mox.ReplayAll() - self.post_task() - 
self.assert_response_is('complete', skipped=['http://target1/post/url']) - - def test_webmention_discover_400(self): - self.expect_webmention(discover_status=400) - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', sent=['http://target1/post/url']) - - def test_webmention_send_400(self): - self.expect_webmention(send_status=400) - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', failed=['http://target1/post/url']) - - def test_webmention_discover_500(self): - self.expect_webmention(discover_status=500) - self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', sent=['http://target1/post/url']) - - def test_webmention_send_500(self): - self.expect_webmention(send_status=500) - self.mox.ReplayAll() - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - self.assert_response_is('error', error=['http://target1/post/url']) - - def test_webmention_bad_target_url(self): - self.responses[0].unsent = ['not a url'] - self.responses[0].put() - self.post_task() - self.assert_response_is('complete') - - def test_webmention_fail_and_succeed(self): - """All webmentions should be attempted, but any failure sets error status.""" - self.responses[0].unsent = ['http://first', 'http://second'] - self.responses[0].put() - self.expect_webmention(target='http://first', send_status=500) - self.expect_webmention(target='http://second') - - self.mox.ReplayAll() - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - self.assert_response_is('error', None, error=['http://first'], - sent=['http://second']) - self.assert_equals(NOW, self.sources[0].key.get().last_webmention_sent) - - def test_webmention_exception(self): - """Exceptions on individual target URLs shouldn't stop the whole task.""" - self.responses[0].unsent = ['http://error', 'http://good'] - self.responses[0].put() - self.expect_webmention(target='http://error').AndRaise(Exception('foo')) - self.expect_webmention(target='http://good') - self.mox.ReplayAll() - - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - self.assert_response_is('error', None, error=['http://error'], - sent=['http://good']) - self.assert_equals(NOW, self.sources[0].key.get().last_webmention_sent) - - def test_dns_failure(self): - """If DNS lookup fails for a URL, we should give up. 
- https://github.com/snarfed/bridgy/issues/254 - """ - self.responses[0].put() - self.expect_webmention(send=False).AndRaise( - requests.exceptions.ConnectionError('DNS lookup failed for URL: foo')) - self.mox.ReplayAll() + """Asserts that responses[0] has the given values in the datastore.""" + if response is None: + response = self.responses[0] + response = response.key.get() + self.assertEqual(status, response.status) + if leased_until is not False: + self.assertEqual(leased_until, response.leased_until) + self.assert_equals(unsent, response.unsent) + self.assert_equals(sent, response.sent) + self.assert_equals(error, response.error) + self.assert_equals(skipped, response.skipped) + self.assert_equals(failed, response.failed) + + def expect_webmention( + self, + source_url=None, + target="http://target1/post/url", + endpoint="http://webmention/endpoint", + discover=True, + send=None, + headers=util.REQUEST_HEADERS, + discover_status=200, + send_status=200, + **kwargs, + ): + if source_url is None: + source_url = ( + "http://localhost/comment/fake/%s/a/1_2_a" + % self.sources[0].key.string_id() + ) + + # discover + if discover: + html = f'' + call = self.expect_requests_get( + target, html, headers=headers, status_code=discover_status, **kwargs + ).InAnyOrder() + + # send + if send: + assert endpoint + if send or (send is None and endpoint): + call = self.expect_requests_post( + endpoint, + data={ + "source": source_url, + "target": target, + }, + status_code=send_status, + headers=headers, + allow_redirects=False, + timeout=999, + **kwargs, + ).InAnyOrder() + + return call + + def test_propagate(self): + """Normal propagate tasks.""" + self.assertEqual("new", self.responses[0].status) + + id = self.sources[0].key.string_id() + for url in ( + "http://localhost/comment/fake/%s/a/1_2_a" % id, + "http://localhost/like/fake/%s/a/alice" % id, + "http://localhost/repost/fake/%s/a/bob" % id, + "http://localhost/react/fake/%s/a/bob/a_scissors_by_bob" % id, + ): + self.expect_webmention(source_url=url) + self.mox.ReplayAll() - self.post_task() - self.assert_response_is('complete', failed=['http://target1/post/url']) + now = NOW + util.now_fn = lambda: now + + for r in self.responses[:4]: + now += datetime.timedelta(hours=1) + self.post_task(response=r) + self.assert_response_is( + "complete", + now + LEASE_LENGTH, + sent=["http://target1/post/url"], + response=r, + ) + self.assert_equals(now, self.sources[0].key.get().last_webmention_sent) + util.webmention_endpoint_cache.clear() + + def test_propagate_from_error(self): + """A normal propagate task, with a response starting as 'error'.""" + self.responses[0].status = "error" + self.responses[0].put() + + self.expect_webmention() + self.mox.ReplayAll() + self.post_task() + self.assert_response_is( + "complete", NOW + LEASE_LENGTH, sent=["http://target1/post/url"] + ) + self.assert_equals(NOW, self.sources[0].key.get().last_webmention_sent) + + def test_success_and_errors(self): + """We should send webmentions to the unsent and error targets.""" + self.responses[0].unsent = ["http://1", "http://2", "http://3", "http://8"] + self.responses[0].error = ["http://4", "http://5", "http://6"] + self.responses[0].sent = ["http://7"] + self.responses[0].put() + + self.expect_webmention(target="http://1") + self.expect_webmention(target="http://8", discover_status=204) + self.expect_webmention(target="http://2", endpoint=None) + self.expect_webmention(target="http://3", send_status=500) + # 4XX should go into 'failed' + 
self.expect_webmention(target="http://4", send_status=404) + self.expect_webmention(target="http://5", send_status=403) + # 5XX should go into 'error' + self.expect_webmention(target="http://6", send_status=500) - def test_redirect_to_too_long_url(self): - """If a URL redirects to one over the URL length limit, we should skip it. + self.mox.ReplayAll() + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + self.assert_response_is( + "error", + sent=["http://7", "http://1", "http://8"], + error=["http://3", "http://6"], + failed=["http://4", "http://5"], + skipped=["http://2"], + ) + self.assertEqual(NOW, self.sources[0].key.get().last_webmention_sent) + + def test_cached_webmention_discovery(self): + """Webmention endpoints should be cached.""" + self.expect_webmention() + # second webmention should use the cached endpoint + self.expect_webmention(discover=False) - https://github.com/snarfed/bridgy/issues/273 - """ - too_long = 'http://host/' + 'x' * _MAX_STRING_LENGTH - self.expect_requests_head('http://target1/post/url', redirected_url=too_long) - self.mox.ReplayAll() - - self.post_task() - self.assert_response_is('complete', failed=['http://target1/post/url']) - - def test_translate_appspot_to_brid_gy(self): - """Tasks on brid-gy.appspot.com should translate source URLs to brid.gy.""" - self.responses[0].unsent = ['http://good'] - self.responses[0].put() - source_url = 'https://brid.gy/comment/fake/%s/a/1_2_a' % \ - self.sources[0].key.string_id() - self.expect_webmention(source_url=source_url, target='http://good') - - self.mox.ReplayAll() - self.post_task(base_url='http://brid-gy.appspot.com') - - def test_activity_id_not_tag_uri(self): - """If the activity id isn't a tag uri, we should just use it verbatim.""" - activity = json_loads(self.responses[0].activities_json[0]) - activity['id'] = 'AAA' - self.responses[0].activities_json = [json_dumps(activity)] - - self.responses[0].unsent = ['http://good'] - self.responses[0].put() - - source_url = 'https://brid.gy/comment/fake/%s/AAA/1_2_a' % \ - self.sources[0].key.string_id() - self.expect_webmention(source_url=source_url, target='http://good') - - self.mox.ReplayAll() - self.post_task(base_url='https://brid.gy') - - def test_response_with_multiple_activities(self): - """Should use Response.urls_to_activity to generate the source URLs. - """ - self.responses[0].activities_json = [ - '{"id": "000"}', '{"id": "111"}', '{"id": "222"}'] - self.responses[0].unsent = ['http://AAA', 'http://BBB', 'http://CCC'] - self.responses[0].urls_to_activity = json_dumps( - {'http://AAA': 0, 'http://BBB': 1, 'http://CCC': 2}) - self.responses[0].put() - - source_url = 'https://brid.gy/comment/fake/%s/%%s/1_2_a' % \ - self.sources[0].key.string_id() - self.expect_webmention(source_url=source_url % '000', target='http://AAA') - self.expect_webmention(source_url=source_url % '111', target='http://BBB') - self.expect_webmention(source_url=source_url % '222', target='http://CCC') - - self.mox.ReplayAll() - self.post_task(base_url='https://brid.gy') - - def test_complete_exception(self): - """If completing raises an exception, the lease should be released.""" - self.expect_webmention() - self.mox.StubOutWithMock(tasks.PropagateResponse, 'complete') - tasks.PropagateResponse.complete().AndRaise(Exception('foo')) - self.mox.ReplayAll() - - self.post_task(expected_status=500) - self.assert_response_is('error', None, sent=['http://target1/post/url']) - - def test_source_url_key_error(self): - """We should gracefully retry when we hit the KeyError bug. 
- - https://github.com/snarfed/bridgy/issues/237 - """ - self.responses[0].urls_to_activity = json_dumps({'bad': 9}) - self.responses[0].put() - self.mox.ReplayAll() - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + self.mox.ReplayAll() + self.post_task() - def test_source_url_index_error(self): - """We should gracefully retry when we hit the IndexError bug. + self.responses[0].status = "new" + self.responses[0].put() + self.post_task() - https://github.com/snarfed/bridgy/issues/237 - """ - self.responses[0].activities_json = [] - self.responses[0].put() - self.mox.ReplayAll() - self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) - - def test_propagate_blogpost(self): - """Blog post propagate task.""" - source_key = FakeSource.new(domains=['fake']).put() - links = ['http://fake/post', '/no/domain', 'http://ok/one.png', - 'http://ok/two', 'http://ok/two', # repeated - ] - blogpost = models.BlogPost(id='http://x', source=source_key, unsent=links) - blogpost.put() - - self.expect_requests_head('http://fake/post') - self.expect_requests_head('http://ok/one.png', content_type='image/png') - self.expect_requests_head('http://ok/two') - self.expect_webmention(source_url='http://x', target='http://ok/two') - self.mox.ReplayAll() - - self.post_url = '/_ah/queue/propagate-blogpost' - super().post_task(params={'key': blogpost.key.urlsafe().decode()}) - self.assert_response_is('complete', NOW + LEASE_LENGTH, - sent=['http://ok/two'], response=blogpost) - self.assert_equals(NOW, source_key.get().last_webmention_sent) - - def test_propagate_blogpost_allows_bridgy_publish_links(self): - source_key = FakeSource.new(domains=['fake']).put() - blogpost = models.BlogPost(id='http://x', source=source_key, - unsent=['https://brid.gy/publish/twitter']) - blogpost.put() - - self.expect_requests_head('https://brid.gy/publish/twitter') - self.expect_webmention( - source_url='http://x', - target='https://brid.gy/publish/twitter', - endpoint='https://brid.gy/publish/webmention') - self.mox.ReplayAll() - - self.post_url = '/_ah/queue/propagate-blogpost' - super().post_task(params={'key': blogpost.key.urlsafe().decode()}) - self.assert_response_is('complete', response=blogpost, - sent=['https://brid.gy/publish/twitter']) - - def test_propagate_blogpost_follows_redirects_before_checking_self_link(self): - source_key = FakeSource.new(domains=['fake']).put() - blogpost = models.BlogPost(id='http://x', source=source_key, - unsent=['http://will/redirect']) - blogpost.put() - - self.expect_requests_head('http://will/redirect', - redirected_url='http://www.fake/self/link') - self.mox.ReplayAll() - - self.post_url = '/_ah/queue/propagate-blogpost' - super().post_task(params={'key': blogpost.key.urlsafe().decode()}) - self.assert_response_is('complete', response=blogpost) - - def test_post_response(self): - """Responses with type 'post' (ie mentions) are their own activity. 
- - https://github.com/snarfed/bridgy/issues/456 - """ - self.responses[0].type = 'post' - self.responses[0].response_json = json_dumps(json_loads( - self.responses[0].activities_json[0])) - self.responses[0].put() + def test_cached_webmention_discovery_error(self): + """Failed webmention discovery should be cached too.""" + self.expect_webmention(endpoint=None) + # second time shouldn't try to send a webmention + + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", skipped=["http://target1/post/url"]) + + self.responses[0].status = "new" + self.responses[0].put() + self.post_task() + self.assert_response_is("complete", skipped=["http://target1/post/url"]) + + def test_errors_and_caching_endpoint(self): + """Only cache on wm endpoint failures, not discovery failures.""" + self.expect_webmention(send=False).AndRaise(requests.ConnectionError()) + # shouldn't have a cached endpoint + self.expect_webmention(send_status=500) + # should have and use a cached endpoint + self.expect_webmention(discover=False) + self.mox.ReplayAll() + + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + self.assert_response_is("error", error=["http://target1/post/url"]) - self.expect_webmention(source_url='http://localhost/post/fake/0123456789/a') - self.mox.ReplayAll() - self.post_task() + self.responses[0].status = "new" + self.responses[0].put() + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + self.assert_response_is("error", error=["http://target1/post/url"]) + + self.responses[0].status = "new" + self.responses[0].put() + self.post_task() + self.assert_response_is("complete", sent=["http://target1/post/url"]) + + def test_cached_webmention_discovery_shouldnt_refresh_cache(self): + """A cached webmention discovery shouldn't be written back to the cache.""" + # first wm discovers and finds no endpoint, second uses cache, third rediscovers + self.expect_webmention(endpoint=None) + self.expect_webmention() + self.mox.ReplayAll() + + # inject a fake time.time into the cache + now = time.time() + util.webmention_endpoint_cache = TTLCache(500, 2, timer=lambda: now) + + self.post_task() + self.assert_response_is("complete", skipped=["http://target1/post/url"]) + + now += 1 + self.responses[0].status = "new" + self.responses[0].put() + self.post_task() + self.assert_response_is("complete", skipped=["http://target1/post/url"]) + + now += 2 + self.responses[0].status = "new" + self.responses[0].put() + self.post_task() + self.assert_response_is("complete", sent=["http://target1/post/url"]) + + def test_webmention_blocklist(self): + """Target URLs with domains in the blocklist should be ignored. + + TODO: also invalid URLs that can't be parsed by urlparse? 
+ """ + self.responses[0].unsent = ["http://t.co/bad", "http://foo/good"] + self.responses[0].error = [ + "http://instagr.am/bad", + # urlparse raises ValueError: Invalid IPv6 URL + "http://foo]", + ] + self.responses[0].put() + + self.expect_webmention(target="http://foo/good") + self.mox.ReplayAll() + + self.post_task() + self.assert_response_is("complete", sent=["http://foo/good"]) + + def test_non_html_url(self): + """Target URLs that aren't HTML should be ignored.""" + self.expect_requests_head( + "http://target1/post/url", content_type="application/mpeg" + ) + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete") + + def test_non_html_file(self): + """If our HEAD fails, we should still require content-type text/html.""" + self.expect_requests_head("http://target1/post/url", status_code=405) + self.expect_webmention(content_type="image/gif", send=False) + + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", skipped=["http://target1/post/url"]) + + def test_non_html_file_extension(self): + """If our HEAD fails, we should infer type from file extension.""" + self.responses[0].unsent = ["http://this/is/a.pdf"] + self.responses[0].put() + + self.expect_webmention( + target="http://this/is/a.pdf", + send_status=405, + # we should ignore an error response's content type + content_type="text/html", + ) + + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", failed=["http://this/is/a.pdf"]) + + def test_content_type_html_with_charset(self): + """We should handle Content-Type: text/html; charset=... ok.""" + self.expect_webmention(content_type="text/html; charset=utf-8") + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", sent=["http://target1/post/url"]) + + def test_no_content_type_header(self): + """If the Content-Type header is missing, we should assume text/html.""" + self.expect_webmention(content_type=None) + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", sent=["http://target1/post/url"]) + + def test_link_header_rel_webmention_unquoted(self): + """We should support rel=webmention (no quotes) in the Link header.""" + self.expect_webmention( + response_headers={"Link": "; rel=webmention"} + ) + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", sent=["http://target1/post/url"]) + + def test_webmention_post_accept_header(self): + """The webmention POST request should send Accept: */*.""" + self.responses[0].source = Twitter(id="rhiaro").put() + self.responses[0].put() + self.expect_requests_get( + "http://target1/post/url", + timeout=15, + headers=util.REQUEST_HEADERS_CONNEG, + response_headers={"Link": "; rel=webmention"}, + ) + + self.expect_requests_post( + "http://my/endpoint", + timeout=999, + data={ + "source": "http://localhost/comment/twitter/rhiaro/a/1_2_a", + "target": "http://target1/post/url", + }, + allow_redirects=False, + headers={"Accept": "*/*"}, + ) + + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", sent=["http://target1/post/url"]) + + def test_no_targets(self): + """No target URLs.""" + self.responses[0].unsent = [] + self.responses[0].put() + + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", NOW + LEASE_LENGTH) + + def test_unicode_in_target_url(self): + """Target URLs with escaped unicode chars should work ok. 
+ Background: https://github.com/snarfed/bridgy/issues/248 + """ + url = "https://maps/?q=" + urllib.parse.quote_plus( + "3 Cours de la République".encode() + ) + self.responses[0].unsent = [url] + self.responses[0].put() + + self.expect_webmention(target=url) + self.mox.ReplayAll() + + self.post_task() + self.assert_response_is("complete", sent=[url]) + + def test_already_complete(self): + """If the response has already been propagated, do nothing.""" + self.responses[0].status = "complete" + self.responses[0].put() + + self.post_task() + self.assert_response_is("complete", unsent=["http://target1/post/url"]) + + def test_set_webmention_endpoint(self): + """Should set Source.webmention_endpoint if it's unset.""" + self.responses[0].unsent = ["http://bar/1", "http://foo/2"] + self.responses[0].put() + + self.assertIsNone(self.sources[0].webmention_endpoint) + self.sources[0].domains = ["foo"] + self.sources[0].put() + + # target isn't in source.domains + self.expect_webmention(target="http://bar/1", endpoint="http://no") + # target is in source.domains + self.expect_webmention(target="http://foo/2", endpoint="http://yes") + + self.mox.ReplayAll() + self.post_task() + self.assert_equals("http://yes", self.sources[0].key.get().webmention_endpoint) + + def test_leased(self): + """If the response is processing and the lease hasn't expired, do nothing.""" + self.responses[0].status = "processing" + leased_until = NOW + datetime.timedelta(minutes=1) + self.responses[0].leased_until = leased_until + self.responses[0].put() + + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + self.assert_response_is( + "processing", leased_until, unsent=["http://target1/post/url"] + ) + + response = self.responses[0].key.get() + self.assertEqual("processing", response.status) + self.assertEqual(leased_until, response.leased_until) + + def test_lease_expired(self): + """If the response is processing but the lease has expired, process it.""" + self.responses[0].status = "processing" + self.responses[0].leased_until = NOW - datetime.timedelta(minutes=1) + self.responses[0].put() + + self.expect_webmention() + self.mox.ReplayAll() + self.post_task() + self.assert_response_is( + "complete", NOW + LEASE_LENGTH, sent=["http://target1/post/url"] + ) + + def test_no_response(self): + """If the response doesn't exist, the request should fail.""" + self.responses[0].key.delete() + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + + def test_no_source(self): + """If the source doesn't exist, the request should give up.""" + self.sources[0].key.delete() + self.post_task(expected_status=200) + + def test_non_public_activity(self): + """If the activity is non-public, we should give up.""" + activity = json_loads(self.responses[0].activities_json[0]) + activity["to"] = [{"objectType": "group", "alias": "@private"}] + self.responses[0].activities_json = [json_dumps(activity)] + self.responses[0].put() + + self.post_task() + self.assert_response_is("complete", unsent=["http://target1/post/url"], sent=[]) + + def test_non_public_response(self): + """If the response is non-public, we should give up.""" + resp = json_loads(self.responses[0].response_json) + resp["to"] = [{"objectType": "group", "alias": "@private"}] + self.responses[0].response_json = json_dumps(resp) + self.responses[0].put() + + self.post_task() + self.assert_response_is("complete", unsent=["http://target1/post/url"], sent=[]) + + def test_webmention_no_endpoint(self): + self.expect_webmention(endpoint=None) + self.mox.ReplayAll() + self.post_task() + 
self.assert_response_is("complete", skipped=["http://target1/post/url"]) + + def test_webmention_discover_400(self): + self.expect_webmention(discover_status=400) + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", sent=["http://target1/post/url"]) + + def test_webmention_send_400(self): + self.expect_webmention(send_status=400) + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", failed=["http://target1/post/url"]) + + def test_webmention_discover_500(self): + self.expect_webmention(discover_status=500) + self.mox.ReplayAll() + self.post_task() + self.assert_response_is("complete", sent=["http://target1/post/url"]) + + def test_webmention_send_500(self): + self.expect_webmention(send_status=500) + self.mox.ReplayAll() + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + self.assert_response_is("error", error=["http://target1/post/url"]) + + def test_webmention_bad_target_url(self): + self.responses[0].unsent = ["not a url"] + self.responses[0].put() + self.post_task() + self.assert_response_is("complete") + + def test_webmention_fail_and_succeed(self): + """All webmentions should be attempted, but any failure sets error status.""" + self.responses[0].unsent = ["http://first", "http://second"] + self.responses[0].put() + self.expect_webmention(target="http://first", send_status=500) + self.expect_webmention(target="http://second") + + self.mox.ReplayAll() + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + self.assert_response_is( + "error", None, error=["http://first"], sent=["http://second"] + ) + self.assert_equals(NOW, self.sources[0].key.get().last_webmention_sent) + + def test_webmention_exception(self): + """Exceptions on individual target URLs shouldn't stop the whole task.""" + self.responses[0].unsent = ["http://error", "http://good"] + self.responses[0].put() + self.expect_webmention(target="http://error").AndRaise(Exception("foo")) + self.expect_webmention(target="http://good") + self.mox.ReplayAll() + + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + self.assert_response_is( + "error", None, error=["http://error"], sent=["http://good"] + ) + self.assert_equals(NOW, self.sources[0].key.get().last_webmention_sent) + + def test_dns_failure(self): + """If DNS lookup fails for a URL, we should give up. + https://github.com/snarfed/bridgy/issues/254 + """ + self.responses[0].put() + self.expect_webmention(send=False).AndRaise( + requests.exceptions.ConnectionError("DNS lookup failed for URL: foo") + ) + self.mox.ReplayAll() + + self.post_task() + self.assert_response_is("complete", failed=["http://target1/post/url"]) + + def test_redirect_to_too_long_url(self): + """If a URL redirects to one over the URL length limit, we should skip it. 
+ + https://github.com/snarfed/bridgy/issues/273 + """ + too_long = "http://host/" + "x" * _MAX_STRING_LENGTH + self.expect_requests_head("http://target1/post/url", redirected_url=too_long) + self.mox.ReplayAll() + + self.post_task() + self.assert_response_is("complete", failed=["http://target1/post/url"]) + + def test_translate_appspot_to_brid_gy(self): + """Tasks on brid-gy.appspot.com should translate source URLs to brid.gy.""" + self.responses[0].unsent = ["http://good"] + self.responses[0].put() + source_url = ( + "https://brid.gy/comment/fake/%s/a/1_2_a" % self.sources[0].key.string_id() + ) + self.expect_webmention(source_url=source_url, target="http://good") + + self.mox.ReplayAll() + self.post_task(base_url="http://brid-gy.appspot.com") + + def test_activity_id_not_tag_uri(self): + """If the activity id isn't a tag uri, we should just use it verbatim.""" + activity = json_loads(self.responses[0].activities_json[0]) + activity["id"] = "AAA" + self.responses[0].activities_json = [json_dumps(activity)] + + self.responses[0].unsent = ["http://good"] + self.responses[0].put() + + source_url = ( + "https://brid.gy/comment/fake/%s/AAA/1_2_a" + % self.sources[0].key.string_id() + ) + self.expect_webmention(source_url=source_url, target="http://good") + + self.mox.ReplayAll() + self.post_task(base_url="https://brid.gy") + + def test_response_with_multiple_activities(self): + """Should use Response.urls_to_activity to generate the source URLs.""" + self.responses[0].activities_json = [ + '{"id": "000"}', + '{"id": "111"}', + '{"id": "222"}', + ] + self.responses[0].unsent = ["http://AAA", "http://BBB", "http://CCC"] + self.responses[0].urls_to_activity = json_dumps( + {"http://AAA": 0, "http://BBB": 1, "http://CCC": 2} + ) + self.responses[0].put() + + source_url = ( + "https://brid.gy/comment/fake/%s/%%s/1_2_a" + % self.sources[0].key.string_id() + ) + self.expect_webmention(source_url=source_url % "000", target="http://AAA") + self.expect_webmention(source_url=source_url % "111", target="http://BBB") + self.expect_webmention(source_url=source_url % "222", target="http://CCC") + + self.mox.ReplayAll() + self.post_task(base_url="https://brid.gy") + + def test_complete_exception(self): + """If completing raises an exception, the lease should be released.""" + self.expect_webmention() + self.mox.StubOutWithMock(tasks.PropagateResponse, "complete") + tasks.PropagateResponse.complete().AndRaise(Exception("foo")) + self.mox.ReplayAll() + + self.post_task(expected_status=500) + self.assert_response_is("error", None, sent=["http://target1/post/url"]) + + def test_source_url_key_error(self): + """We should gracefully retry when we hit the KeyError bug. + + https://github.com/snarfed/bridgy/issues/237 + """ + self.responses[0].urls_to_activity = json_dumps({"bad": 9}) + self.responses[0].put() + self.mox.ReplayAll() + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + + def test_source_url_index_error(self): + """We should gracefully retry when we hit the IndexError bug. 
+ + https://github.com/snarfed/bridgy/issues/237 + """ + self.responses[0].activities_json = [] + self.responses[0].put() + self.mox.ReplayAll() + self.post_task(expected_status=ERROR_HTTP_RETURN_CODE) + + def test_propagate_blogpost(self): + """Blog post propagate task.""" + source_key = FakeSource.new(domains=["fake"]).put() + links = [ + "http://fake/post", + "/no/domain", + "http://ok/one.png", + "http://ok/two", + "http://ok/two", # repeated + ] + blogpost = models.BlogPost(id="http://x", source=source_key, unsent=links) + blogpost.put() + + self.expect_requests_head("http://fake/post") + self.expect_requests_head("http://ok/one.png", content_type="image/png") + self.expect_requests_head("http://ok/two") + self.expect_webmention(source_url="http://x", target="http://ok/two") + self.mox.ReplayAll() + + self.post_url = "/_ah/queue/propagate-blogpost" + super().post_task(params={"key": blogpost.key.urlsafe().decode()}) + self.assert_response_is( + "complete", NOW + LEASE_LENGTH, sent=["http://ok/two"], response=blogpost + ) + self.assert_equals(NOW, source_key.get().last_webmention_sent) + + def test_propagate_blogpost_allows_bridgy_publish_links(self): + source_key = FakeSource.new(domains=["fake"]).put() + blogpost = models.BlogPost( + id="http://x", source=source_key, unsent=["https://brid.gy/publish/twitter"] + ) + blogpost.put() + + self.expect_requests_head("https://brid.gy/publish/twitter") + self.expect_webmention( + source_url="http://x", + target="https://brid.gy/publish/twitter", + endpoint="https://brid.gy/publish/webmention", + ) + self.mox.ReplayAll() + + self.post_url = "/_ah/queue/propagate-blogpost" + super().post_task(params={"key": blogpost.key.urlsafe().decode()}) + self.assert_response_is( + "complete", response=blogpost, sent=["https://brid.gy/publish/twitter"] + ) + + def test_propagate_blogpost_follows_redirects_before_checking_self_link(self): + source_key = FakeSource.new(domains=["fake"]).put() + blogpost = models.BlogPost( + id="http://x", source=source_key, unsent=["http://will/redirect"] + ) + blogpost.put() + + self.expect_requests_head( + "http://will/redirect", redirected_url="http://www.fake/self/link" + ) + self.mox.ReplayAll() + + self.post_url = "/_ah/queue/propagate-blogpost" + super().post_task(params={"key": blogpost.key.urlsafe().decode()}) + self.assert_response_is("complete", response=blogpost) + + def test_post_response(self): + """Responses with type 'post' (ie mentions) are their own activity. 
+ + https://github.com/snarfed/bridgy/issues/456 + """ + self.responses[0].type = "post" + self.responses[0].response_json = json_dumps( + json_loads(self.responses[0].activities_json[0]) + ) + self.responses[0].put() + + self.expect_webmention(source_url="http://localhost/post/fake/0123456789/a") + self.mox.ReplayAll() + self.post_task() class PropagateBlogPostTest(TaskTest): - post_url = '/_ah/queue/propagate-blogpost' + post_url = "/_ah/queue/propagate-blogpost" - def setUp(self): - super().setUp() - self.blogposts[0].unsent = ['http://foo', 'http://bar'] - self.blogposts[0].status = 'new' - self.blogposts[0].put() + def setUp(self): + super().setUp() + self.blogposts[0].unsent = ["http://foo", "http://bar"] + self.blogposts[0].status = "new" + self.blogposts[0].put() - def post_task(self, **kwargs): - super().post_task(params={'key': self.blogposts[0].key.urlsafe().decode()}, - **kwargs) + def post_task(self, **kwargs): + super().post_task( + params={"key": self.blogposts[0].key.urlsafe().decode()}, **kwargs + ) - def test_no_source(self): - """If the source doesn't exist, do nothing and let the task die.""" - self.sources[0].key.delete() - self.post_task() + def test_no_source(self): + """If the source doesn't exist, do nothing and let the task die.""" + self.sources[0].key.delete() + self.post_task() - def test_disabled_source(self): - """If the source is disabled, do nothing and let the task die.""" - self.sources[0].status = 'disabled' - self.sources[0].put() - self.post_task() + def test_disabled_source(self): + """If the source is disabled, do nothing and let the task die.""" + self.sources[0].status = "disabled" + self.sources[0].put() + self.post_task() diff --git a/tests/test_tumblr.py b/tests/test_tumblr.py index 53ec6bfd..86b7ad31 100644 --- a/tests/test_tumblr.py +++ b/tests/test_tumblr.py @@ -14,153 +14,188 @@ class TumblrTest(testutil.AppTest): - - def setUp(self): - super().setUp() - self.auth_entity = TumblrAuth(id='name', user_json=json_dumps({ - 'user': {'blogs': [{'url': 'other'}, - {'url': 'http://primary/', 'primary': True}]}})) - self.tumblr = Tumblr(id='my id', disqus_shortname='my-disqus-name') - - tumblr.DISQUS_API_KEY = 'my key' - tumblr.DISQUS_API_SECRET = 'my secret' - tumblr.DISQUS_ACCESS_TOKEN = 'my token' - - def disqus_params(self, params): - params.update({ - 'api_key': 'my key', - 'api_secret': 'my secret', - 'access_token': 'my token', - }) - return params - - def expect_thread_details(self, resp=None, **kwargs): - if resp is None: - resp = {'response': {'id': '87654'}} - self.expect_requests_get( - tumblr.DISQUS_API_THREAD_DETAILS_URL, - json_dumps(resp), - params=self.disqus_params({'forum': 'my-disqus-name', - 'thread':'link:http://primary/post/123999'}), - **kwargs) - - def test_new(self): - t = Tumblr.new(auth_entity=self.auth_entity) - self.assertEqual(self.auth_entity.key, t.auth_entity) - self.assertEqual('name', t.name) - self.assertEqual(['http://primary/'], t.domain_urls) - self.assertEqual(['primary'], t.domains) - self.assertEqual('http://api.tumblr.com/v2/blog/primary/avatar/512', t.picture) - - def test_new_no_primary_blog(self): - self.auth_entity.user_json = json_dumps({'user': {'blogs': [{'url': 'foo'}]}}) - with app.test_request_context(): - self.assertIsNone(Tumblr.new(auth_entity=self.auth_entity)) - self.assertIn('Tumblr blog not found', get_flashed_messages()[0]) - - def test_new_with_blog_name(self): - self.auth_entity.user_json = json_dumps({ - 'user': {'blogs': [{'url': 'foo'}, - {'name': 'bar', 'url': 'baz'}, - {'name': 
'biff', 'url': 'http://boff/'}, - ]}}) - got = Tumblr.new(auth_entity=self.auth_entity, blog_name='biff') - self.assertEqual(['http://boff/'], got.domain_urls) - self.assertEqual(['boff'], got.domains) - - def test_verify_default(self): - # based on http://snarfed.tumblr.com/ - self._test_verify_finds_disqus('') - - def test_verify_inspirewell_theme_1(self): - # based on http://circusriot.tumblr.com/ - self._test_verify_finds_disqus(" var disqus_shortname = 'my-disqus-name';") - - def test_verify_inspirewell_theme_2(self): - # based on http://circusriot.tumblr.com/ - self._test_verify_finds_disqus(' disqusUsername = "my-disqus-name";') - - def test_verify_require_aorcsik_theme(self): - # based on http://require.aorcsik.com/ - self._test_verify_finds_disqus( - ' dsq.src = "http://my-disqus-name.disqus.com/embed.js";') - - def _test_verify_finds_disqus(self, snippet): - self.expect_requests_get( - 'http://primary/', '\nstuff\n%s\n' % snippet) - self.mox.ReplayAll() - t = Tumblr.new(auth_entity=self.auth_entity, features=['webmention']) - t.verify() - self.assertEqual('my-disqus-name', t.disqus_shortname) - - def test_verify_without_disqus(self): - self.expect_requests_get('http://primary/', 'no disqus here!') - self.mox.ReplayAll() - t = Tumblr.new(auth_entity=self.auth_entity, features=['webmention']) - t.verify() - self.assertIsNone(t.disqus_shortname) - - def test_create_comment(self): - self.expect_thread_details() - self.expect_requests_post( - tumblr.DISQUS_API_CREATE_POST_URL, - json_dumps({'response': {'ok': 'sgtm'}}), - params=self.disqus_params({ - 'thread': '87654', - 'message': 'who: foo bar'})) - self.mox.ReplayAll() - - resp = self.tumblr.create_comment('http://primary/post/123999/xyz_abc?asdf', - 'who', 'http://who', 'foo bar') - self.assertEqual({'ok': 'sgtm'}, resp) - - def test_create_comment_with_unicode_chars(self): - self.expect_thread_details() - self.expect_requests_post( - tumblr.DISQUS_API_CREATE_POST_URL, - json_dumps({}), - params=self.disqus_params({ - 'thread': '87654', - 'message': 'Degenève: foo Degenève bar', - })) - self.mox.ReplayAll() - - resp = self.tumblr.create_comment('http://primary/post/123999/xyz_abc', - 'Degenève', 'http://who', 'foo Degenève bar') - self.assertEqual({}, resp) - - def test_create_comment_finds_disqus_shortname(self): - self.tumblr.disqus_shortname = None - - self.expect_requests_get('http://primary/post/123999', - "fooo var disqus_shortname = 'my-disqus-name';") - self.expect_thread_details() - self.expect_requests_post(tumblr.DISQUS_API_CREATE_POST_URL, - json_dumps({}), params=mox.IgnoreArg()) - self.mox.ReplayAll() - - self.tumblr.create_comment('http://primary/post/123999', '', '', '') - self.assertEqual('my-disqus-name', self.tumblr.key.get().disqus_shortname) - - def test_create_comment_doesnt_find_disqus_shortname(self): - self.tumblr.disqus_shortname = None - - self.expect_requests_get('http://primary/post/123999', 'no shortname here') - self.mox.ReplayAll() - - with self.assertRaises(BadRequest): - self.tumblr.create_comment('http://primary/post/123999', '', '', '') - - # not implemented yet. see https://github.com/snarfed/bridgy/issues/177. - # currently handled in webmention.error(). 
- # def test_create_comment_thread_lookup_fails(self): - # error = { - # 'code':2, - # 'response': "Invalid argument, 'thread': Unable to find thread 'link:xyz'", - # } - # self.expect_thread_details(status_code=400, resp=error) - # self.mox.ReplayAll() - - # resp = self.tumblr.create_comment('http://primary/post/123999/xyz_abc', - # 'who', 'http://who', 'foo bar') - # self.assert_equals(error, resp) + def setUp(self): + super().setUp() + self.auth_entity = TumblrAuth( + id="name", + user_json=json_dumps( + { + "user": { + "blogs": [ + {"url": "other"}, + {"url": "http://primary/", "primary": True}, + ] + } + } + ), + ) + self.tumblr = Tumblr(id="my id", disqus_shortname="my-disqus-name") + + tumblr.DISQUS_API_KEY = "my key" + tumblr.DISQUS_API_SECRET = "my secret" + tumblr.DISQUS_ACCESS_TOKEN = "my token" + + def disqus_params(self, params): + params.update( + { + "api_key": "my key", + "api_secret": "my secret", + "access_token": "my token", + } + ) + return params + + def expect_thread_details(self, resp=None, **kwargs): + if resp is None: + resp = {"response": {"id": "87654"}} + self.expect_requests_get( + tumblr.DISQUS_API_THREAD_DETAILS_URL, + json_dumps(resp), + params=self.disqus_params( + {"forum": "my-disqus-name", "thread": "link:http://primary/post/123999"} + ), + **kwargs + ) + + def test_new(self): + t = Tumblr.new(auth_entity=self.auth_entity) + self.assertEqual(self.auth_entity.key, t.auth_entity) + self.assertEqual("name", t.name) + self.assertEqual(["http://primary/"], t.domain_urls) + self.assertEqual(["primary"], t.domains) + self.assertEqual("http://api.tumblr.com/v2/blog/primary/avatar/512", t.picture) + + def test_new_no_primary_blog(self): + self.auth_entity.user_json = json_dumps({"user": {"blogs": [{"url": "foo"}]}}) + with app.test_request_context(): + self.assertIsNone(Tumblr.new(auth_entity=self.auth_entity)) + self.assertIn("Tumblr blog not found", get_flashed_messages()[0]) + + def test_new_with_blog_name(self): + self.auth_entity.user_json = json_dumps( + { + "user": { + "blogs": [ + {"url": "foo"}, + {"name": "bar", "url": "baz"}, + {"name": "biff", "url": "http://boff/"}, + ] + } + } + ) + got = Tumblr.new(auth_entity=self.auth_entity, blog_name="biff") + self.assertEqual(["http://boff/"], got.domain_urls) + self.assertEqual(["boff"], got.domains) + + def test_verify_default(self): + # based on http://snarfed.tumblr.com/ + self._test_verify_finds_disqus( + '' + ) + + def test_verify_inspirewell_theme_1(self): + # based on http://circusriot.tumblr.com/ + self._test_verify_finds_disqus(" var disqus_shortname = 'my-disqus-name';") + + def test_verify_inspirewell_theme_2(self): + # based on http://circusriot.tumblr.com/ + self._test_verify_finds_disqus(' disqusUsername = "my-disqus-name";') + + def test_verify_require_aorcsik_theme(self): + # based on http://require.aorcsik.com/ + self._test_verify_finds_disqus( + ' dsq.src = "http://my-disqus-name.disqus.com/embed.js";' + ) + + def _test_verify_finds_disqus(self, snippet): + self.expect_requests_get( + "http://primary/", "\nstuff\n%s\n" % snippet + ) + self.mox.ReplayAll() + t = Tumblr.new(auth_entity=self.auth_entity, features=["webmention"]) + t.verify() + self.assertEqual("my-disqus-name", t.disqus_shortname) + + def test_verify_without_disqus(self): + self.expect_requests_get("http://primary/", "no disqus here!") + self.mox.ReplayAll() + t = Tumblr.new(auth_entity=self.auth_entity, features=["webmention"]) + t.verify() + self.assertIsNone(t.disqus_shortname) + + def test_create_comment(self): + 
self.expect_thread_details()
+ self.expect_requests_post(
+ tumblr.DISQUS_API_CREATE_POST_URL,
+ json_dumps({"response": {"ok": "sgtm"}}),
+ params=self.disqus_params(
+ {"thread": "87654", "message": '<a href="http://who">who</a>: foo bar'}
+ ),
+ )
+ self.mox.ReplayAll()
+
+ resp = self.tumblr.create_comment(
+ "http://primary/post/123999/xyz_abc?asdf", "who", "http://who", "foo bar"
+ )
+ self.assertEqual({"ok": "sgtm"}, resp)
+
+ def test_create_comment_with_unicode_chars(self):
+ self.expect_thread_details()
+ self.expect_requests_post(
+ tumblr.DISQUS_API_CREATE_POST_URL,
+ json_dumps({}),
+ params=self.disqus_params(
+ {
+ "thread": "87654",
+ "message": '<a href="http://who">Degenève</a>: foo Degenève bar',
+ }
+ ),
+ )
+ self.mox.ReplayAll()
+
+ resp = self.tumblr.create_comment(
+ "http://primary/post/123999/xyz_abc",
+ "Degenève",
+ "http://who",
+ "foo Degenève bar",
+ )
+ self.assertEqual({}, resp)
+
+ def test_create_comment_finds_disqus_shortname(self):
+ self.tumblr.disqus_shortname = None
+
+ self.expect_requests_get(
+ "http://primary/post/123999",
+ "fooo var disqus_shortname = 'my-disqus-name';",
+ )
+ self.expect_thread_details()
+ self.expect_requests_post(
+ tumblr.DISQUS_API_CREATE_POST_URL, json_dumps({}), params=mox.IgnoreArg()
+ )
+ self.mox.ReplayAll()
+
+ self.tumblr.create_comment("http://primary/post/123999", "", "", "")
+ self.assertEqual("my-disqus-name", self.tumblr.key.get().disqus_shortname)
+
+ def test_create_comment_doesnt_find_disqus_shortname(self):
+ self.tumblr.disqus_shortname = None
+
+ self.expect_requests_get("http://primary/post/123999", "no shortname here")
+ self.mox.ReplayAll()
+
+ with self.assertRaises(BadRequest):
+ self.tumblr.create_comment("http://primary/post/123999", "", "", "")
+
+ # not implemented yet. see https://github.com/snarfed/bridgy/issues/177.
+ # currently handled in webmention.error().
+ # def test_create_comment_thread_lookup_fails(self): + # error = { + # 'code':2, + # 'response': "Invalid argument, 'thread': Unable to find thread 'link:xyz'", + # } + # self.expect_thread_details(status_code=400, resp=error) + # self.mox.ReplayAll() + + # resp = self.tumblr.create_comment('http://primary/post/123999/xyz_abc', + # 'who', 'http://who', 'foo bar') + # self.assert_equals(error, resp) diff --git a/tests/test_twitter.py b/tests/test_twitter.py index 7a0a6a67..f24121ee 100644 --- a/tests/test_twitter.py +++ b/tests/test_twitter.py @@ -17,155 +17,194 @@ class TwitterTest(testutil.AppTest): - - def setUp(self): - super().setUp() - oauth_dropins.twitter.TWITTER_APP_KEY = 'my_app_key' - oauth_dropins.twitter.TWITTER_APP_SECRET = 'my_app_secret' - self.auth_entity = oauth_dropins.twitter.TwitterAuth( - id='my_string_id', - token_key='my_key', token_secret='my_secret', - user_json=json_dumps({'name': 'Ryan Barrett', - 'screen_name': 'snarfed_org', - 'description': 'something about me', - 'profile_image_url': 'http://pi.ct/ure', - })) - self.auth_entity.put() - self.tw = Twitter.new(auth_entity=self.auth_entity) - - def test_new(self): - self.assertEqual(self.auth_entity, self.tw.auth_entity.get()) - self.assertEqual('my_key', self.tw.gr_source.access_token_key) - self.assertEqual('my_secret', self.tw.gr_source.access_token_secret) - self.assertEqual('snarfed_org', self.tw.key.string_id()) - self.assertEqual('http://pi.ct/ure', self.tw.picture) - self.assertEqual('Ryan Barrett', self.tw.name) - self.assertEqual('https://twitter.com/snarfed_org', self.tw.url) - self.assertEqual('https://twitter.com/snarfed_org', self.tw.silo_url()) - self.assertEqual('tag:twitter.com,2013:snarfed_org', self.tw.user_tag_id()) - self.assertEqual('snarfed_org (Twitter)', self.tw.label()) - - def test_new_massages_profile_image(self): - """We should use profile_image_url_https and drop '_normal' if possible.""" - user = json_loads(self.auth_entity.user_json) - user['profile_image_url_https'] = 'https://foo_normal.xyz' - self.auth_entity.user_json = json_dumps(user) - - self.assertEqual('https://foo.xyz', Twitter.new(auth_entity=self.auth_entity).picture) - - def test_get_like(self): - """get_like() should use the Response stored in the datastore.""" - like = { - 'objectType': 'activity', - 'verb': 'like', - 'id': 'tag:twitter.com,2013:222', - 'object': {'url': 'http://my/favorite'}, - } - models.Response(id='tag:twitter.com,2013:000_favorited_by_222', - response_json=json_dumps(like)).put() - self.assert_equals(like, self.tw.get_like('unused', '000', '222')) - - def test_get_like_fallback(self): - """If there's no Response in the datastore, fall back to get_activities.""" - models.TWITTER_SCRAPE_HEADERS = {'x': 'y'} - - tweet = copy.deepcopy(gr_twitter_test.TWEET) - tweet['favorite_count'] = 1 - - self.expect_urlopen(API_BASE + API_STATUS % '100', json_dumps(tweet)) - self.expect_requests_get(SCRAPE_LIKES_URL % '100', gr_twitter_test.LIKES_SCRAPED, - headers={'x': 'y'}) - - self.mox.ReplayAll() - like = copy.deepcopy(gr_twitter_test.LIKE_OBJECTS[0]) - like['id'] = 'tag:twitter.com,2013:100_favorited_by_353' - like['author']['id'] = 'tag:twitter.com,2013:ge' - self.assert_equals(like, self.tw.get_like('unused', '100', '353')) - - def test_canonicalize_url(self): - good = 'https://twitter.com/x/status/123' - self.assertEqual(good, self.tw.canonicalize_url(good)) - self.assertEqual(good, self.tw.canonicalize_url( - 'https://twitter.com/x/statuses/123')) - self.assertEqual(good, self.tw.canonicalize_url( - 
'https://twitter.com/x/status/123/')) - self.assertIsNone(self.tw.canonicalize_url( - 'https://twitter.com/x?protected_redirect=true')) - - def test_search_for_links(self): - """https://github.com/snarfed/bridgy/issues/565""" - self.tw.domain_urls = ['http://foo/', 'http://bar/baz', 'https://t.co/xyz'] - self.tw.put() - - results = [{ - 'id_str': '0', # no link - 'text': 'x foo/ y /bar/baz z', - }, { - 'id_str': '1', # yes, ignore http vs https for bar/baz - 'text': 'no link here', - 'entities': {'urls': [{'expanded_url': 'http://bar'}, - {'expanded_url': 'https://bar/baz'}, - ]}, - }, { - 'id_str': '2', # no, retweet - 'text': 'a http://bar/baz ok', - 'retweeted_status': { - 'id_str': '456', - 'text': 'a http://bar/baz ok', - }, - }, { - 'id_str': '3', # no, link domain is blocklisted - 'text': 'x https://t.co/xyz/abc z', - }, { - 'id_str': '4', # no link - 'text': 'x http://bar/baz z', - }, { - 'id_str': '5', # yes - 'text': 'no link here', - 'entities': {'urls': [{'expanded_url': 'http://foo/x?y'}]}, - }, { - 'id_str': '6', # yes - 'text': 'a link http://bar/baz here', - 'entities': {'urls': [{'expanded_url': 'http://foo/'}, - {'expanded_url': 'http://other'}]}, - }] - self.expect_urlopen(API_BASE + API_SEARCH % - {'q': urllib.parse.quote_plus('bar/baz OR foo'), 'count': 50}, - json_dumps({'statuses': results})) - - self.mox.ReplayAll() - self.assert_equals( - ['tag:twitter.com,2013:1', 'tag:twitter.com,2013:5', 'tag:twitter.com,2013:6'], - [a['id'] for a in self.tw.search_for_links()]) - - def test_search_for_links_no_urls(self): - # only a blocklisted domain - self.tw.domain_urls = ['https://t.co/xyz'] - self.tw.put() - self.mox.ReplayAll() - self.assert_equals([], self.tw.search_for_links()) - - def test_is_private(self): - self.assertFalse(self.tw.is_private()) - - self.auth_entity.user_json = json_dumps({'protected': True}) - self.auth_entity.put() - self.assertTrue(self.tw.is_private()) - - def test_gr_source_username(self): - self.assertEqual('snarfed_org', self.tw.gr_source.username) - - def test_load_blocklist_rate_limited(self): - """If we get rate limited, we should use the partial result.""" - api_url = gr_twitter.API_BASE + gr_twitter.API_BLOCK_IDS % '-1' - self.expect_urlopen(api_url, json_dumps({ - 'ids': ['1', '2'], - 'next_cursor_str': '2', - })) - api_url = gr_twitter.API_BASE + gr_twitter.API_BLOCK_IDS % '2' - self.expect_urlopen(api_url, status=429) - - self.mox.ReplayAll() - self.tw.load_blocklist() - self.assert_equals(['1', '2'], self.tw.blocked_ids) - + def setUp(self): + super().setUp() + oauth_dropins.twitter.TWITTER_APP_KEY = "my_app_key" + oauth_dropins.twitter.TWITTER_APP_SECRET = "my_app_secret" + self.auth_entity = oauth_dropins.twitter.TwitterAuth( + id="my_string_id", + token_key="my_key", + token_secret="my_secret", + user_json=json_dumps( + { + "name": "Ryan Barrett", + "screen_name": "snarfed_org", + "description": "something about me", + "profile_image_url": "http://pi.ct/ure", + } + ), + ) + self.auth_entity.put() + self.tw = Twitter.new(auth_entity=self.auth_entity) + + def test_new(self): + self.assertEqual(self.auth_entity, self.tw.auth_entity.get()) + self.assertEqual("my_key", self.tw.gr_source.access_token_key) + self.assertEqual("my_secret", self.tw.gr_source.access_token_secret) + self.assertEqual("snarfed_org", self.tw.key.string_id()) + self.assertEqual("http://pi.ct/ure", self.tw.picture) + self.assertEqual("Ryan Barrett", self.tw.name) + self.assertEqual("https://twitter.com/snarfed_org", self.tw.url) + 
self.assertEqual("https://twitter.com/snarfed_org", self.tw.silo_url()) + self.assertEqual("tag:twitter.com,2013:snarfed_org", self.tw.user_tag_id()) + self.assertEqual("snarfed_org (Twitter)", self.tw.label()) + + def test_new_massages_profile_image(self): + """We should use profile_image_url_https and drop '_normal' if possible.""" + user = json_loads(self.auth_entity.user_json) + user["profile_image_url_https"] = "https://foo_normal.xyz" + self.auth_entity.user_json = json_dumps(user) + + self.assertEqual( + "https://foo.xyz", Twitter.new(auth_entity=self.auth_entity).picture + ) + + def test_get_like(self): + """get_like() should use the Response stored in the datastore.""" + like = { + "objectType": "activity", + "verb": "like", + "id": "tag:twitter.com,2013:222", + "object": {"url": "http://my/favorite"}, + } + models.Response( + id="tag:twitter.com,2013:000_favorited_by_222", + response_json=json_dumps(like), + ).put() + self.assert_equals(like, self.tw.get_like("unused", "000", "222")) + + def test_get_like_fallback(self): + """If there's no Response in the datastore, fall back to get_activities.""" + models.TWITTER_SCRAPE_HEADERS = {"x": "y"} + + tweet = copy.deepcopy(gr_twitter_test.TWEET) + tweet["favorite_count"] = 1 + + self.expect_urlopen(API_BASE + API_STATUS % "100", json_dumps(tweet)) + self.expect_requests_get( + SCRAPE_LIKES_URL % "100", gr_twitter_test.LIKES_SCRAPED, headers={"x": "y"} + ) + + self.mox.ReplayAll() + like = copy.deepcopy(gr_twitter_test.LIKE_OBJECTS[0]) + like["id"] = "tag:twitter.com,2013:100_favorited_by_353" + like["author"]["id"] = "tag:twitter.com,2013:ge" + self.assert_equals(like, self.tw.get_like("unused", "100", "353")) + + def test_canonicalize_url(self): + good = "https://twitter.com/x/status/123" + self.assertEqual(good, self.tw.canonicalize_url(good)) + self.assertEqual( + good, self.tw.canonicalize_url("https://twitter.com/x/statuses/123") + ) + self.assertEqual( + good, self.tw.canonicalize_url("https://twitter.com/x/status/123/") + ) + self.assertIsNone( + self.tw.canonicalize_url("https://twitter.com/x?protected_redirect=true") + ) + + def test_search_for_links(self): + """https://github.com/snarfed/bridgy/issues/565""" + self.tw.domain_urls = ["http://foo/", "http://bar/baz", "https://t.co/xyz"] + self.tw.put() + + results = [ + { + "id_str": "0", # no link + "text": "x foo/ y /bar/baz z", + }, + { + "id_str": "1", # yes, ignore http vs https for bar/baz + "text": "no link here", + "entities": { + "urls": [ + {"expanded_url": "http://bar"}, + {"expanded_url": "https://bar/baz"}, + ] + }, + }, + { + "id_str": "2", # no, retweet + "text": "a http://bar/baz ok", + "retweeted_status": { + "id_str": "456", + "text": "a http://bar/baz ok", + }, + }, + { + "id_str": "3", # no, link domain is blocklisted + "text": "x https://t.co/xyz/abc z", + }, + { + "id_str": "4", # no link + "text": "x http://bar/baz z", + }, + { + "id_str": "5", # yes + "text": "no link here", + "entities": {"urls": [{"expanded_url": "http://foo/x?y"}]}, + }, + { + "id_str": "6", # yes + "text": "a link http://bar/baz here", + "entities": { + "urls": [ + {"expanded_url": "http://foo/"}, + {"expanded_url": "http://other"}, + ] + }, + }, + ] + self.expect_urlopen( + API_BASE + + API_SEARCH + % {"q": urllib.parse.quote_plus("bar/baz OR foo"), "count": 50}, + json_dumps({"statuses": results}), + ) + + self.mox.ReplayAll() + self.assert_equals( + [ + "tag:twitter.com,2013:1", + "tag:twitter.com,2013:5", + "tag:twitter.com,2013:6", + ], + [a["id"] for a in 
self.tw.search_for_links()], + ) + + def test_search_for_links_no_urls(self): + # only a blocklisted domain + self.tw.domain_urls = ["https://t.co/xyz"] + self.tw.put() + self.mox.ReplayAll() + self.assert_equals([], self.tw.search_for_links()) + + def test_is_private(self): + self.assertFalse(self.tw.is_private()) + + self.auth_entity.user_json = json_dumps({"protected": True}) + self.auth_entity.put() + self.assertTrue(self.tw.is_private()) + + def test_gr_source_username(self): + self.assertEqual("snarfed_org", self.tw.gr_source.username) + + def test_load_blocklist_rate_limited(self): + """If we get rate limited, we should use the partial result.""" + api_url = gr_twitter.API_BASE + gr_twitter.API_BLOCK_IDS % "-1" + self.expect_urlopen( + api_url, + json_dumps( + { + "ids": ["1", "2"], + "next_cursor_str": "2", + } + ), + ) + api_url = gr_twitter.API_BASE + gr_twitter.API_BLOCK_IDS % "2" + self.expect_urlopen(api_url, status=429) + + self.mox.ReplayAll() + self.tw.load_blocklist() + self.assert_equals(["1", "2"], self.tw.blocked_ids) diff --git a/tests/test_util.py b/tests/test_util.py index c93380bf..f34fc97f 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -21,433 +21,527 @@ from util import Login # the character in the middle is an unusual unicode character -UNICODE_STR = 'a ✁ b' +UNICODE_STR = "a ✁ b" class UtilTest(testutil.AppTest): - - def setUp(self): - super().setUp() - util.now_fn = lambda: datetime.datetime(2000, 1, 1) - - def test_maybe_add_or_delete_source(self): - auth_entity = FakeAuthEntity(id='x', user_json=json_dumps( - {'url': 'http://foo.com/', 'name': UNICODE_STR})) - auth_entity.put() - - key = FakeSource.next_key() - with app.test_request_context(), self.assertRaises(RequestRedirect) as rr: - state = util.construct_state_param_for_add(feature='publish') - util.maybe_add_or_delete_source(FakeSource, auth_entity, state) - self.assertIn(UNICODE_STR, get_flashed_messages()[0]) - - self.assertEqual(302, rr.exception.code) - self.assertEqual(['publish'], key.get().features) - - name = urllib.parse.quote_plus(UNICODE_STR.encode()) - self.assertIn(f'logins="/fake/{key.id()}?{name}";', - rr.exception.get_response().headers['Set-Cookie']) - - for feature in None, '': - key = FakeSource.next_key() - with app.test_request_context(), self.assertRaises(RequestRedirect) as rr: - state = util.construct_state_param_for_add(feature) - util.maybe_add_or_delete_source(FakeSource, auth_entity, state) - self.assertEqual([], key.get().features) - - def test_maybe_add_or_delete_source_bad_state(self): - auth_entity = FakeAuthEntity(id='x', user_json='{}') - auth_entity.put() - with self.assertRaises(BadRequest): - util.maybe_add_or_delete_source(FakeSource, auth_entity, 'bad') - - def test_maybe_add_or_delete_source_delete_declined(self): - state = { - 'feature': 'webmention', - 'operation': 'delete', - } - msg = 'If you want to disable, please approve the FakeSource prompt.' 
- - # no source - with app.test_request_context(): - with self.assertRaises(RequestRedirect) as rr: - util.maybe_add_or_delete_source( - FakeSource, None, util.encode_oauth_state(state)) - - self.assert_equals(302, rr.exception.code) - self.assert_equals('http://localhost/', rr.exception.new_url) - self.assertEqual([msg], get_flashed_messages()) - - # source - state['source'] = self.sources[0].key.urlsafe().decode() - with app.test_request_context(): - with self.assertRaises(RequestRedirect) as rr: - util.maybe_add_or_delete_source( - FakeSource, None, util.encode_oauth_state(state)) - - self.assert_equals(302, rr.exception.code) - self.assert_equals(self.source_bridgy_url, rr.exception.new_url) - self.assertEqual([msg], get_flashed_messages()) - - def test_maybe_add_or_delete_without_web_site_redirects_to_edit_websites(self): - for bad_url in None, 'not>aa= eta_seconds >= got - delta): - # print('expect_task: expected schedule_time %r, got %r' % (eta_seconds, got)) - return False - - return True - - return tasks_client.create_task( - mox.Func(check_task)).InAnyOrder().AndReturn(Task(name='my task')) - - def expect_requests_get(self, *args, **kwargs): - if 'headers' not in kwargs: - kwargs['headers'] = util.REQUEST_HEADERS - return super().expect_requests_get(*args, **kwargs) - - def expect_requests_post(self, *args, **kwargs): - kwargs.setdefault('headers', {}).update(util.REQUEST_HEADERS) - return super().expect_requests_post(*args, **kwargs) - - def expect_requests_head(self, *args, **kwargs): - kwargs.setdefault('headers', {}).update(util.REQUEST_HEADERS) - return super().expect_requests_head(*args, **kwargs) + + # activities + self.activities = FakeGrSource.activities = [ + { + "id": "tag:source.com,2013:%s" % id, + "url": "http://fa.ke/post/url", + "object": { + "objectType": "note", + "id": "tag:source.com,2013:%s" % id, + "url": "http://fa.ke/post/url", + "content": "foo http://target1/post/url bar", + "to": [{"objectType": "group", "alias": "@public"}], + "replies": { + "items": [ + { + "objectType": "comment", + "id": "tag:source.com,2013:1_2_%s" % id, + "url": "http://fa.ke/comment/url", + "content": "foo bar", + } + ], + "totalItems": 1, + }, + "tags": [ + { + "objectType": "activity", + "verb": "like", + "id": "tag:source.com,2013:%s_liked_by_alice" % id, + "object": {"url": "http://example.com/abc"}, + "author": { + "id": "tag:source.com,2013:alice", + "url": "http://example.com/alice", + }, + }, + { + "id": "tag:source.com,2013:%s_reposted_by_bob" % id, + "objectType": "activity", + "verb": "share", + "object": {"url": "http://example.com/def"}, + "author": {"url": "http://example.com/bob"}, + }, + { + "id": "tag:source.com,2013:%s_scissors_by_bob" % id, + "objectType": "activity", + "verb": "react", + "content": "✁", + "object": {"url": "http://example.com/def"}, + "author": {"url": "http://example.com/bob"}, + }, + ], + }, + } + for id in ("a", "b", "c") + ] + + # responses + self.responses = [] + created = datetime.datetime.utcnow() - datetime.timedelta(days=10) + + for activity in self.activities: + obj = activity["object"] + pruned_activity = { + "id": activity["id"], + "url": "http://fa.ke/post/url", + "object": { + "content": "foo http://target1/post/url bar", + }, + } + + comment = obj["replies"]["items"][0] + self.responses.append( + Response( + id=comment["id"], + activities_json=[json_dumps(pruned_activity)], + response_json=json_dumps(comment), + type="comment", + source=self.sources[0].key, + unsent=["http://target1/post/url"], + created=created, + ) + ) + + 
created += datetime.timedelta(hours=1) + + like = obj["tags"][0] + self.responses.append( + Response( + id=like["id"], + activities_json=[json_dumps(pruned_activity)], + response_json=json_dumps(like), + type="like", + source=self.sources[0].key, + unsent=["http://target1/post/url"], + created=created, + ) + ) + + created += datetime.timedelta(hours=1) + + share = obj["tags"][1] + self.responses.append( + Response( + id=share["id"], + activities_json=[json_dumps(pruned_activity)], + response_json=json_dumps(share), + type="repost", + source=self.sources[0].key, + unsent=["http://target1/post/url"], + created=created, + ) + ) + + created += datetime.timedelta(hours=1) + + reaction = obj["tags"][2] + self.responses.append( + Response( + id=reaction["id"], + activities_json=[json_dumps(pruned_activity)], + response_json=json_dumps(reaction), + type="react", + source=self.sources[0].key, + unsent=["http://target1/post/url"], + created=created, + ) + ) + + created += datetime.timedelta(hours=1) + + # publishes + self.publishes = [ + Publish( + parent=PublishedPage(id="https://post").key, + source=self.sources[0].key, + status="complete", + published={"url": "http://fa.ke/syndpost"}, + ) + ] + + # blogposts + self.blogposts = [ + BlogPost( + id="https://post", + source=self.sources[0].key, + status="complete", + feed_item={"title": "a post"}, + sent=["http://a/link"], + ) + ] + + def tearDown(self): + self.ndb_context.__exit__(None, None, None) + self.client.__exit__(None, None, None) + super().tearDown() + + @staticmethod + def clear_datastore(): + orig_requests_post(f"http://{ndb_client.host}/reset") + + def stub_create_task(self): + if not self.stubbed_create_task: + self.mox.StubOutWithMock(tasks_client, "create_task") + self.stubbed_create_task = True + + def expect_task(self, queue, eta_seconds=None, **kwargs): + self.stub_create_task() + + def check_task(task): + if not task.parent.endswith("/" + queue): + # These can help for debugging, but can also be misleading, since many + # tests insert multiple tasks, so check_task() runs on all of them (due + # to InAnyOrder() below) until it finds one that matches. + # print("expect_task: %s doesn't end with /%s!" % (task.parent, queue)) + return False + + req = task.task.app_engine_http_request + if not req.relative_uri.endswith("/" + queue): + # print("expect_task: relative_uri %s doesn't end with /%s!" 
% ( + # req.relative_uri, queue)) + return False + + # convert model objects and keys to url-safe key strings for comparison + for name, val in kwargs.items(): + if isinstance(val, ndb.Model): + kwargs[name] = val.key.urlsafe().decode() + elif isinstance(val, ndb.Key): + kwargs[name] = val.urlsafe().decode() + + got = set(urllib.parse.parse_qsl(req.body.decode())) + expected = set(kwargs.items()) + if got != expected: + # print('expect_task: expected %s, got %s' % (expected, got)) + return False + + if eta_seconds is not None: + got = util.to_utc_timestamp( + task.task.schedule_time + ) - util.to_utc_timestamp(util.now_fn()) + delta = eta_seconds * 0.2 + 10 + if not (got + delta >= eta_seconds >= got - delta): + # print('expect_task: expected schedule_time %r, got %r' % (eta_seconds, got)) + return False + + return True + + return ( + tasks_client.create_task(mox.Func(check_task)) + .InAnyOrder() + .AndReturn(Task(name="my task")) + ) + + def expect_requests_get(self, *args, **kwargs): + if "headers" not in kwargs: + kwargs["headers"] = util.REQUEST_HEADERS + return super().expect_requests_get(*args, **kwargs) + + def expect_requests_post(self, *args, **kwargs): + kwargs.setdefault("headers", {}).update(util.REQUEST_HEADERS) + return super().expect_requests_post(*args, **kwargs) + + def expect_requests_head(self, *args, **kwargs): + kwargs.setdefault("headers", {}).update(util.REQUEST_HEADERS) + return super().expect_requests_head(*args, **kwargs) class AppTest(TestCase): - app = flask_app.app + app = flask_app.app class BackgroundTest(TestCase): - app = flask_background.app + app = flask_background.app diff --git a/tests/twitter_live_test.py b/tests/twitter_live_test.py index c25907fe..2c4befec 100644 --- a/tests/twitter_live_test.py +++ b/tests/twitter_live_test.py @@ -17,35 +17,39 @@ from models import TWITTER_SCRAPE_HEADERS -twitter_auth.TWITTER_APP_KEY = (os.getenv('TWITTER_LIVE_TEST_APP_KEY') or - util.read('twitter_live_test_app_key')) +twitter_auth.TWITTER_APP_KEY = os.getenv("TWITTER_LIVE_TEST_APP_KEY") or util.read( + "twitter_live_test_app_key" +) assert twitter_auth.TWITTER_APP_KEY -twitter_auth.TWITTER_APP_SECRET = (os.getenv('TWITTER_LIVE_TEST_APP_SECRET') or - util.read('twitter_live_test_app_secret')) +twitter_auth.TWITTER_APP_SECRET = os.getenv( + "TWITTER_LIVE_TEST_APP_SECRET" +) or util.read("twitter_live_test_app_secret") assert twitter_auth.TWITTER_APP_SECRET -TOKEN_KEY = (os.getenv('TWITTER_ACCESS_TOKEN_KEY') or - util.read('twitter_access_token_key')) +TOKEN_KEY = os.getenv("TWITTER_ACCESS_TOKEN_KEY") or util.read( + "twitter_access_token_key" +) assert TOKEN_KEY -TOKEN_SECRET = (os.getenv('TWITTER_ACCESS_TOKEN_SECRET') or - util.read('twitter_access_token_secret')) +TOKEN_SECRET = os.getenv("TWITTER_ACCESS_TOKEN_SECRET") or util.read( + "twitter_access_token_secret" +) assert TOKEN_SECRET -TWEET_ID = '1270018109630369797' +TWEET_ID = "1270018109630369797" class TwitterLiveTest(unittest.TestCase): - - def test_like_scraping(self): - tw = twitter.Twitter(TOKEN_KEY, TOKEN_SECRET, - scrape_headers=TWITTER_SCRAPE_HEADERS) - activities = tw.get_activities(activity_id=TWEET_ID, fetch_likes=True) - likes = [t for t in activities[0]['object']['tags'] if t.get('verb') == 'like'] - self.assertGreater(len(likes), 0) - - -if __name__ == '__main__': - if '--debug' in sys.argv: - sys.argv.remove('--debug') - logging.getLogger().setLevel(logging.DEBUG) - else: - logging.getLogger().setLevel(logging.CRITICAL + 1) - unittest.main() + def test_like_scraping(self): + tw = 
twitter.Twitter( + TOKEN_KEY, TOKEN_SECRET, scrape_headers=TWITTER_SCRAPE_HEADERS + ) + activities = tw.get_activities(activity_id=TWEET_ID, fetch_likes=True) + likes = [t for t in activities[0]["object"]["tags"] if t.get("verb") == "like"] + self.assertGreater(len(likes), 0) + + +if __name__ == "__main__": + if "--debug" in sys.argv: + sys.argv.remove("--debug") + logging.getLogger().setLevel(logging.DEBUG) + else: + logging.getLogger().setLevel(logging.CRITICAL + 1) + unittest.main() diff --git a/tumblr.py b/tumblr.py index 2a87c413..d7681546 100644 --- a/tumblr.py +++ b/tumblr.py @@ -41,236 +41,267 @@ import util -TUMBLR_AVATAR_URL = 'http://api.tumblr.com/v2/blog/%s/avatar/512' -DISQUS_API_CREATE_POST_URL = 'https://disqus.com/api/3.0/posts/create.json' -DISQUS_API_THREAD_DETAILS_URL = 'http://disqus.com/api/3.0/threads/details.json' -DISQUS_ACCESS_TOKEN = util.read('disqus_access_token') -DISQUS_API_KEY = util.read('disqus_api_key') -DISQUS_API_SECRET = util.read('disqus_api_secret') +TUMBLR_AVATAR_URL = "http://api.tumblr.com/v2/blog/%s/avatar/512" +DISQUS_API_CREATE_POST_URL = "https://disqus.com/api/3.0/posts/create.json" +DISQUS_API_THREAD_DETAILS_URL = "http://disqus.com/api/3.0/threads/details.json" +DISQUS_ACCESS_TOKEN = util.read("disqus_access_token") +DISQUS_API_KEY = util.read("disqus_api_key") +DISQUS_API_SECRET = util.read("disqus_api_secret") # Tumblr has no single standard markup or JS for integrating Disqus. It does # have a default way, but themes often do it themselves, differently. Sigh. # Details in https://github.com/snarfed/bridgy/issues/278 DISQUS_SHORTNAME_RES = ( - re.compile(""" + re.compile( + """ (?:https?://disqus\.com/forums|disqus[ -_]?(?:user|short)?name) \ *[=:/]\ *['"]? ([^/"\' ]+) # the actual shortname - """, re.IGNORECASE | re.VERBOSE), - re.compile('https?://([^./"\' ]+)\.disqus\.com/embed\.js'), - ) + """, + re.IGNORECASE | re.VERBOSE, + ), + re.compile("https?://([^./\"' ]+)\.disqus\.com/embed\.js"), +) class Tumblr(models.Source): - """A Tumblr blog. + """A Tumblr blog. - The key name is the blog domain. - """ - GR_CLASS = collections.namedtuple('FakeGrClass', ('NAME',))(NAME='Tumblr') - OAUTH_START = oauth_tumblr.Start - SHORT_NAME = 'tumblr' - - disqus_shortname = ndb.StringProperty() - - def feed_url(self): - # http://www.tumblr.com/help (search for feed) - return urllib.parse.urljoin(self.silo_url(), '/rss') - - def silo_url(self): - return self.domain_urls[0] - - def edit_template_url(self): - return 'http://www.tumblr.com/customize/%s' % self.auth_entity.id() - - @staticmethod - def new(auth_entity=None, blog_name=None, **kwargs): - """Creates and returns a :class:`Tumblr` for the logged in user. - - Args: - auth_entity: :class:`oauth_dropins.tumblr.TumblrAuth` - blog_name: which blog. optional. passed to _urls_and_domains. - """ - urls, domains = Tumblr._urls_and_domains(auth_entity, blog_name=blog_name) - if not urls or not domains: - flash('Tumblr blog not found. Please create one first!') - return None - - id = domains[0] - return Tumblr(id=id, - auth_entity=auth_entity.key, - domains=domains, - domain_urls=urls, - name=auth_entity.user_display_name(), - picture=TUMBLR_AVATAR_URL % id, - superfeedr_secret=util.generate_secret(), - **kwargs) - - @staticmethod - def _urls_and_domains(auth_entity, blog_name=None): - """Returns this blog's URL and domain. - - Args: - auth_entity: :class:`oauth_dropins.tumblr.TumblrAuth` - blog_name: which blog. optional. 
matches the 'name' field for one of the - blogs in auth_entity.user_json['user']['blogs']. - - Returns: - ([string url], [string domain]) + The key name is the blog domain. """ - for blog in json_loads(auth_entity.user_json).get('user', {}).get('blogs', []): - if ((blog_name and blog_name == blog.get('name')) or - (not blog_name and blog.get('primary'))): - return [blog['url']], [util.domain_from_link(blog['url']).lower()] - return [], [] - - def verified(self): - """Returns True if we've found the webmention endpoint and Disqus.""" - return self.webmention_endpoint and self.disqus_shortname - - def verify(self): - """Checks that Disqus is installed as well as the webmention endpoint. - - Stores the result in webmention_endpoint. - """ - if self.verified(): - return - - super().verify(force=True) - - html = getattr(self, '_fetched_html', None) # set by Source.verify() - if not self.disqus_shortname and html: - self.discover_disqus_shortname(html) - - def discover_disqus_shortname(self, html): - # scrape the disqus shortname out of the page - logging.info("Looking for Disqus shortname in fetched HTML") - for regex in DISQUS_SHORTNAME_RES: - match = regex.search(html) - if match: - self.disqus_shortname = match.group(1) - logging.info("Found Disqus shortname %s", self.disqus_shortname) - self.put() - - def create_comment(self, post_url, author_name, author_url, content): - """Creates a new comment in the source silo. - - Must be implemented by subclasses. - - Args: - post_url: string - author_name: string - author_url: string - content: string - - Returns: - JSON response dict with 'id' and other fields - """ - if not self.disqus_shortname: - resp = util.requests_get(post_url) - resp.raise_for_status() - self.discover_disqus_shortname(resp.text) - if not self.disqus_shortname: - raise BadRequest("Your Bridgy account isn't fully set up yet: " - "we haven't found your Disqus account.") - - # strip slug, query and fragment from post url - parsed = urllib.parse.urlparse(post_url) - path = parsed.path.split('/') - if not util.is_int(path[-1]): - path.pop(-1) - post_url = urllib.parse.urlunparse(parsed[:2] + ('/'.join(path), '', '', '')) - - # get the disqus thread id. details on thread queries: - # http://stackoverflow.com/questions/4549282/disqus-api-adding-comment - # https://disqus.com/api/docs/threads/details/ - resp = self.disqus_call(util.requests_get, DISQUS_API_THREAD_DETAILS_URL, - {'forum': self.disqus_shortname, - # ident:[tumblr_post_id] should work, but doesn't :/ - 'thread': 'link:%s' % post_url, - }) - thread_id = resp['id'] - - # create the comment - message = '%s: %s' % (author_url, author_name, content) - resp = self.disqus_call(util.requests_post, DISQUS_API_CREATE_POST_URL, - {'thread': thread_id, - 'message': message, - # only allowed when authed as moderator/owner - # 'state': 'approved', - }) - return resp - - @staticmethod - def disqus_call(method, url, params, **kwargs): - """Makes a Disqus API call. - - Args: - method: requests function to use, e.g. 
requests.get - url: string - params: query parameters - kwargs: passed through to method - - Returns: - dict, JSON response - """ - logging.info('Calling Disqus %s with %s', url.split('/')[-2:], params) - params.update({ - 'api_key': DISQUS_API_KEY, - 'api_secret': DISQUS_API_SECRET, - 'access_token': DISQUS_ACCESS_TOKEN, - }) - kwargs.setdefault('headers', {}).update(util.REQUEST_HEADERS) - resp = method(url, params=params, **kwargs) - resp.raise_for_status() - resp = resp.json().get('response', {}) - logging.info('Response: %s', resp) - return resp + GR_CLASS = collections.namedtuple("FakeGrClass", ("NAME",))(NAME="Tumblr") + OAUTH_START = oauth_tumblr.Start + SHORT_NAME = "tumblr" + + disqus_shortname = ndb.StringProperty() + + def feed_url(self): + # http://www.tumblr.com/help (search for feed) + return urllib.parse.urljoin(self.silo_url(), "/rss") + + def silo_url(self): + return self.domain_urls[0] + + def edit_template_url(self): + return "http://www.tumblr.com/customize/%s" % self.auth_entity.id() + + @staticmethod + def new(auth_entity=None, blog_name=None, **kwargs): + """Creates and returns a :class:`Tumblr` for the logged in user. + + Args: + auth_entity: :class:`oauth_dropins.tumblr.TumblrAuth` + blog_name: which blog. optional. passed to _urls_and_domains. + """ + urls, domains = Tumblr._urls_and_domains(auth_entity, blog_name=blog_name) + if not urls or not domains: + flash("Tumblr blog not found. Please create one first!") + return None + + id = domains[0] + return Tumblr( + id=id, + auth_entity=auth_entity.key, + domains=domains, + domain_urls=urls, + name=auth_entity.user_display_name(), + picture=TUMBLR_AVATAR_URL % id, + superfeedr_secret=util.generate_secret(), + **kwargs + ) + + @staticmethod + def _urls_and_domains(auth_entity, blog_name=None): + """Returns this blog's URL and domain. + + Args: + auth_entity: :class:`oauth_dropins.tumblr.TumblrAuth` + blog_name: which blog. optional. matches the 'name' field for one of the + blogs in auth_entity.user_json['user']['blogs']. + + Returns: + ([string url], [string domain]) + """ + for blog in json_loads(auth_entity.user_json).get("user", {}).get("blogs", []): + if (blog_name and blog_name == blog.get("name")) or ( + not blog_name and blog.get("primary") + ): + return [blog["url"]], [util.domain_from_link(blog["url"]).lower()] + + return [], [] + + def verified(self): + """Returns True if we've found the webmention endpoint and Disqus.""" + return self.webmention_endpoint and self.disqus_shortname + + def verify(self): + """Checks that Disqus is installed as well as the webmention endpoint. + + Stores the result in webmention_endpoint. + """ + if self.verified(): + return + + super().verify(force=True) + + html = getattr(self, "_fetched_html", None) # set by Source.verify() + if not self.disqus_shortname and html: + self.discover_disqus_shortname(html) + + def discover_disqus_shortname(self, html): + # scrape the disqus shortname out of the page + logging.info("Looking for Disqus shortname in fetched HTML") + for regex in DISQUS_SHORTNAME_RES: + match = regex.search(html) + if match: + self.disqus_shortname = match.group(1) + logging.info("Found Disqus shortname %s", self.disqus_shortname) + self.put() + + def create_comment(self, post_url, author_name, author_url, content): + """Creates a new comment in the source silo. + + Must be implemented by subclasses. 
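For readers puzzling over the DISQUS_SHORTNAME_RES patterns above, here is a minimal sketch of how the second pattern pulls a shortname out of fetched blog HTML; the markup and the "example-blog" shortname are invented for illustration.

import re

# same shape as the second DISQUS_SHORTNAME_RES pattern above
EMBED_JS_RE = re.compile(r'https?://([^./"\' ]+)\.disqus\.com/embed\.js')

html = '<script src="https://example-blog.disqus.com/embed.js"></script>'
match = EMBED_JS_RE.search(html)
print(match.group(1))  # example-blog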
+ + Args: + post_url: string + author_name: string + author_url: string + content: string + + Returns: + JSON response dict with 'id' and other fields + """ + if not self.disqus_shortname: + resp = util.requests_get(post_url) + resp.raise_for_status() + self.discover_disqus_shortname(resp.text) + if not self.disqus_shortname: + raise BadRequest( + "Your Bridgy account isn't fully set up yet: " + "we haven't found your Disqus account." + ) + + # strip slug, query and fragment from post url + parsed = urllib.parse.urlparse(post_url) + path = parsed.path.split("/") + if not util.is_int(path[-1]): + path.pop(-1) + post_url = urllib.parse.urlunparse(parsed[:2] + ("/".join(path), "", "", "")) + + # get the disqus thread id. details on thread queries: + # http://stackoverflow.com/questions/4549282/disqus-api-adding-comment + # https://disqus.com/api/docs/threads/details/ + resp = self.disqus_call( + util.requests_get, + DISQUS_API_THREAD_DETAILS_URL, + { + "forum": self.disqus_shortname, + # ident:[tumblr_post_id] should work, but doesn't :/ + "thread": "link:%s" % post_url, + }, + ) + thread_id = resp["id"] + + # create the comment + message = '%s: %s' % (author_url, author_name, content) + resp = self.disqus_call( + util.requests_post, + DISQUS_API_CREATE_POST_URL, + { + "thread": thread_id, + "message": message, + # only allowed when authed as moderator/owner + # 'state': 'approved', + }, + ) + return resp + + @staticmethod + def disqus_call(method, url, params, **kwargs): + """Makes a Disqus API call. + + Args: + method: requests function to use, e.g. requests.get + url: string + params: query parameters + kwargs: passed through to method + + Returns: + dict, JSON response + """ + logging.info("Calling Disqus %s with %s", url.split("/")[-2:], params) + params.update( + { + "api_key": DISQUS_API_KEY, + "api_secret": DISQUS_API_SECRET, + "access_token": DISQUS_ACCESS_TOKEN, + } + ) + kwargs.setdefault("headers", {}).update(util.REQUEST_HEADERS) + resp = method(url, params=params, **kwargs) + resp.raise_for_status() + resp = resp.json().get("response", {}) + logging.info("Response: %s", resp) + return resp class ChooseBlog(oauth_tumblr.Callback): - def finish(self, auth_entity, state=None): - if not auth_entity: - util.maybe_add_or_delete_source(Tumblr, auth_entity, state) - return - - vars = { - 'action': '/tumblr/add', - 'state': state, - 'auth_entity_key': auth_entity.key.urlsafe().decode(), - 'blogs': [{'id': b['name'], - 'title': b.get('title', ''), - 'domain': util.domain_from_link(b['url'])} + def finish(self, auth_entity, state=None): + if not auth_entity: + util.maybe_add_or_delete_source(Tumblr, auth_entity, state) + return + + vars = { + "action": "/tumblr/add", + "state": state, + "auth_entity_key": auth_entity.key.urlsafe().decode(), + "blogs": [ + { + "id": b["name"], + "title": b.get("title", ""), + "domain": util.domain_from_link(b["url"]), + } # user_json is the user/info response: # http://www.tumblr.com/docs/en/api/v2#user-methods - for b in json_loads(auth_entity.user_json)['user']['blogs'] - if b.get('name') and b.get('url')], - } - logging.info('Rendering choose_blog.html with %s', vars) - return render_template('choose_blog.html', **vars) + for b in json_loads(auth_entity.user_json)["user"]["blogs"] + if b.get("name") and b.get("url") + ], + } + logging.info("Rendering choose_blog.html with %s", vars) + return render_template("choose_blog.html", **vars) -@app.route('/tumblr/add', methods=['POST']) +@app.route("/tumblr/add", methods=["POST"]) def tumblr_add(): - 
util.maybe_add_or_delete_source( - Tumblr, - ndb.Key(urlsafe=request.form['auth_entity_key']).get(), - request.form['state'], - blog_name=request.form['blog'], - ) + util.maybe_add_or_delete_source( + Tumblr, + ndb.Key(urlsafe=request.form["auth_entity_key"]).get(), + request.form["state"], + blog_name=request.form["blog"], + ) class SuperfeedrNotify(superfeedr.Notify): - SOURCE_CLS = Tumblr + SOURCE_CLS = Tumblr # Tumblr doesn't seem to use scope # http://www.tumblr.com/docs/en/api/v2#oauth start = util.oauth_starter(oauth_tumblr.Start).as_view( - 'tumblr_start', '/tumblr/choose_blog') -app.add_url_rule('/tumblr/start', view_func=start, methods=['POST']) -app.add_url_rule('/tumblr/choose_blog', view_func=ChooseBlog.as_view( - 'tumblr_choose_blog', 'unused')) -app.add_url_rule('/tumblr/delete/finish', view_func=oauth_tumblr.Callback.as_view( - 'tumblr_delete_finish', '/delete/finish')) -app.add_url_rule('/tumblr/notify/', view_func=SuperfeedrNotify.as_view('tumblr_notify'), methods=['POST']) + "tumblr_start", "/tumblr/choose_blog" +) +app.add_url_rule("/tumblr/start", view_func=start, methods=["POST"]) +app.add_url_rule( + "/tumblr/choose_blog", view_func=ChooseBlog.as_view("tumblr_choose_blog", "unused") +) +app.add_url_rule( + "/tumblr/delete/finish", + view_func=oauth_tumblr.Callback.as_view("tumblr_delete_finish", "/delete/finish"), +) +app.add_url_rule( + "/tumblr/notify/", + view_func=SuperfeedrNotify.as_view("tumblr_notify"), + methods=["POST"], +) diff --git a/twitter.py b/twitter.py index b3a74e3d..db8c84c8 100644 --- a/twitter.py +++ b/twitter.py @@ -19,182 +19,211 @@ class Twitter(models.Source): - """A Twitter account. - - The key name is the username. - """ - GR_CLASS = gr_twitter.Twitter - OAUTH_START = oauth_twitter.Start - SHORT_NAME = 'twitter' - TYPE_LABELS = { - 'post': 'tweet', - 'comment': '@-reply', - 'repost': 'retweet', - 'like': 'favorite', - } - TRANSIENT_ERROR_HTTP_CODES = ('404',) - CAN_PUBLISH = True - HAS_BLOCKS = True - URL_CANONICALIZER = gr_twitter.Twitter.URL_CANONICALIZER - URL_CANONICALIZER.headers = util.REQUEST_HEADERS - - @staticmethod - def new(auth_entity=None, **kwargs): - """Creates and returns a :class:`Twitter` entity. - - Args: - auth_entity: :class:`oauth_dropins.twitter.TwitterAuth` - kwargs: property values - """ - user = json_loads(auth_entity.user_json) - gr_source = gr_twitter.Twitter(*auth_entity.access_token()) - actor = gr_source.user_to_actor(user) - return Twitter(id=user['screen_name'], - auth_entity=auth_entity.key, - url=actor.get('url'), - name=actor.get('displayName'), - picture=actor.get('image', {}).get('url'), - **kwargs) - - def silo_url(self): - """Returns the Twitter account URL, e.g. https://twitter.com/foo.""" - return self.gr_source.user_url(self.key_id()) - - def label_name(self): - """Returns the username.""" - return self.key_id() - - def search_for_links(self): - """Searches for activities with links to any of this source's web sites. - - Twitter search supports OR: - https://dev.twitter.com/rest/public/search - - ...but it only returns complete(ish) results if we strip scheme from URLs, - ie search for example.com instead of http://example.com/, and that also - returns false positivies, so we check that the returned tweets actually have - matching links. 
https://github.com/snarfed/bridgy/issues/565 - - Returns: - sequence of ActivityStreams activity dicts - """ - urls = set(util.schemeless(util.fragmentless(url), slashes=False) - for url in self.domain_urls - if not util.in_webmention_blocklist(util.domain_from_link(url))) - if not urls: - return [] - - query = ' OR '.join(sorted(urls)) - candidates = self.get_activities( - search_query=query, group_id=gr_source.SEARCH, etag=self.last_activities_etag, - fetch_replies=False, fetch_likes=False, fetch_shares=False, count=50) - - # filter out retweets and search false positives that don't actually link to us - results = [] - for candidate in candidates: - if candidate.get('verb') == 'share': - continue - obj = candidate['object'] - tags = obj.get('tags', []) - atts = obj.get('attachments', []) - for url in urls: - if (any(util.schemeless(t.get('url', ''), slashes=False).startswith(url) - for t in tags + atts)): - results.append(candidate) - break - - return results - - def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs): - """Returns an ActivityStreams 'like' activity object for a favorite. - - We get Twitter favorites by scraping HTML, and we only get the first page, - which only has 25. So, use a :class:`models.Response` in the datastore - first, if we have one, and only re-scrape HTML as a fallback. - - Args: - activity_user_id: string id of the user who posted the original activity - activity_id: string activity id - like_user_id: string id of the user who liked the activity - kwargs: passed to :meth:`granary.source.Source.get_comment` - """ - id = self.gr_source.tag_uri('%s_favorited_by_%s' % (activity_id, like_user_id)) - resp = models.Response.get_by_id(id) - if resp: - return json_loads(resp.response_json) - else: - return super().get_like(activity_user_id, activity_id, - like_user_id, **kwargs) - - def is_private(self): - """Returns True if this Twitter account is protected. - - https://dev.twitter.com/rest/reference/get/users/show#highlighter_25173 - https://support.twitter.com/articles/14016 - https://support.twitter.com/articles/20169886 - """ - return json_loads(self.auth_entity.get().user_json).get('protected') - - def canonicalize_url(self, url, activity=None, **kwargs): - """Normalize /statuses/ to /status/. - - https://github.com/snarfed/bridgy/issues/618 - """ - url = url.replace('/statuses/', '/status/') - return super().canonicalize_url(url, **kwargs) - - -class Auth(): - """Base OAuth handler class.""" + """A Twitter account. - def start_oauth_flow(self, feature): - """Redirects to Twitter's OAuth endpoint to start the OAuth flow. - - Args: - feature: 'listen' or 'publish' + The key name is the username. """ - features = feature.split(',') if feature else [] - for feature in features: - if feature not in models.Source.FEATURES: - util.error(f'Unknown feature: {feature}') - # pass explicit 'write' instead of None for publish so that oauth-dropins - # (and tweepy) don't use signin_with_twitter ie /authorize. 
this works - # around a twitter API bug: https://dev.twitter.com/discussions/21281 - access_type = 'write' if 'publish' in features else 'read' - view = util.oauth_starter(oauth_twitter.Start, feature=feature)( - '/twitter/add', access_type=access_type) - return view.dispatch_request() + GR_CLASS = gr_twitter.Twitter + OAUTH_START = oauth_twitter.Start + SHORT_NAME = "twitter" + TYPE_LABELS = { + "post": "tweet", + "comment": "@-reply", + "repost": "retweet", + "like": "favorite", + } + TRANSIENT_ERROR_HTTP_CODES = ("404",) + CAN_PUBLISH = True + HAS_BLOCKS = True + URL_CANONICALIZER = gr_twitter.Twitter.URL_CANONICALIZER + URL_CANONICALIZER.headers = util.REQUEST_HEADERS + + @staticmethod + def new(auth_entity=None, **kwargs): + """Creates and returns a :class:`Twitter` entity. + + Args: + auth_entity: :class:`oauth_dropins.twitter.TwitterAuth` + kwargs: property values + """ + user = json_loads(auth_entity.user_json) + gr_source = gr_twitter.Twitter(*auth_entity.access_token()) + actor = gr_source.user_to_actor(user) + return Twitter( + id=user["screen_name"], + auth_entity=auth_entity.key, + url=actor.get("url"), + name=actor.get("displayName"), + picture=actor.get("image", {}).get("url"), + **kwargs, + ) + + def silo_url(self): + """Returns the Twitter account URL, e.g. https://twitter.com/foo.""" + return self.gr_source.user_url(self.key_id()) + + def label_name(self): + """Returns the username.""" + return self.key_id() + + def search_for_links(self): + """Searches for activities with links to any of this source's web sites. + + Twitter search supports OR: + https://dev.twitter.com/rest/public/search + + ...but it only returns complete(ish) results if we strip scheme from URLs, + ie search for example.com instead of http://example.com/, and that also + returns false positivies, so we check that the returned tweets actually have + matching links. https://github.com/snarfed/bridgy/issues/565 + + Returns: + sequence of ActivityStreams activity dicts + """ + urls = set( + util.schemeless(util.fragmentless(url), slashes=False) + for url in self.domain_urls + if not util.in_webmention_blocklist(util.domain_from_link(url)) + ) + if not urls: + return [] + + query = " OR ".join(sorted(urls)) + candidates = self.get_activities( + search_query=query, + group_id=gr_source.SEARCH, + etag=self.last_activities_etag, + fetch_replies=False, + fetch_likes=False, + fetch_shares=False, + count=50, + ) + + # filter out retweets and search false positives that don't actually link to us + results = [] + for candidate in candidates: + if candidate.get("verb") == "share": + continue + obj = candidate["object"] + tags = obj.get("tags", []) + atts = obj.get("attachments", []) + for url in urls: + if any( + util.schemeless(t.get("url", ""), slashes=False).startswith(url) + for t in tags + atts + ): + results.append(candidate) + break + + return results + + def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs): + """Returns an ActivityStreams 'like' activity object for a favorite. + + We get Twitter favorites by scraping HTML, and we only get the first page, + which only has 25. So, use a :class:`models.Response` in the datastore + first, if we have one, and only re-scrape HTML as a fallback. 
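As a rough sketch of the query that search_for_links() builds, here is the same idea with plain urllib standing in for util.schemeless()/util.fragmentless(); the domains are invented examples.

import urllib.parse

def schemeless(url):
    # crude stand-in for util.schemeless(url, slashes=False)
    parsed = urllib.parse.urlparse(url)
    return (parsed.netloc + parsed.path).rstrip('/')

domain_urls = ['https://example.com/', 'http://blog.example.org/me']
query = ' OR '.join(sorted(schemeless(u) for u in domain_urls))
print(query)  # blog.example.org/me OR example.com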
+ + Args: + activity_user_id: string id of the user who posted the original activity + activity_id: string activity id + like_user_id: string id of the user who liked the activity + kwargs: passed to :meth:`granary.source.Source.get_comment` + """ + id = self.gr_source.tag_uri("%s_favorited_by_%s" % (activity_id, like_user_id)) + resp = models.Response.get_by_id(id) + if resp: + return json_loads(resp.response_json) + else: + return super().get_like( + activity_user_id, activity_id, like_user_id, **kwargs + ) + + def is_private(self): + """Returns True if this Twitter account is protected. + + https://dev.twitter.com/rest/reference/get/users/show#highlighter_25173 + https://support.twitter.com/articles/14016 + https://support.twitter.com/articles/20169886 + """ + return json_loads(self.auth_entity.get().user_json).get("protected") + + def canonicalize_url(self, url, activity=None, **kwargs): + """Normalize /statuses/ to /status/. + + https://github.com/snarfed/bridgy/issues/618 + """ + url = url.replace("/statuses/", "/status/") + return super().canonicalize_url(url, **kwargs) + + +class Auth: + """Base OAuth handler class.""" + + def start_oauth_flow(self, feature): + """Redirects to Twitter's OAuth endpoint to start the OAuth flow. + + Args: + feature: 'listen' or 'publish' + """ + features = feature.split(",") if feature else [] + for feature in features: + if feature not in models.Source.FEATURES: + util.error(f"Unknown feature: {feature}") + + # pass explicit 'write' instead of None for publish so that oauth-dropins + # (and tweepy) don't use signin_with_twitter ie /authorize. this works + # around a twitter API bug: https://dev.twitter.com/discussions/21281 + access_type = "write" if "publish" in features else "read" + view = util.oauth_starter(oauth_twitter.Start, feature=feature)( + "/twitter/add", access_type=access_type + ) + return view.dispatch_request() class Add(oauth_twitter.Callback, Auth): - def finish(self, auth_entity, state=None): - source = util.maybe_add_or_delete_source(Twitter, auth_entity, state) - feature = util.decode_oauth_state(state).get('feature') + def finish(self, auth_entity, state=None): + source = util.maybe_add_or_delete_source(Twitter, auth_entity, state) + feature = util.decode_oauth_state(state).get("feature") - if source is not None and feature == 'listen' and 'publish' in source.features: - # if we were already signed up for publish, we had a read/write token. - # when we sign up for listen, we use x_auth_access_type=read to request - # just read permissions, which *demotes* us to a read only token! ugh. - # so, do the whole oauth flow again to get a read/write token. - logging.info('Restarting OAuth flow to get publish permissions.') - source.features.remove('publish') - source.put() - return self.start_oauth_flow('publish') + if source is not None and feature == "listen" and "publish" in source.features: + # if we were already signed up for publish, we had a read/write token. + # when we sign up for listen, we use x_auth_access_type=read to request + # just read permissions, which *demotes* us to a read only token! ugh. + # so, do the whole oauth flow again to get a read/write token. + logging.info("Restarting OAuth flow to get publish permissions.") + source.features.remove("publish") + source.put() + return self.start_oauth_flow("publish") class Start(oauth_twitter.Start, Auth): - """Custom OAuth start handler so we can use access_type=read for state=listen. 
- - Tweepy converts access_type to x_auth_access_type for Twitter's - oauth/request_token endpoint. Details: - https://dev.twitter.com/docs/api/1/post/oauth/request_token - """ - def dispatch_request(self): - return self.start_oauth_flow(request.form['feature']) + """Custom OAuth start handler so we can use access_type=read for state=listen. + Tweepy converts access_type to x_auth_access_type for Twitter's + oauth/request_token endpoint. Details: + https://dev.twitter.com/docs/api/1/post/oauth/request_token + """ -app.add_url_rule('/twitter/start', view_func=Start.as_view('twitter_start', '/twitter/add'), methods=['POST']) -app.add_url_rule('/twitter/add', view_func=Add.as_view('twitter_add', 'unused')) -app.add_url_rule('/twitter/delete/finish', view_func=oauth_twitter.Callback.as_view('twitter_delete_finish', '/delete/finish')) -app.add_url_rule('/twitter/publish/start', view_func=oauth_twitter.Start.as_view('twitter_publish_finish', '/publish/twitter/finish'), methods=['POST']) + def dispatch_request(self): + return self.start_oauth_flow(request.form["feature"]) + + +app.add_url_rule( + "/twitter/start", + view_func=Start.as_view("twitter_start", "/twitter/add"), + methods=["POST"], +) +app.add_url_rule("/twitter/add", view_func=Add.as_view("twitter_add", "unused")) +app.add_url_rule( + "/twitter/delete/finish", + view_func=oauth_twitter.Callback.as_view("twitter_delete_finish", "/delete/finish"), +) +app.add_url_rule( + "/twitter/publish/start", + view_func=oauth_twitter.Start.as_view( + "twitter_publish_finish", "/publish/twitter/finish" + ), + methods=["POST"], +) diff --git a/util.py b/util.py index 5ba2b845..ecb39676 100644 --- a/util.py +++ b/util.py @@ -28,25 +28,27 @@ from werkzeug.routing import RequestRedirect # when running in dev_appserver, replace these domains in links with localhost -LOCALHOST_TEST_DOMAINS = frozenset([ - ('snarfed.org', 'localhost'), - ('kylewm.com', 'redwind.dev'), -]) +LOCALHOST_TEST_DOMAINS = frozenset( + [ + ("snarfed.org", "localhost"), + ("kylewm.com", "redwind.dev"), + ] +) -LOCAL_HOSTS = {'localhost', '127.0.0.1'} +LOCAL_HOSTS = {"localhost", "127.0.0.1"} -POLL_TASK_DATETIME_FORMAT = '%Y-%m-%d-%H-%M-%S' +POLL_TASK_DATETIME_FORMAT = "%Y-%m-%d-%H-%M-%S" REQUEST_HEADERS = { - 'User-Agent': 'Bridgy (https://brid.gy/about)', + "User-Agent": "Bridgy (https://brid.gy/about)", } # Only send Accept header to rhiaro.co.uk right now because it needs it, but # Known breaks on it. # https://github.com/snarfed/bridgy/issues/713 REQUEST_HEADERS_CONNEG = copy.copy(REQUEST_HEADERS) -REQUEST_HEADERS_CONNEG['Accept'] = 'text/html, application/json; q=0.9, */*; q=0.8' -CONNEG_DOMAINS = {'rhiaro.co.uk'} -CONNEG_PATHS = {'/twitter/rhiaro'} +REQUEST_HEADERS_CONNEG["Accept"] = "text/html, application/json; q=0.9, */*; q=0.8" +CONNEG_DOMAINS = {"rhiaro.co.uk"} +CONNEG_PATHS = {"/twitter/rhiaro"} # alias allows unit tests to mock the function now_fn = datetime.datetime.now @@ -57,20 +59,18 @@ # We also check this when a user sign up and we extract the web site links from # their profile. We automatically omit links to these domains. _dir = os.path.dirname(__file__) -with open(os.path.join(_dir, 'domain_blocklist.txt'), 'rt', encoding='utf-8') as f: - BLOCKLIST = util.load_file_lines(f) +with open(os.path.join(_dir, "domain_blocklist.txt"), "rt", encoding="utf-8") as f: + BLOCKLIST = util.load_file_lines(f) # Individual URLs that we shouldn't fetch. Started because of # https://github.com/snarfed/bridgy/issues/525 . 
Hopefully temporary and can be # removed once https://github.com/idno/Known/issues/1088 is fixed! -URL_BLOCKLIST = frozenset(( - 'http://www.evdemon.org/2015/learning-more-about-quill', -)) +URL_BLOCKLIST = frozenset(("http://www.evdemon.org/2015/learning-more-about-quill",)) # URL paths of users who opt into testing new "beta" features and changes # before we roll them out to everyone. -with open(os.path.join(_dir, 'beta_users.txt'), 'rt', encoding='utf-8') as f: - BETA_USER_PATHS = util.load_file_lines(f) +with open(os.path.join(_dir, "beta_users.txt"), "rt", encoding="utf-8") as f: + BETA_USER_PATHS = util.load_file_lines(f) # Returned as the HTTP status code when an upstream API fails. Not 5xx so that # it doesn't show up as a server error in graphs or trigger StackDriver's error @@ -81,542 +81,612 @@ HTTP_REQUEST_REFUSED_STATUS_CODE = 599 # Unpacked representation of logged in account in the logins cookie. -Login = collections.namedtuple('Login', ('site', 'name', 'path')) +Login = collections.namedtuple("Login", ("site", "name", "path")) -HOST_URL = 'https://brid.gy' -PRIMARY_DOMAIN = 'brid.gy' +HOST_URL = "https://brid.gy" +PRIMARY_DOMAIN = "brid.gy" OTHER_DOMAINS = ( - 'background.brid-gy.appspot.com', - 'default.brid-gy.appspot.com', - 'brid-gy.appspot.com', - 'www.brid.gy', - 'bridgy.org', - 'www.bridgy.org', + "background.brid-gy.appspot.com", + "default.brid-gy.appspot.com", + "brid-gy.appspot.com", + "www.brid.gy", + "bridgy.org", + "www.bridgy.org", ) LOCAL_DOMAINS = ( - 'localhost:8080', - 'my.dev.com:8080', + "localhost:8080", + "my.dev.com:8080", ) DOMAINS = (PRIMARY_DOMAIN,) + OTHER_DOMAINS + LOCAL_DOMAINS # https://cloud.google.com/appengine/docs/locations -TASKS_LOCATION = 'us-central1' +TASKS_LOCATION = "us-central1" webmention_endpoint_cache_lock = threading.RLock() webmention_endpoint_cache = TTLCache(5000, 60 * 60 * 2) # 2h expiration def add_poll_task(source, now=False): - """Adds a poll task for the given source entity. - - Pass now=True to insert a poll-now task. - """ - if now: - queue = 'poll-now' - eta_seconds = None - else: - queue = 'poll' - eta_seconds = int(util.to_utc_timestamp(now_fn())) - if source.AUTO_POLL: - # add poll period. randomize task ETA to within +/- 20% to try to spread - # out tasks and prevent thundering herds. - eta_seconds += int(source.poll_period().total_seconds() * random.uniform(.8, 1.2)) + """Adds a poll task for the given source entity. - add_task(queue, eta_seconds=eta_seconds, source_key=source.key.urlsafe().decode(), - last_polled=source.last_polled.strftime(POLL_TASK_DATETIME_FORMAT)) + Pass now=True to insert a poll-now task. + """ + if now: + queue = "poll-now" + eta_seconds = None + else: + queue = "poll" + eta_seconds = int(util.to_utc_timestamp(now_fn())) + if source.AUTO_POLL: + # add poll period. randomize task ETA to within +/- 20% to try to spread + # out tasks and prevent thundering herds. 
+ eta_seconds += int( + source.poll_period().total_seconds() * random.uniform(0.8, 1.2) + ) + + add_task( + queue, + eta_seconds=eta_seconds, + source_key=source.key.urlsafe().decode(), + last_polled=source.last_polled.strftime(POLL_TASK_DATETIME_FORMAT), + ) def add_propagate_task(entity): - """Adds a propagate task for the given response entity.""" - add_task('propagate', response_key=entity.key.urlsafe().decode()) + """Adds a propagate task for the given response entity.""" + add_task("propagate", response_key=entity.key.urlsafe().decode()) def add_propagate_blogpost_task(entity): - """Adds a propagate-blogpost task for the given response entity.""" - add_task('propagate-blogpost', key=entity.key.urlsafe().decode()) + """Adds a propagate-blogpost task for the given response entity.""" + add_task("propagate-blogpost", key=entity.key.urlsafe().decode()) def add_discover_task(source, post_id, type=None): - """Adds a discover task for the given source and silo post id.""" - add_task('discover', source_key=source.key.urlsafe().decode(), - post_id=post_id, type=type) + """Adds a discover task for the given source and silo post id.""" + add_task( + "discover", source_key=source.key.urlsafe().decode(), post_id=post_id, type=type + ) def add_task(queue, eta_seconds=None, **kwargs): - """Adds a Cloud Tasks task for the given entity. - - Args: - queue: string, queue name - entity: Source or Webmentions instance - eta_seconds: integer, optional - kwargs: added to task's POST body (form-encoded) - """ - params = { - 'app_engine_http_request': { - 'http_method': 'POST', - 'relative_uri': '/_ah/queue/%s' % queue, - 'body': urllib.parse.urlencode(util.trim_nulls(kwargs)).encode(), - # https://googleapis.dev/python/cloudtasks/latest/gapic/v2/types.html#google.cloud.tasks_v2.types.AppEngineHttpRequest.headers - 'headers': {'Content-Type': 'application/x-www-form-urlencoded'}, + """Adds a Cloud Tasks task for the given entity. 
+ + Args: + queue: string, queue name + entity: Source or Webmentions instance + eta_seconds: integer, optional + kwargs: added to task's POST body (form-encoded) + """ + params = { + "app_engine_http_request": { + "http_method": "POST", + "relative_uri": "/_ah/queue/%s" % queue, + "body": urllib.parse.urlencode(util.trim_nulls(kwargs)).encode(), + # https://googleapis.dev/python/cloudtasks/latest/gapic/v2/types.html#google.cloud.tasks_v2.types.AppEngineHttpRequest.headers + "headers": {"Content-Type": "application/x-www-form-urlencoded"}, + } } - } - if eta_seconds: - params['schedule_time'] = Timestamp(seconds=eta_seconds) + if eta_seconds: + params["schedule_time"] = Timestamp(seconds=eta_seconds) - queue_path = tasks_client.queue_path(APP_ID, TASKS_LOCATION, queue) - if LOCAL: - logging.info('Would add task: %s %s', queue_path, params) - else: - task = tasks_client.create_task(CreateTaskRequest(parent=queue_path, task=params)) - logging.info('Added %s task %s with ETA %s', queue, task.name, eta_seconds) + queue_path = tasks_client.queue_path(APP_ID, TASKS_LOCATION, queue) + if LOCAL: + logging.info("Would add task: %s %s", queue_path, params) + else: + task = tasks_client.create_task( + CreateTaskRequest(parent=queue_path, task=params) + ) + logging.info("Added %s task %s with ETA %s", queue, task.name, eta_seconds) class Redirect(RequestRedirect): - """Adds login cookie support to :class:`werkzeug.exceptions.RequestRedirect`.""" - logins = None + """Adds login cookie support to :class:`werkzeug.exceptions.RequestRedirect`.""" - def get_response(self, *args, **kwargs): - resp = super().get_response() + logins = None - if self.logins is not None: - # cookie docs: http://curl.haxx.se/rfc/cookie_spec.html - cookie = '|'.join(sorted(set( - f'{login.path}?{urllib.parse.quote_plus(login.name)}' - for login in self.logins))) + def get_response(self, *args, **kwargs): + resp = super().get_response() - logging.info(f'setting logins cookie: {cookie}') - age = datetime.timedelta(days=365 * 2) - expires = (now_fn() + age).replace(microsecond=0) - resp.set_cookie('logins', cookie, max_age=age, expires=expires) + if self.logins is not None: + # cookie docs: http://curl.haxx.se/rfc/cookie_spec.html + cookie = "|".join( + sorted( + set( + f"{login.path}?{urllib.parse.quote_plus(login.name)}" + for login in self.logins + ) + ) + ) - return resp + logging.info(f"setting logins cookie: {cookie}") + age = datetime.timedelta(days=365 * 2) + expires = (now_fn() + age).replace(microsecond=0) + resp.set_cookie("logins", cookie, max_age=age, expires=expires) + + return resp def redirect(path, code=302, logins=None): - """Stops execution and redirects to the absolute URL for a given path. + """Stops execution and redirects to the absolute URL for a given path. - Specifically, raises :class:`werkzeug.routing.RequestRedirect`. + Specifically, raises :class:`werkzeug.routing.RequestRedirect`. 
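A small sketch of the cookie value that Redirect.get_response() assembles, using the Login namedtuple defined earlier in this module; the two accounts are invented.

import collections
import urllib.parse

Login = collections.namedtuple('Login', ('site', 'name', 'path'))
logins = [
    Login(site='twitter', name='example user', path='/twitter/example'),
    Login(site='tumblr', name='example.com', path='/tumblr/example.com'),
]
cookie = '|'.join(sorted(set(
    f'{login.path}?{urllib.parse.quote_plus(login.name)}' for login in logins)))
print(cookie)  # /tumblr/example.com?example.com|/twitter/example?example+user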
- Args: - url: str - code: int, HTTP status code - logins: optional, list of :class:`util.Login` to be set in a Set-Cookie HTTP - header - """ - logging.info(f'Redirecting to {path}') - rr = Redirect(host_url(path)) - rr.code = code - rr.logins = logins - raise rr + Args: + url: str + code: int, HTTP status code + logins: optional, list of :class:`util.Login` to be set in a Set-Cookie HTTP + header + """ + logging.info(f"Redirecting to {path}") + rr = Redirect(host_url(path)) + rr.code = code + rr.logins = logins + raise rr def webmention_endpoint_cache_key(url): - """Returns cache key for a cached webmention endpoint for a given URL. + """Returns cache key for a cached webmention endpoint for a given URL. - Example: 'W https snarfed.org /' + Example: 'W https snarfed.org /' - If the URL is the home page, ie path is / , the key includes a / at the end, - so that we cache webmention endpoints for home pages separate from other pages. - https://github.com/snarfed/bridgy/issues/701 - """ - domain = util.domain_from_link(url) - scheme = urllib.parse.urlparse(url).scheme + If the URL is the home page, ie path is / , the key includes a / at the end, + so that we cache webmention endpoints for home pages separate from other pages. + https://github.com/snarfed/bridgy/issues/701 + """ + domain = util.domain_from_link(url) + scheme = urllib.parse.urlparse(url).scheme - parts = ['W', scheme, domain] - if urllib.parse.urlparse(url).path in ('', '/'): - parts.append('/') + parts = ["W", scheme, domain] + if urllib.parse.urlparse(url).path in ("", "/"): + parts.append("/") - return ' '.join(parts) + return " ".join(parts) def report_error(msg, **kwargs): - """Reports an error to StackDriver Error Reporting. + """Reports an error to StackDriver Error Reporting. - https://cloud.google.com/error-reporting/docs/reference/libraries#client-libraries-install-python + https://cloud.google.com/error-reporting/docs/reference/libraries#client-libraries-install-python - Args: - msg: string - """ - try: - error_reporting_client.report(msg, **kwargs) - except BaseException: - if not DEBUG: - logging.warning('Failed to report error to StackDriver! %s %s', msg, kwargs, - exc_info=True) + Args: + msg: string + """ + try: + error_reporting_client.report(msg, **kwargs) + except BaseException: + if not DEBUG: + logging.warning( + "Failed to report error to StackDriver! %s %s", + msg, + kwargs, + exc_info=True, + ) def requests_get(url, **kwargs): - """Wraps :func:`requests.get` with extra semantics and our user agent. + """Wraps :func:`requests.get` with extra semantics and our user agent. - If a server tells us a response will be too big (based on Content-Length), we - hijack the response and return 599 and an error response body instead. We pass - stream=True to :func:`requests.get` so that it doesn't fetch the response body - until we access :attr:`requests.Response.content` (or - :attr:`requests.Response.text`). + If a server tells us a response will be too big (based on Content-Length), we + hijack the response and return 599 and an error response body instead. We pass + stream=True to :func:`requests.get` so that it doesn't fetch the response body + until we access :attr:`requests.Response.content` (or + :attr:`requests.Response.text`). 
- http://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow - """ - host = urllib.parse.urlparse(url).netloc.split(':')[0] - if url in URL_BLOCKLIST or (not LOCAL and host in LOCAL_HOSTS): - resp = requests.Response() - resp.status_code = HTTP_REQUEST_REFUSED_STATUS_CODE - resp._text = 'Sorry, Bridgy has blocklisted this URL.' - resp._content = resp._text.encode() - return resp + http://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow + """ + host = urllib.parse.urlparse(url).netloc.split(":")[0] + if url in URL_BLOCKLIST or (not LOCAL and host in LOCAL_HOSTS): + resp = requests.Response() + resp.status_code = HTTP_REQUEST_REFUSED_STATUS_CODE + resp._text = "Sorry, Bridgy has blocklisted this URL." + resp._content = resp._text.encode() + return resp - kwargs.setdefault('headers', {}).update(request_headers(url=url)) - return util.requests_get(url, **kwargs) + kwargs.setdefault("headers", {}).update(request_headers(url=url)) + return util.requests_get(url, **kwargs) def fetch_mf2(url, **kwargs): - """Injects :func:`requests_get` into :func:`oauth_dropins.webutil.util.fetch_mf2`.""" - return util.fetch_mf2(url, get_fn=requests_get, **kwargs) + """Injects :func:`requests_get` into :func:`oauth_dropins.webutil.util.fetch_mf2`.""" + return util.fetch_mf2(url, get_fn=requests_get, **kwargs) def requests_post(url, **kwargs): - """Wraps :func:`requests.get` with our user agent.""" - kwargs.setdefault('headers', {}).update(request_headers(url=url)) - return util.requests_post(url, **kwargs) + """Wraps :func:`requests.get` with our user agent.""" + kwargs.setdefault("headers", {}).update(request_headers(url=url)) + return util.requests_post(url, **kwargs) def follow_redirects(url): - """Wraps :func:`oauth_dropins.webutil.util.follow_redirects` with our headers.""" - return util.follow_redirects(url, headers=request_headers(url=url)) + """Wraps :func:`oauth_dropins.webutil.util.follow_redirects` with our headers.""" + return util.follow_redirects(url, headers=request_headers(url=url)) def request_headers(url=None, source=None): - if (url and util.domain_from_link(url) in CONNEG_DOMAINS or - source and source.bridgy_path() in CONNEG_PATHS): - return REQUEST_HEADERS_CONNEG + if ( + url + and util.domain_from_link(url) in CONNEG_DOMAINS + or source + and source.bridgy_path() in CONNEG_PATHS + ): + return REQUEST_HEADERS_CONNEG - return REQUEST_HEADERS + return REQUEST_HEADERS def get_webmention_target(url, resolve=True, replace_test_domains=True): - """Resolves a URL and decides whether we should try to send it a webmention. - - Note that this ignores failed HTTP requests, ie the boolean in the returned - tuple will be True! TODO: check callers and reconsider this. - - Args: - url: string - resolve: whether to follow redirects - replace_test_domains: whether to replace test user domains with localhost - - Returns: - (string url, string pretty domain, boolean) tuple. The boolean is - True if we should send a webmention, False otherwise, e.g. if it's a bad - URL, not text/html, or in the blocklist. 
- """ - url = util.clean_url(url) - try: - domain = domain_from_link(url).lower() - except BaseException: - logging.info('Dropping bad URL: %r.', url) - return url, None, False - - send = True - if resolve: - # this follows *all* redirects, until the end - resolved = follow_redirects(url) - html = resolved.headers.get('content-type', '').startswith('text/html') - send = html and resolved.status_code != util.HTTP_RESPONSE_TOO_BIG_STATUS_CODE - url, domain, _ = get_webmention_target( - resolved.url, resolve=False, replace_test_domains=replace_test_domains) - - scheme = urllib.parse.urlparse(url).scheme # require http or https - send = (send and domain and scheme in ('http', 'https') and - not in_webmention_blocklist(domain)) - - if replace_test_domains: - url = replace_test_domains_with_localhost(url) - - return url, domain, send + """Resolves a URL and decides whether we should try to send it a webmention. + + Note that this ignores failed HTTP requests, ie the boolean in the returned + tuple will be True! TODO: check callers and reconsider this. + + Args: + url: string + resolve: whether to follow redirects + replace_test_domains: whether to replace test user domains with localhost + + Returns: + (string url, string pretty domain, boolean) tuple. The boolean is + True if we should send a webmention, False otherwise, e.g. if it's a bad + URL, not text/html, or in the blocklist. + """ + url = util.clean_url(url) + try: + domain = domain_from_link(url).lower() + except BaseException: + logging.info("Dropping bad URL: %r.", url) + return url, None, False + + send = True + if resolve: + # this follows *all* redirects, until the end + resolved = follow_redirects(url) + html = resolved.headers.get("content-type", "").startswith("text/html") + send = html and resolved.status_code != util.HTTP_RESPONSE_TOO_BIG_STATUS_CODE + url, domain, _ = get_webmention_target( + resolved.url, resolve=False, replace_test_domains=replace_test_domains + ) + + scheme = urllib.parse.urlparse(url).scheme # require http or https + send = ( + send + and domain + and scheme in ("http", "https") + and not in_webmention_blocklist(domain) + ) + + if replace_test_domains: + url = replace_test_domains_with_localhost(url) + + return url, domain, send def in_webmention_blocklist(domain): - """Returns True if the domain or its root domain is in BLOCKLIST.""" - domain = domain.lower() - return (util.domain_or_parent_in(domain, BLOCKLIST) or - (not LOCAL and domain in LOCAL_HOSTS)) + """Returns True if the domain or its root domain is in BLOCKLIST.""" + domain = domain.lower() + return util.domain_or_parent_in(domain, BLOCKLIST) or ( + not LOCAL and domain in LOCAL_HOSTS + ) def prune_activity(activity, source): - """Prunes an activity down to just id, url, content, to, and object, in place. + """Prunes an activity down to just id, url, content, to, and object, in place. - If the object field exists, it's pruned down to the same fields. Any fields - duplicated in both the activity and the object are removed from the object. + If the object field exists, it's pruned down to the same fields. Any fields + duplicated in both the activity and the object are removed from the object. - Note that this only prunes the to field if it says the activity is public, - since :meth:`granary.source.Source.is_public()` defaults to saying an activity - is public if the to field is missing. If that ever changes, we'll need to - start preserving the to field here. 
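A simplified sketch of the pruning that prune_activity() performs, leaving out the Source / is_activity_public() check; the activity dict is invented.

KEEP = ['id', 'url', 'content', 'fb_id', 'fb_object_id', 'fb_object_type']

activity = {
    'id': 'tag:example:1',
    'url': 'https://example.com/1',
    'verb': 'post',  # dropped: not in KEEP
    'object': {'url': 'https://example.com/1', 'content': 'hi'},
}
pruned = {f: activity[f] for f in KEEP if activity.get(f)}
# fields duplicated between the activity and its object are removed from the object
pruned['object'] = {k: v for k, v in activity['object'].items()
                    if pruned.get(k) != v}
print(pruned)
# {'id': 'tag:example:1', 'url': 'https://example.com/1', 'object': {'content': 'hi'}}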
+ Note that this only prunes the to field if it says the activity is public, + since :meth:`granary.source.Source.is_public()` defaults to saying an activity + is public if the to field is missing. If that ever changes, we'll need to + start preserving the to field here. - Args: - activity: ActivityStreams activity dict + Args: + activity: ActivityStreams activity dict - Returns: - pruned activity dict - """ - keep = ['id', 'url', 'content', 'fb_id', 'fb_object_id', 'fb_object_type'] - if not source.is_activity_public(activity): - keep += ['to'] - pruned = {f: activity.get(f) for f in keep} + Returns: + pruned activity dict + """ + keep = ["id", "url", "content", "fb_id", "fb_object_id", "fb_object_type"] + if not source.is_activity_public(activity): + keep += ["to"] + pruned = {f: activity.get(f) for f in keep} - obj = activity.get('object') - if obj: - obj = pruned['object'] = prune_activity(obj, source) - for k, v in list(obj.items()): - if pruned.get(k) == v: - del obj[k] + obj = activity.get("object") + if obj: + obj = pruned["object"] = prune_activity(obj, source) + for k, v in list(obj.items()): + if pruned.get(k) == v: + del obj[k] - return trim_nulls(pruned) + return trim_nulls(pruned) def prune_response(response): - """Returns a response object dict with a few fields removed. + """Returns a response object dict with a few fields removed. - Args: - response: ActivityStreams response object + Args: + response: ActivityStreams response object - Returns: - pruned response object - """ - obj = response.get('object') - if obj: - response['object'] = prune_response(obj) + Returns: + pruned response object + """ + obj = response.get("object") + if obj: + response["object"] = prune_response(obj) - drop = ['activity', 'mentions', 'originals', 'replies', 'tags'] - return trim_nulls({k: v for k, v in response.items() if k not in drop}) + drop = ["activity", "mentions", "originals", "replies", "tags"] + return trim_nulls({k: v for k, v in response.items() if k not in drop}) def replace_test_domains_with_localhost(url): - """Replace domains in LOCALHOST_TEST_DOMAINS with localhost for local testing. + """Replace domains in LOCALHOST_TEST_DOMAINS with localhost for local testing. - Args: - url: a string + Args: + url: a string - Returns: - a string with certain well-known domains replaced by localhost - """ - if url and LOCAL: - for test_domain, local_domain in LOCALHOST_TEST_DOMAINS: - url = re.sub('https?://' + test_domain, - 'http://' + local_domain, url) - return url + Returns: + a string with certain well-known domains replaced by localhost + """ + if url and LOCAL: + for test_domain, local_domain in LOCALHOST_TEST_DOMAINS: + url = re.sub("https?://" + test_domain, "http://" + local_domain, url) + return url def host_url(path_query=None): - domain = util.domain_from_link(request.host_url) - base = (HOST_URL if util.domain_or_parent_in(domain, OTHER_DOMAINS) - else request.host_url) - return urllib.parse.urljoin(base, path_query) + domain = util.domain_from_link(request.host_url) + base = ( + HOST_URL + if util.domain_or_parent_in(domain, OTHER_DOMAINS) + else request.host_url + ) + return urllib.parse.urljoin(base, path_query) def load_source(): - """Extracts a URL-safe key from a query parameter and loads a source object. + """Extracts a URL-safe key from a query parameter and loads a source object. - Returns HTTP 400 if the parameter is not provided or the source doesn't exist. + Returns HTTP 400 if the parameter is not provided or the source doesn't exist. 
- Args: - handler: RequestHandler - param: string + Args: + handler: RequestHandler + param: string - Returns: Source object - """ - for param in 'source_key', 'key': - try: - val = request.values.get(param) - if val: - source = ndb.Key(urlsafe=val).get() - if source: - return source - except (binascii.Error, google.protobuf.message.DecodeError): - error(f'Bad value for {param}') + Returns: Source object + """ + for param in "source_key", "key": + try: + val = request.values.get(param) + if val: + source = ndb.Key(urlsafe=val).get() + if source: + return source + except (binascii.Error, google.protobuf.message.DecodeError): + error(f"Bad value for {param}") - error('Source key not found') + error("Source key not found") def maybe_add_or_delete_source(source_cls, auth_entity, state, **kwargs): - """Adds or deletes a source if auth_entity is not None. - - Used in each source's oauth-dropins :meth:`Callback.finish()` and - :meth:`Callback.get()` methods, respectively. - - Args: - source_cls: source class, e.g. :class:`instagram.Instagram` - auth_entity: ouath-dropins auth entity - state: string, OAuth callback state parameter. a JSON serialized dict - with operation, feature, and an optional callback URL. For deletes, - it will also include the source key - kwargs: passed through to the source_cls constructor - - Returns: - source entity if it was created or updated, otherwise None - """ - state_obj = util.decode_oauth_state(state) - operation = state_obj.get('operation', 'add') - feature = state_obj.get('feature') - callback = state_obj.get('callback') - user_url = state_obj.get('user_url') - - logging.debug( - 'maybe_add_or_delete_source with operation=%s, feature=%s, callback=%s', - operation, feature, callback) - logins = None - - if operation == 'add': # this is an add/update - if not auth_entity: - # TODO: only show if we haven't already flashed another message? - # get_flashed_messages() caches so it's dangerous to call to check; - # use eg session.get('_flashes', []) instead. - # https://stackoverflow.com/a/17243946/186123 - flash("OK, you're not signed up. Hope you reconsider!") - if callback: - callback = util.add_query_params(callback, {'result': 'declined'}) - logging.debug( - f'user declined adding source, redirect to external callback {callback}') - redirect(callback) - else: - redirect('/') - - logging.info('%s.create_new with %s', source_cls.__class__.__name__, - (auth_entity.key, state, kwargs)) - source = source_cls.create_new(auth_entity=auth_entity, - features=feature.split(',') if feature else [], - user_url=user_url, **kwargs) - - if source: - # add to login cookie - logins = get_logins() - logins.append(Login(path=source.bridgy_path(), site=source.SHORT_NAME, - name=source.label_name())) - - if callback: - callback = util.add_query_params(callback, { - 'result': 'success', - 'user': source.bridgy_url(), - 'key': source.key.urlsafe().decode(), - } if source else {'result': 'failure'}) - logging.debug( - 'finished adding source, redirect to external callback %s', callback) - redirect(callback, logins=logins) - - elif source and not source.domains: - redirect('/edit-websites?' + urllib.parse.urlencode({ - 'source_key': source.key.urlsafe().decode(), - }), logins=logins) - - else: - redirect(source.bridgy_url() if source else '/', logins=logins) + """Adds or deletes a source if auth_entity is not None. + + Used in each source's oauth-dropins :meth:`Callback.finish()` and + :meth:`Callback.get()` methods, respectively. + + Args: + source_cls: source class, e.g. 
:class:`instagram.Instagram` + auth_entity: ouath-dropins auth entity + state: string, OAuth callback state parameter. a JSON serialized dict + with operation, feature, and an optional callback URL. For deletes, + it will also include the source key + kwargs: passed through to the source_cls constructor + + Returns: + source entity if it was created or updated, otherwise None + """ + state_obj = util.decode_oauth_state(state) + operation = state_obj.get("operation", "add") + feature = state_obj.get("feature") + callback = state_obj.get("callback") + user_url = state_obj.get("user_url") + + logging.debug( + "maybe_add_or_delete_source with operation=%s, feature=%s, callback=%s", + operation, + feature, + callback, + ) + logins = None + + if operation == "add": # this is an add/update + if not auth_entity: + # TODO: only show if we haven't already flashed another message? + # get_flashed_messages() caches so it's dangerous to call to check; + # use eg session.get('_flashes', []) instead. + # https://stackoverflow.com/a/17243946/186123 + flash("OK, you're not signed up. Hope you reconsider!") + if callback: + callback = util.add_query_params(callback, {"result": "declined"}) + logging.debug( + f"user declined adding source, redirect to external callback {callback}" + ) + redirect(callback) + else: + redirect("/") + + logging.info( + "%s.create_new with %s", + source_cls.__class__.__name__, + (auth_entity.key, state, kwargs), + ) + source = source_cls.create_new( + auth_entity=auth_entity, + features=feature.split(",") if feature else [], + user_url=user_url, + **kwargs, + ) - else: # this is a delete - if auth_entity: - redirect('/delete/finish?auth_entity=%s&state=%s' % - (auth_entity.key.urlsafe().decode(), state), logins=logins) - else: - flash('If you want to disable, please approve the %s prompt.' % - source_cls.GR_CLASS.NAME) - source_key = state_obj.get('source') - if source_key: - source = ndb.Key(urlsafe=source_key).get() if source: - redirect(source.bridgy_url()) - - redirect('/') + # add to login cookie + logins = get_logins() + logins.append( + Login( + path=source.bridgy_path(), + site=source.SHORT_NAME, + name=source.label_name(), + ) + ) + + if callback: + callback = util.add_query_params( + callback, + { + "result": "success", + "user": source.bridgy_url(), + "key": source.key.urlsafe().decode(), + } + if source + else {"result": "failure"}, + ) + logging.debug( + "finished adding source, redirect to external callback %s", callback + ) + redirect(callback, logins=logins) + + elif source and not source.domains: + redirect( + "/edit-websites?" + + urllib.parse.urlencode( + { + "source_key": source.key.urlsafe().decode(), + } + ), + logins=logins, + ) + + else: + redirect(source.bridgy_url() if source else "/", logins=logins) + + else: # this is a delete + if auth_entity: + redirect( + "/delete/finish?auth_entity=%s&state=%s" + % (auth_entity.key.urlsafe().decode(), state), + logins=logins, + ) + else: + flash( + "If you want to disable, please approve the %s prompt." + % source_cls.GR_CLASS.NAME + ) + source_key = state_obj.get("source") + if source_key: + source = ndb.Key(urlsafe=source_key).get() + if source: + redirect(source.bridgy_url()) + + redirect("/") def construct_state_param_for_add(state=None, **kwargs): - """Construct the state parameter if one isn't explicitly passed in. + """Construct the state parameter if one isn't explicitly passed in. 
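As a sketch of the state blob these handlers pass around, assuming plain JSON as maybe_add_or_delete_source()'s docstring describes; the callback URL is invented, and the real util.encode_oauth_state()/decode_oauth_state() helpers may wrap it differently.

import json

state = json.dumps({
    'operation': 'add',
    'feature': 'listen',
    'callback': 'https://indieauth.example/done',  # invented external callback
})
assert json.loads(state)['feature'] == 'listen'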
- The following keys are common: - - operation: 'add' or 'delete' - - feature: 'listen', 'publish', or 'webmention' - - callback: an optional external callback, that we will redirect to at - the end of the authorization handshake - - source: the source key, only applicable to deletes - """ - state_obj = util.decode_oauth_state(state) - if not state_obj: - state_obj = {field: request.values.get(field) for field in - ('callback', 'feature', 'id', 'user_url')} - state_obj['operation'] = request.values.get('operation') or 'add' + The following keys are common: + - operation: 'add' or 'delete' + - feature: 'listen', 'publish', or 'webmention' + - callback: an optional external callback, that we will redirect to at + the end of the authorization handshake + - source: the source key, only applicable to deletes + """ + state_obj = util.decode_oauth_state(state) + if not state_obj: + state_obj = { + field: request.values.get(field) + for field in ("callback", "feature", "id", "user_url") + } + state_obj["operation"] = request.values.get("operation") or "add" - if kwargs: - state_obj.update(kwargs) + if kwargs: + state_obj.update(kwargs) - return util.encode_oauth_state(state_obj) + return util.encode_oauth_state(state_obj) def get_logins(): - """Extracts the current user page paths from the logins cookie. + """Extracts the current user page paths from the logins cookie. - The logins cookie is set in :meth:`redirect` and :class:`Redirect`. + The logins cookie is set in :meth:`redirect` and :class:`Redirect`. - Returns: - list of :class:`Login` objects - """ - logins_str = request.cookies.get('logins') - if not logins_str: - return [] + Returns: + list of :class:`Login` objects + """ + logins_str = request.cookies.get("logins") + if not logins_str: + return [] - logins = [] - for val in set(urllib.parse.unquote_plus(logins_str).split('|')): - parts = val.split('?', 1) - path = parts[0] - if not path: - continue - name = parts[1] if len(parts) > 1 else '' - site, _ = path.strip('/').split('/') - logins.append(Login(path=path, site=site, name=name)) + logins = [] + for val in set(urllib.parse.unquote_plus(logins_str).split("|")): + parts = val.split("?", 1) + path = parts[0] + if not path: + continue + name = parts[1] if len(parts) > 1 else "" + site, _ = path.strip("/").split("/") + logins.append(Login(path=path, site=site, name=name)) - return logins + return logins def preprocess_source(source): - """Prepares a source entity for rendering in the source.html template. + """Prepares a source entity for rendering in the source.html template. - - convert image URLs to https if we're serving over SSL - - set 'website_links' attr to list of pretty HTML links to domain_urls + - convert image URLs to https if we're serving over SSL + - set 'website_links' attr to list of pretty HTML links to domain_urls - Args: - source: :class:`models.Source` entity - """ - if source.picture: - source.picture = util.update_scheme(source.picture, request) - source.website_links = [ - util.pretty_link(url, attrs={'rel': 'me', 'class': 'u-url'}) - for url in source.domain_urls] - return source + Args: + source: :class:`models.Source` entity + """ + if source.picture: + source.picture = util.update_scheme(source.picture, request) + source.website_links = [ + util.pretty_link(url, attrs={"rel": "me", "class": "u-url"}) + for url in source.domain_urls + ] + return source def oauth_starter(oauth_start_view, **kwargs): - """Returns an oauth-dropins start view that injects the state param. 
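And the counterpart sketch for get_logins(): splitting a logins cookie back into site, name, and path pieces; the cookie value is the invented example from the Redirect sketch above.

import urllib.parse

cookie = '/tumblr/example.com?example.com|/twitter/example?example+user'
for val in sorted(set(urllib.parse.unquote_plus(cookie).split('|'))):
    path, _, name = val.partition('?')
    site = path.strip('/').split('/')[0]
    print(site, name, path)
# tumblr example.com /tumblr/example.com
# twitter example user /twitter/example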
+ """Returns an oauth-dropins start view that injects the state param. + + Args: + oauth_start_view: oauth-dropins :class:`Start` to use, + e.g. :class:`oauth_dropins.twitter.Start`. + kwargs: passed to :meth:`construct_state_param_for_add()` + """ - Args: - oauth_start_view: oauth-dropins :class:`Start` to use, - e.g. :class:`oauth_dropins.twitter.Start`. - kwargs: passed to :meth:`construct_state_param_for_add()` - """ - class Start(oauth_start_view): - def redirect_url(self, state=None, **ru_kwargs): - return super().redirect_url( - state=construct_state_param_for_add(state, **kwargs), **ru_kwargs) + class Start(oauth_start_view): + def redirect_url(self, state=None, **ru_kwargs): + return super().redirect_url( + state=construct_state_param_for_add(state, **kwargs), **ru_kwargs + ) - return Start + return Start def unwrap_t_umblr_com(url): - """If url is a t.umblr.com short link, extract its destination URL. + """If url is a t.umblr.com short link, extract its destination URL. - Otherwise, return url unchanged. + Otherwise, return url unchanged. - Not in tumblr.py since models imports superfeedr, so it would be a circular - import. + Not in tumblr.py since models imports superfeedr, so it would be a circular + import. - Background: https://github.com/snarfed/bridgy/issues/609 - """ - parsed = urllib.parse.urlparse(url) - return (urllib.parse.parse_qs(parsed.query).get('z', [''])[0] - if parsed.netloc == 't.umblr.com' - else url) + Background: https://github.com/snarfed/bridgy/issues/609 + """ + parsed = urllib.parse.urlparse(url) + return ( + urllib.parse.parse_qs(parsed.query).get("z", [""])[0] + if parsed.netloc == "t.umblr.com" + else url + ) diff --git a/webmention.py b/webmention.py index 3235b19c..9e9fe156 100644 --- a/webmention.py +++ b/webmention.py @@ -17,180 +17,220 @@ import util -@app.route('/publish/', - methods=['GET', 'HEAD']) +@app.route( + "/publish/", + methods=["GET", "HEAD"], +) def webmention_get_or_head(silo): - """Serves webmention discovery for HEADs to webmention endpoints.""" - return f"""\ + """Serves webmention discovery for HEADs to webmention endpoints.""" + return f"""\ Nothing here! Try the docs instead. """, { - 'Link': f'<{util.host_url("/publish/webmention")}>; rel="webmention"', - } + "Link": f'<{util.host_url("/publish/webmention")}>; rel="webmention"', + } class Webmention(View): - """Webmention base view. + """Webmention base view. - Attributes: + Attributes: - * source: the :class:`models.Source` for this webmention - * entity: the :class:`models.Publish` or :class:`models.Webmention` entity for - this webmention - """ - source = None - entity = None - - def fetch_mf2(self, url, id=None, require_mf2=True, raise_errors=False): - """Fetches a URL and extracts its mf2 data. - - Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()` - on errors. - - Args: - url: string - id: string, optional id of specific element to extract and parse. defaults - to the whole page. 
- require_mf2: boolean, whether to return error if no mf2 are found - raise_errors: boolean, whether to let error exceptions propagate up or - handle them - - Returns: - (:class:`requests.Response`, mf2 data dict) on success, None on failure + * source: the :class:`models.Source` for this webmention + * entity: the :class:`models.Publish` or :class:`models.Webmention` entity for + this webmention """ - try: - resp = util.requests_get(url) - resp.raise_for_status() - except werkzeug.exceptions.HTTPException: - # raised by us, probably via self.error() - raise - except BaseException as e: - if raise_errors: - raise - util.interpret_http_exception(e) # log exception - self.error('Could not fetch source URL %s' % url) - - if self.entity: - self.entity.html = resp.text - - # parse microformats - soup = util.parse_html(resp) - mf2 = util.parse_mf2(soup, url=resp.url, id=id) - if id and not mf2: - self.error('Got fragment %s but no element found with that id.' % id) - - # special case tumblr's markup: div#content > div.post > div.copy - # convert to mf2 and re-parse - if not mf2.get('items'): - contents = soup.find_all(id='content') - if contents: - post = contents[0].find_next(class_='post') - if post: - post['class'] = 'h-entry' - copy = post.find_next(class_='copy') - if copy: - copy['class'] = 'e-content' - photo = post.find_next(class_='photo-wrapper') - if photo: - img = photo.find_next('img') - if img: - img['class'] = 'u-photo' - # TODO: i should be able to pass post or contents[0] to mf2py instead - # here, but it returns no items. mf2py bug? - doc = str(post) - mf2 = util.parse_mf2(doc, resp.url) - - logging.debug('Parsed microformats2: %s', json_dumps(mf2, indent=2)) - items = mf2.get('items', []) - if require_mf2 and (not items or not items[0]): - self.error('No microformats2 data found in ' + resp.url, data=mf2, html=""" + + source = None + entity = None + + def fetch_mf2(self, url, id=None, require_mf2=True, raise_errors=False): + """Fetches a URL and extracts its mf2 data. + + Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()` + on errors. + + Args: + url: string + id: string, optional id of specific element to extract and parse. defaults + to the whole page. + require_mf2: boolean, whether to return error if no mf2 are found + raise_errors: boolean, whether to let error exceptions propagate up or + handle them + + Returns: + (:class:`requests.Response`, mf2 data dict) on success, None on failure + """ + try: + resp = util.requests_get(url) + resp.raise_for_status() + except werkzeug.exceptions.HTTPException: + # raised by us, probably via self.error() + raise + except BaseException as e: + if raise_errors: + raise + util.interpret_http_exception(e) # log exception + self.error("Could not fetch source URL %s" % url) + + if self.entity: + self.entity.html = resp.text + + # parse microformats + soup = util.parse_html(resp) + mf2 = util.parse_mf2(soup, url=resp.url, id=id) + if id and not mf2: + self.error("Got fragment %s but no element found with that id." 
% id) + + # special case tumblr's markup: div#content > div.post > div.copy + # convert to mf2 and re-parse + if not mf2.get("items"): + contents = soup.find_all(id="content") + if contents: + post = contents[0].find_next(class_="post") + if post: + post["class"] = "h-entry" + copy = post.find_next(class_="copy") + if copy: + copy["class"] = "e-content" + photo = post.find_next(class_="photo-wrapper") + if photo: + img = photo.find_next("img") + if img: + img["class"] = "u-photo" + # TODO: i should be able to pass post or contents[0] to mf2py instead + # here, but it returns no items. mf2py bug? + doc = str(post) + mf2 = util.parse_mf2(doc, resp.url) + + logging.debug("Parsed microformats2: %s", json_dumps(mf2, indent=2)) + items = mf2.get("items", []) + if require_mf2 and (not items or not items[0]): + self.error( + "No microformats2 data found in " + resp.url, + data=mf2, + html=""" No microformats or microformats2 found in %s! See indiewebify.me for details (skip to level 2, Publishing on the IndieWeb). -""" % (resp.url, util.pretty_link(resp.url))) - - return resp, mf2 - - def error(self, error, html=None, status=400, data=None, log_exception=False, - report=False, extra_json=None): - """Handle an error. May be overridden by subclasses. - - Args: - error: string human-readable error message - html: string HTML human-readable error message - status: int HTTP response status code - data: mf2 data dict parsed from source page - log_exception: boolean, whether to include a stack trace in the log msg - report: boolean, whether to report to StackDriver Error Reporting - extra_json: dict to be merged into the JSON response body - """ - if self.entity and self.entity.status == 'new': - self.entity.status = 'failed' - self.entity.put() - - resp = {'error': error} - if data: - resp['parsed'] = data - if extra_json: - assert 'error' not in extra_json - assert 'parsed' not in extra_json - resp.update(extra_json) - - if report and status != 404: - self.report_error(error, status=status) - - flask_util.error(str(resp), status=status, response=jsonify(resp), - exc_info=log_exception) - - def report_error(self, resp, status=None): - """Report an error to StackDriver Error reporting.""" - # don't report specific known failures - if ('Deadline exceeded while waiting for HTTP response' in resp or - 'urlfetch.Fetch() took too long' in resp or - # WordPress Jetpack bugs - # https://github.com/snarfed/bridgy/issues/161 - '"resp": "invalid_input"' in resp or - # https://github.com/snarfed/bridgy/issues/750 - '"error": "jetpack_verification_failed"' in resp or - # https://console.cloud.google.com/errors/CMjIg52NkMLQYA - 'The Jetpack site encountered an error and could not process the API request' in resp or - # Blogger known bug - # https://github.com/snarfed/bridgy/issues/175 - 'bX-2i87au' in resp or - # Tumblr: transient Disqus error looking up thread - # https://github.com/snarfed/bridgy/issues/177 - "Invalid argument, 'thread': Unable to find thread" in resp or - # expected for partially set up tumblr accounts - "we haven't found your Disqus account" in resp or - # Twitter 5MB image file size limit - '"message":"Image file size must be' in resp or - # Twitter media file number limits - 'Tweet with media must have exactly 1 gif or video' in resp or - # Facebook image type/size req'ts - 'Missing or invalid image file' in resp or - "Your photos couldn't be uploaded. Photos should be less than 4 MB" in resp or - # Twitter duplicate publish attempts - 'Status is a duplicate.' 
in resp or - 'You have already favorited this status.' in resp or - 'You have already retweeted this' in resp or - # Facebook duplicate publish attempts - 'This status update is identical to the last one you posted.' in resp or - # WordPress duplicate comment - # "error": "Error: 409 HTTP Error 409: Conflict; {\n \"error\": \"comment_duplicate\",\n \"message\": \"Duplicate comment detected; it looks as though you’ve already said that!\"\n}\n" - 'comment_duplicate' in resp): - return - - subject = '%s %s' % (self.__class__.__name__, - '%s %s' % (self.entity.type, self.entity.status) - if self.entity else 'failed') - user = self.source.bridgy_url() if self.source else None - util.report_error(subject, user=user, - http_context=error_reporting.HTTPContext( - method=request.method, - url=request.url, - response_status_code=status, - remote_ip=request.remote_addr)) +""" + % (resp.url, util.pretty_link(resp.url)), + ) + + return resp, mf2 + + def error( + self, + error, + html=None, + status=400, + data=None, + log_exception=False, + report=False, + extra_json=None, + ): + """Handle an error. May be overridden by subclasses. + + Args: + error: string human-readable error message + html: string HTML human-readable error message + status: int HTTP response status code + data: mf2 data dict parsed from source page + log_exception: boolean, whether to include a stack trace in the log msg + report: boolean, whether to report to StackDriver Error Reporting + extra_json: dict to be merged into the JSON response body + """ + if self.entity and self.entity.status == "new": + self.entity.status = "failed" + self.entity.put() + + resp = {"error": error} + if data: + resp["parsed"] = data + if extra_json: + assert "error" not in extra_json + assert "parsed" not in extra_json + resp.update(extra_json) + + if report and status != 404: + self.report_error(error, status=status) + + flask_util.error( + str(resp), status=status, response=jsonify(resp), exc_info=log_exception + ) + + def report_error(self, resp, status=None): + """Report an error to StackDriver Error reporting.""" + # don't report specific known failures + if ( + "Deadline exceeded while waiting for HTTP response" in resp + or "urlfetch.Fetch() took too long" in resp + or + # WordPress Jetpack bugs + # https://github.com/snarfed/bridgy/issues/161 + '"resp": "invalid_input"' in resp + or + # https://github.com/snarfed/bridgy/issues/750 + '"error": "jetpack_verification_failed"' in resp + or + # https://console.cloud.google.com/errors/CMjIg52NkMLQYA + "The Jetpack site encountered an error and could not process the API request" + in resp + or + # Blogger known bug + # https://github.com/snarfed/bridgy/issues/175 + "bX-2i87au" in resp + or + # Tumblr: transient Disqus error looking up thread + # https://github.com/snarfed/bridgy/issues/177 + "Invalid argument, 'thread': Unable to find thread" in resp + or + # expected for partially set up tumblr accounts + "we haven't found your Disqus account" in resp + or + # Twitter 5MB image file size limit + '"message":"Image file size must be' in resp + or + # Twitter media file number limits + "Tweet with media must have exactly 1 gif or video" in resp + or + # Facebook image type/size req'ts + "Missing or invalid image file" in resp + or "Your photos couldn't be uploaded. Photos should be less than 4 MB" + in resp + or + # Twitter duplicate publish attempts + "Status is a duplicate." in resp + or "You have already favorited this status." 
in resp + or "You have already retweeted this" in resp + or + # Facebook duplicate publish attempts + "This status update is identical to the last one you posted." in resp + or + # WordPress duplicate comment + # "error": "Error: 409 HTTP Error 409: Conflict; {\n \"error\": \"comment_duplicate\",\n \"message\": \"Duplicate comment detected; it looks as though you’ve already said that!\"\n}\n" + "comment_duplicate" in resp + ): + return + + subject = "%s %s" % ( + self.__class__.__name__, + "%s %s" % (self.entity.type, self.entity.status) + if self.entity + else "failed", + ) + user = self.source.bridgy_url() if self.source else None + util.report_error( + subject, + user=user, + http_context=error_reporting.HTTPContext( + method=request.method, + url=request.url, + response_status_code=status, + remote_ip=request.remote_addr, + ), + ) diff --git a/wordpress_rest.py b/wordpress_rest.py index 3da8f9c1..594d017c 100644 --- a/wordpress_rest.py +++ b/wordpress_rest.py @@ -34,195 +34,222 @@ from util import redirect -API_CREATE_COMMENT_URL = 'https://public-api.wordpress.com/rest/v1/sites/%s/posts/%d/replies/new?pretty=true' -API_POST_SLUG_URL = 'https://public-api.wordpress.com/rest/v1/sites/%s/posts/slug:%s?pretty=true' -API_SITE_URL = 'https://public-api.wordpress.com/rest/v1/sites/%s?pretty=true' +API_CREATE_COMMENT_URL = ( + "https://public-api.wordpress.com/rest/v1/sites/%s/posts/%d/replies/new?pretty=true" +) +API_POST_SLUG_URL = ( + "https://public-api.wordpress.com/rest/v1/sites/%s/posts/slug:%s?pretty=true" +) +API_SITE_URL = "https://public-api.wordpress.com/rest/v1/sites/%s?pretty=true" class WordPress(models.Source): - """A WordPress blog. + """A WordPress blog. - The key name is the blog hostname. - """ - GR_CLASS = collections.namedtuple('FakeGrClass', ('NAME',))(NAME='WordPress.com') - OAUTH_START = oauth_wordpress.Start - SHORT_NAME = 'wordpress' - - site_info = ndb.JsonProperty(compressed=True) # from /sites/$site API call - - def feed_url(self): - # http://en.support.wordpress.com/feeds/ - return urllib.parse.urljoin(self.silo_url(), 'feed/') - - def silo_url(self): - return self.domain_urls[0] - - def edit_template_url(self): - return urllib.parse.urljoin(self.silo_url(), 'wp-admin/widgets.php') - - @staticmethod - def new(auth_entity=None, **kwargs): - """Creates and returns a WordPress for the logged in user. - - Args: - auth_entity: :class:`oauth_dropins.wordpress_rest.WordPressAuth` + The key name is the blog hostname. """ - site_info = WordPress.get_site_info(auth_entity) - if site_info is None: - return - - urls = util.dedupe_urls(util.trim_nulls( - [site_info.get('URL'), auth_entity.blog_url])) - domains = [util.domain_from_link(u) for u in urls] - - avatar = (json_loads(auth_entity.user_json).get('avatar_URL') - if auth_entity.user_json else None) - return WordPress(id=domains[0], - auth_entity=auth_entity.key, - name=auth_entity.user_display_name(), - picture=avatar, - superfeedr_secret=util.generate_secret(), - url=urls[0], - domain_urls=urls, - domains=domains, - site_info=site_info, - **kwargs) - - def _urls_and_domains(self, auth_entity): - """Returns this blog's URL and domain. - - Args: - auth_entity: unused - - Returns: - ([string url], [string domain]) - """ - return [self.url], [self.key_id()] - def create_comment(self, post_url, author_name, author_url, content): - """Creates a new comment in the source silo. 
+ GR_CLASS = collections.namedtuple("FakeGrClass", ("NAME",))(NAME="WordPress.com") + OAUTH_START = oauth_wordpress.Start + SHORT_NAME = "wordpress" + + site_info = ndb.JsonProperty(compressed=True) # from /sites/$site API call + + def feed_url(self): + # http://en.support.wordpress.com/feeds/ + return urllib.parse.urljoin(self.silo_url(), "feed/") + + def silo_url(self): + return self.domain_urls[0] + + def edit_template_url(self): + return urllib.parse.urljoin(self.silo_url(), "wp-admin/widgets.php") + + @staticmethod + def new(auth_entity=None, **kwargs): + """Creates and returns a WordPress for the logged in user. + + Args: + auth_entity: :class:`oauth_dropins.wordpress_rest.WordPressAuth` + """ + site_info = WordPress.get_site_info(auth_entity) + if site_info is None: + return + + urls = util.dedupe_urls( + util.trim_nulls([site_info.get("URL"), auth_entity.blog_url]) + ) + domains = [util.domain_from_link(u) for u in urls] + + avatar = ( + json_loads(auth_entity.user_json).get("avatar_URL") + if auth_entity.user_json + else None + ) + return WordPress( + id=domains[0], + auth_entity=auth_entity.key, + name=auth_entity.user_display_name(), + picture=avatar, + superfeedr_secret=util.generate_secret(), + url=urls[0], + domain_urls=urls, + domains=domains, + site_info=site_info, + **kwargs, + ) + + def _urls_and_domains(self, auth_entity): + """Returns this blog's URL and domain. + + Args: + auth_entity: unused + + Returns: + ([string url], [string domain]) + """ + return [self.url], [self.key_id()] + + def create_comment(self, post_url, author_name, author_url, content): + """Creates a new comment in the source silo. + + If the last part of the post URL is numeric, e.g. http://site/post/123999, + it's used as the post id. Otherwise, we extract the last part of + the path as the slug, e.g. http: / / site / post / the-slug, + and look up the post id via the API. + + Args: + post_url: string + author_name: string + author_url: string + content: string + + Returns: + JSON response dict with 'id' and other fields + """ + auth_entity = self.auth_entity.get() + logging.info("Determining WordPress.com post id for %s", post_url) + + # extract the post's slug and look up its post id + path = urllib.parse.urlparse(post_url).path + if path.endswith("/"): + path = path[:-1] + slug = path.split("/")[-1] + try: + post_id = int(slug) + except ValueError: + logging.info("Looking up post id for slug %s", slug) + url = API_POST_SLUG_URL % (auth_entity.blog_id, slug) + post_id = self.urlopen(auth_entity, url).get("ID") + if not post_id: + return self.error("Could not find post id", report=False) + + logging.info("Post id is %d", post_id) + + # create the comment + url = API_CREATE_COMMENT_URL % (auth_entity.blog_id, post_id) + content = '%s: %s' % (author_url, author_name, content) + data = {"content": content.encode()} + try: + resp = self.urlopen(auth_entity, url, data=urllib.parse.urlencode(data)) + except urllib.error.HTTPError as e: + code, body = util.interpret_http_exception(e) + try: + parsed = json_loads(body) if body else {} + if (code == "400" and parsed.get("error") == "invalid_input") or ( + code == "403" + and parsed.get("message") == "Comments on this post are closed" + ): + return parsed # known error: https://github.com/snarfed/bridgy/issues/161 + except ValueError: + pass # fall through + raise e + + resp["id"] = resp.pop("ID", None) + return resp + + @classmethod + def get_site_info(cls, auth_entity): + """Fetches the site info from the API. 
+ + Args: + auth_entity: :class:`oauth_dropins.wordpress_rest.WordPressAuth` + + Returns: + site info dict, or None if API calls are disabled for this blog + """ + try: + return cls.urlopen(auth_entity, API_SITE_URL % auth_entity.blog_id) + except urllib.error.HTTPError as e: + code, body = util.interpret_http_exception(e) + if code == "403" and '"API calls to this blog have been disabled."' in body: + flash( + f'You need to enable the Jetpack JSON API in {util.pretty_link(auth_entity.blog_url)}\'s WordPress admin console.' + ) + redirect("/") + return None + raise + + @staticmethod + def urlopen(auth_entity, url, **kwargs): + resp = auth_entity.urlopen(url, **kwargs).read() + logging.debug(resp) + return json_loads(resp) - If the last part of the post URL is numeric, e.g. http://site/post/123999, - it's used as the post id. Otherwise, we extract the last part of - the path as the slug, e.g. http: / / site / post / the-slug, - and look up the post id via the API. - Args: - post_url: string - author_name: string - author_url: string - content: string +class Add(oauth_wordpress.Callback): + """This handles both add and delete. - Returns: - JSON response dict with 'id' and other fields - """ - auth_entity = self.auth_entity.get() - logging.info('Determining WordPress.com post id for %s', post_url) - - # extract the post's slug and look up its post id - path = urllib.parse.urlparse(post_url).path - if path.endswith('/'): - path = path[:-1] - slug = path.split('/')[-1] - try: - post_id = int(slug) - except ValueError: - logging.info('Looking up post id for slug %s', slug) - url = API_POST_SLUG_URL % (auth_entity.blog_id, slug) - post_id = self.urlopen(auth_entity, url).get('ID') - if not post_id: - return self.error('Could not find post id', report=False) - - logging.info('Post id is %d', post_id) - - # create the comment - url = API_CREATE_COMMENT_URL % (auth_entity.blog_id, post_id) - content = '%s: %s' % (author_url, author_name, content) - data = {'content': content.encode()} - try: - resp = self.urlopen(auth_entity, url, data=urllib.parse.urlencode(data)) - except urllib.error.HTTPError as e: - code, body = util.interpret_http_exception(e) - try: - parsed = json_loads(body) if body else {} - if ((code == '400' and parsed.get('error') == 'invalid_input') or - (code == '403' and parsed.get('message') == 'Comments on this post are closed')): - return parsed # known error: https://github.com/snarfed/bridgy/issues/161 - except ValueError: - pass # fall through - raise e - - resp['id'] = resp.pop('ID', None) - return resp - - @classmethod - def get_site_info(cls, auth_entity): - """Fetches the site info from the API. - - Args: - auth_entity: :class:`oauth_dropins.wordpress_rest.WordPressAuth` - - Returns: - site info dict, or None if API calls are disabled for this blog + (WordPress.com only allows a single OAuth redirect URL.) """ - try: - return cls.urlopen(auth_entity, API_SITE_URL % auth_entity.blog_id) - except urllib.error.HTTPError as e: - code, body = util.interpret_http_exception(e) - if (code == '403' and '"API calls to this blog have been disabled."' in body): - flash(f'You need to enable the Jetpack JSON API in {util.pretty_link(auth_entity.blog_url)}\'s WordPress admin console.') - redirect('/') - return None - raise - - @staticmethod - def urlopen(auth_entity, url, **kwargs): - resp = auth_entity.urlopen(url, **kwargs).read() - logging.debug(resp) - return json_loads(resp) - -class Add(oauth_wordpress.Callback): - """This handles both add and delete. 
-
-  (WordPress.com only allows a single OAuth redirect URL.)
-  """
-  def finish(self, auth_entity, state=None):
-    if auth_entity:
-      if int(auth_entity.blog_id) == 0:
-        flash('Please try again and choose a blog before clicking Authorize.')
-        return redirect('/')
-
-      # Check if this is a self-hosted WordPress blog
-      site_info = WordPress.get_site_info(auth_entity)
-      if site_info is None:
-        return
-      elif site_info.get('jetpack'):
-        logging.info('This is a self-hosted WordPress blog! %s %s',
-                     auth_entity.key_id(), auth_entity.blog_id)
-        return render_template('confirm_self_hosted_wordpress.html',
-                               auth_entity_key=auth_entity.key.urlsafe().decode(),
-                               state=state)
-
-    util.maybe_add_or_delete_source(WordPress, auth_entity, state)
-
-
-@app.route('/wordpress/confirm', methods=['POST'])
+    def finish(self, auth_entity, state=None):
+        if auth_entity:
+            if int(auth_entity.blog_id) == 0:
+                flash("Please try again and choose a blog before clicking Authorize.")
+                return redirect("/")
+
+            # Check if this is a self-hosted WordPress blog
+            site_info = WordPress.get_site_info(auth_entity)
+            if site_info is None:
+                return
+            elif site_info.get("jetpack"):
+                logging.info(
+                    "This is a self-hosted WordPress blog! %s %s",
+                    auth_entity.key_id(),
+                    auth_entity.blog_id,
+                )
+                return render_template(
+                    "confirm_self_hosted_wordpress.html",
+                    auth_entity_key=auth_entity.key.urlsafe().decode(),
+                    state=state,
+                )
+
+        util.maybe_add_or_delete_source(WordPress, auth_entity, state)
+
+
+@app.route("/wordpress/confirm", methods=["POST"])
def confirm_self_hosted():
-  util.maybe_add_or_delete_source(
-    WordPress,
-    ndb.Key(urlsafe=request.form['auth_entity_key']).get(),
-    request.form['state'])
+    util.maybe_add_or_delete_source(
+        WordPress,
+        ndb.Key(urlsafe=request.form["auth_entity_key"]).get(),
+        request.form["state"],
+    )


class SuperfeedrNotify(superfeedr.Notify):
-  SOURCE_CLS = WordPress
+    SOURCE_CLS = WordPress


# wordpress.com doesn't seem to use scope
# https://developer.wordpress.com/docs/oauth2/
start = util.oauth_starter(oauth_wordpress.Start).as_view(
-  'wordpress_start', '/wordpress/add')
-app.add_url_rule('/wordpress/start', view_func=start, methods=['POST'])
-app.add_url_rule('/wordpress/add', view_func=Add.as_view('wordpress_add', 'unused'))
-app.add_url_rule('/wordpress/notify/<id>', view_func=SuperfeedrNotify.as_view('wordpress_notify'), methods=['POST'])
+    "wordpress_start", "/wordpress/add"
+)
+app.add_url_rule("/wordpress/start", view_func=start, methods=["POST"])
+app.add_url_rule("/wordpress/add", view_func=Add.as_view("wordpress_add", "unused"))
+app.add_url_rule(
+    "/wordpress/notify/<id>",
+    view_func=SuperfeedrNotify.as_view("wordpress_notify"),
+    methods=["POST"],
+)
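
The state parameter that construct_state_param_for_add() builds and maybe_add_or_delete_source() consumes is a small dict round-tripped through util.encode_oauth_state() / util.decode_oauth_state(), which are not shown in this diff. A sketch of that round trip, using plain JSON as a stand-in encoder and made-up values:

import json

# Stand-ins for util.encode_oauth_state / util.decode_oauth_state, which this
# diff doesn't show; the real helpers may encode differently.
encode_state, decode_state = json.dumps, json.loads

# What the oauth_starter()-wrapped Start view would encode before redirecting
# out to the silo...
state = encode_state({
    "operation": "add",
    "feature": "listen,publish",
    "callback": "https://app.example/callback",  # made-up external callback
})

# ...and what the callback side recovers to pick the add vs. delete path.
state_obj = decode_state(state)
assert state_obj.get("operation", "add") == "add"
assert (state_obj["feature"].split(",") if state_obj.get("feature") else []) == [
    "listen",
    "publish",
]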
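
The logins cookie that get_logins() parses is a |-separated list of path?name entries, percent-encoded. A minimal standalone sketch of that format, assuming a plain string input instead of Flask's request.cookies; the helper name and sample values are illustrative, not from the patch:

import urllib.parse
from collections import namedtuple

Login = namedtuple("Login", ("path", "site", "name"))


def parse_logins(cookie_value):
    """Parse a raw 'logins' cookie value ("path?name|path?name") into Logins."""
    logins = []
    for val in set(urllib.parse.unquote_plus(cookie_value).split("|")):
        parts = val.split("?", 1)
        path = parts[0]
        if not path:
            continue
        name = parts[1] if len(parts) > 1 else ""
        site, _ = path.strip("/").split("/")  # e.g. '/twitter/schnarfed' -> 'twitter'
        logins.append(Login(path=path, site=site, name=name))
    return logins


# Hypothetical cookie value: one Twitter user with a display name, one WordPress blog.
print(parse_logins("/twitter/schnarfed?snarfed|/wordpress/example.com"))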
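
WordPress.create_comment() resolves the target post id by treating a numeric trailing path segment as the id and anything else as a slug to look up via the API. The same decision logic as a standalone sketch; lookup_slug is a hypothetical stand-in for the API_POST_SLUG_URL fetch:

import urllib.parse


def resolve_post_id(post_url, lookup_slug):
    """Return the wordpress.com post id for post_url.

    A numeric last path segment (http://site/post/123999) is used directly;
    otherwise it's treated as a slug and resolved via lookup_slug(slug).
    """
    path = urllib.parse.urlparse(post_url).path.rstrip("/")
    slug = path.split("/")[-1]
    try:
        return int(slug)
    except ValueError:
        return lookup_slug(slug)


assert resolve_post_id("http://example.wordpress.com/2021/08/123999/", None) == 123999
assert resolve_post_id(
    "http://example.wordpress.com/2021/08/hello-world/", lambda slug: 42
) == 42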