Skip to content

Commit

Permalink
add /crawl-now endpoint to allow explicitly refetching h-feed. for #538
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed Nov 17, 2015
1 parent 9d71acb commit b4984d1
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 29 deletions.
44 changes: 27 additions & 17 deletions app.py
Expand Up @@ -44,10 +44,6 @@
from google.appengine.ext.ndb.stats import KindStat, KindPropertyNameStat
import webapp2

# The retry button refetches h-feed if it's been more than this long since the
# last fetch.
RETRY_REFETCH_HFEED_BUFFER = datetime.timedelta(minutes=2)


class DashboardHandler(webutil_handlers.TemplateHandler, util.Handler):
"""Base handler for both the front page and user pages."""
Expand Down Expand Up @@ -509,14 +505,33 @@ def get(self):


class PollNowHandler(util.Handler):
  """Handles POSTs to /poll-now by queueing an immediate poll for a source.

  Attributes:
    source: the entity loaded from the `key` request parameter. None until
      get_source() has run successfully.
  """
  source = None

  def post(self):
    """Queues a poll task for the source, then redirects to its Bridgy URL."""
    self.get_source()
    util.add_poll_task(self.source, now=True)
    self.messages.add("Polling now. Refresh in a minute to see what's new!")
    self.redirect(self.source.bridgy_url(self))

  def get_source(self):
    """Loads the source named by the `key` request parameter and caches it.

    The key is read with util.get_required_param() and decoded as a urlsafe
    ndb key. Subsequent calls reuse the cached self.source instead of
    fetching again.

    Returns:
      The loaded source entity (also stored in self.source).

    Aborts the request with HTTP 400 if the key does not resolve to an entity.
    """
    if self.source:
      return self.source

    self.source = ndb.Key(urlsafe=util.get_required_param(self, 'key')).get()
    if not self.source:
      self.abort(400, 'source not found')

    # Return on the freshly-loaded path too, so callers get the same result
    # whether or not the source was already cached.
    return self.source

class CrawlNowHandler(PollNowHandler):
  """Handles POSTs to /crawl-now.

  Marks the source so that its h-feed is explicitly refetched, then defers to
  PollNowHandler.post() to queue the poll and redirect.
  """

  @ndb.transactional
  def setup_refetch_hfeed(self):
    """Stores models.REFETCH_HFEED_TRIGGER as the source's last_hfeed_fetch.

    Loads the source via get_source() and writes it back with put(), all
    inside a single ndb transaction.
    """
    self.get_source()
    flagged = self.source
    flagged.last_hfeed_fetch = models.REFETCH_HFEED_TRIGGER
    flagged.put()

  def post(self):
    """Flags the h-feed refetch first, then runs the regular poll-now flow."""
    self.setup_refetch_hfeed()
    super(CrawlNowHandler, self).post()


class RetryHandler(util.Handler):
Expand All @@ -535,20 +550,14 @@ def post(self):

# run OPD to pick up any new SyndicatedPosts. note that we don't refetch
# their h-feed, so if they've added a syndication URL since we last crawled,
# retry won't make us pick it up. meh. background in #524.
# retry won't make us pick it up. background in #524.
if entity.key.kind() == 'Response':
source = entity.source.get()
fetch_hfeed = (source.last_hfeed_fetch <
util.now_fn() - RETRY_REFETCH_HFEED_BUFFER)
for activity in [json.loads(a) for a in entity.activities_json]:
originals, mentions = original_post_discovery.discover(
source, activity, fetch_hfeed=fetch_hfeed, include_redirect_sources=False)
source, activity, fetch_hfeed=False, include_redirect_sources=False)
targets |= original_post_discovery.targets_for_response(
json.loads(entity.response_json), originals=originals, mentions=mentions)
if fetch_hfeed:
fetch_hfeed = False # only do it for the first activity

Source.put_updates(source)

entity.unsent = targets
entity.put()
Expand Down Expand Up @@ -605,6 +614,7 @@ def get(self):
('/delete/start', DeleteStartHandler),
('/delete/finish', DeleteFinishHandler),
('/poll-now', PollNowHandler),
('/crawl-now', CrawlNowHandler),
('/retry', RetryHandler),
('/(listen|publish)/?', RedirectToFrontPageHandler),
('/logout', LogoutHandler),
Expand Down
2 changes: 2 additions & 0 deletions models.py
Expand Up @@ -24,6 +24,8 @@

MAX_AUTHOR_URLS = 5

# Sentinel timestamp, one second before the Unix epoch. CrawlNowHandler in
# app.py stores it in a source's last_hfeed_fetch to request an h-feed refetch.
# NOTE(review): presumably the poll task checks for this value and refetches
# when it sees it -- confirm against the polling code.
REFETCH_HFEED_TRIGGER = datetime.datetime.utcfromtimestamp(-1)

# maps string short name to Source subclass. populated by SourceMeta.
sources = {}

Expand Down
22 changes: 10 additions & 12 deletions test/test_app.py
Expand Up @@ -102,24 +102,22 @@ def test_retry_redirect_to(self):
self.assertEquals('http://localhost/foo/bar',
response.headers['Location'].split('#')[0])

def test_crawl_now(self):
  """POSTing to /crawl-now redirects, queues a poll, and flags an h-feed refetch."""
  source = self.sources[0]
  source.domain_urls = ['http://orig']
  # Start from a "just fetched" state so the assertion at the end proves the
  # handler overwrote it.
  source.last_hfeed_fetch = testutil.NOW
  source.put()

  key = source.key.urlsafe()
  response = app.application.get_response(
    '/crawl-now', method='POST', body='key=%s' % key)
  self.assertEqual(source.bridgy_url(self.handler),
                   response.headers['Location'].split('#')[0])
  self.assertEqual(302, response.status_int)

  # A poll task for this source should have been queued...
  params = testutil.get_task_params(self.taskqueue_stub.GetTasks('poll-now')[0])
  self.assertEqual(key, params['source_key'])
  # ...and the stored source should now carry the refetch sentinel.
  self.assertEqual(models.REFETCH_HFEED_TRIGGER, source.key.get().last_hfeed_fetch)

def test_poll_now_and_retry_response_missing_key(self):
for endpoint in '/poll-now', '/retry':
Expand Down

0 comments on commit b4984d1

Please sign in to comment.