Permalink
Browse files

Geometrically decaying feed fetches on known good feeds that have gone bad.
  • Loading branch information...
1 parent 742403e commit 6c7034343b93ca79b0225edb7395459bfb965405 @samuelclay committed Feb 23, 2012
Showing with 38 additions and 17 deletions.
  1. +17 −9 apps/rss_feeds/models.py
  2. +15 −0 config/com.redis.plist
  3. +2 −4 config/mongodb.dev.conf
  4. +3 −3 fabfile.py
  5. 0 logs/__init__.py
  6. +1 −1 utils/feed_fetcher.py
@@ -304,9 +304,7 @@ def save_feed_history(self, status_code, message, exception=None):
# for history in old_fetch_histories:
# history.delete()
if status_code not in (200, 304):
- fetch_history = map(lambda h: h.status_code,
- MFeedFetchHistory.objects(feed_id=self.pk)[:50])
- self.count_errors_in_history(fetch_history, status_code, 'feed')
+ self.count_errors_in_history('feed', status_code)
elif self.has_feed_exception:
self.has_feed_exception = False
self.active = True
@@ -323,15 +321,16 @@ def save_page_history(self, status_code, message, exception=None):
# history.delete()
if status_code not in (200, 304):
- fetch_history = map(lambda h: h.status_code,
- MPageFetchHistory.objects(feed_id=self.pk)[:50])
- self.count_errors_in_history(fetch_history, status_code, 'page')
+ self.count_errors_in_history('page', status_code)
elif self.has_page_exception:
self.has_page_exception = False
self.active = True
self.save()
- def count_errors_in_history(self, fetch_history, status_code, exception_type):
+ def count_errors_in_history(self, exception_type='feed', status_code=None):
+ history_class = MFeedFetchHistory if exception_type == 'feed' else MPageFetchHistory
+ fetch_history = map(lambda h: h.status_code,
+ history_class.objects(feed_id=self.pk)[:50])
non_errors = [h for h in fetch_history if int(h) in (200, 304)]
errors = [h for h in fetch_history if int(h) not in (200, 304)]
@@ -341,12 +340,14 @@ def count_errors_in_history(self, fetch_history, status_code, exception_type):
self.active = False
elif exception_type == 'page':
self.has_page_exception = True
- self.exception_code = status_code
+ self.exception_code = status_code or int(errors[0])
self.save()
elif self.exception_code > 0:
self.active = True
self.exception_code = 0
self.save()
+
+ return errors, non_errors
def count_subscribers(self, verbose=False):
SUBSCRIBER_EXPIRE = datetime.datetime.now() - datetime.timedelta(days=settings.SUBSCRIBER_EXPIRE)
@@ -1006,9 +1007,12 @@ def get_next_scheduled_update(self, force=False, verbose=True):
return total, random_factor*2
- def set_next_scheduled_update(self):
+ def set_next_scheduled_update(self, multiplier=1):
total, random_factor = self.get_next_scheduled_update(force=True, verbose=False)
+ if multiplier > 1:
+ total = total * multiplier
+
next_scheduled_update = datetime.datetime.utcnow() + datetime.timedelta(
minutes = total + random_factor)
@@ -1022,6 +1026,10 @@ def schedule_feed_fetch_immediately(self):
self.save()
+ def schedule_feed_fetch_geometrically(self):
+ errors, non_errors = self.count_errors_in_history('feed')
+ self.set_next_scheduled_update(multiplier=len(errors))
+
# def calculate_collocations_story_content(self,
# collocation_measures=TrigramAssocMeasures,
# collocation_finder=TrigramCollocationFinder):
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>Label</key>
+ <string>com.redis</string>
+ <key>RunAtLoad</key>
+ <true/>
+ <key>ProgramArguments</key>
+ <array>
+ <string>/usr/local/bin/redis-server</string>
+ <string>/Users/sclay/projects/newsblur/config/redis.conf</string>
+ </array>
+</dict>
+</plist>
@@ -5,12 +5,10 @@
# Note: if you run mongodb as a non-root user (recommended) you may
# need to create and set permissions for this directory manually,
# e.g., if the parent directory isn't mutable by the mongodb user.
-dbpath=/Users/conesus/newsblur/data/db/unsharded
+dbpath=/Users/sclay/projects/data/db/unsharded
#where to log
-logpath=/Users/conesus/newsblur/data/unsharded.log
-
-logappend=false
+logpath=/Users/sclay/projects/data/unsharded.log
#port = 27017
View
@@ -303,11 +303,11 @@ def setup_libxml_code():
run('./configure && make && sudo make install')
def setup_psycopg():
- sudo('easy_install psycopg2')
+ sudo('easy_install -U psycopg2')
def setup_python():
- sudo('easy_install pip')
- sudo('easy_install fabric django celery django-celery django-compress South django-extensions pymongo BeautifulSoup pyyaml nltk==0.9.9 lxml oauth2 pytz boto seacucumber django_ses mongoengine redis requests')
+ sudo('easy_install -U pip')
+ sudo('easy_install -U fabric django readline pyflakes iconv celery django-celery django-compress South django-extensions pymongo BeautifulSoup pyyaml nltk==0.9.9 lxml oauth2 pytz boto seacucumber django_ses mongoengine redis requests')
put('config/pystartup.py', '.pystartup')
with cd(os.path.join(env.NEWSBLUR_PATH, 'vendor/cjson')):
View
No changes.
@@ -144,7 +144,7 @@ def process(self):
self.feed.save_feed_history(self.fpf.status, "HTTP Error")
else:
self.feed.has_feed_exception = True
- self.feed.schedule_feed_fetch_immediately()
+ self.feed.schedule_feed_fetch_geometrically()
self.feed.save()
return FEED_ERRHTTP, ret_values

0 comments on commit 6c70343

Please sign in to comment.