Deprecating FeedLoadtime and moving to MongoDB-backed aggregate data for feed load time graphs.
1 parent 2461b2f commit f9ed7fdd262bbc680f610d6cbbf2d7a610a8cba7 @samuelclay committed Sep 27, 2012
@@ -4,6 +4,8 @@ logs/*.pid
*.pyc
static/*
local_settings.py
+celerybeat-schedule
+celerybeat.pid
media/iphone/NewsBlur/build
media/iphone/build
build/
@@ -41,7 +41,6 @@ class CollectStats(Task):
def run(self, **kwargs):
logging.debug(" ---> Collecting stats...")
MStatistics.collect_statistics()
- MStatistics.delete_old_stats()
class CollectFeedback(Task):
@@ -30,7 +30,7 @@
from apps.rss_feeds.models import MFeedIcon
from apps.statistics.models import MStatistics
try:
- from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory, FeedLoadtime
+ from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory
except:
pass
from apps.social.models import MSharedStory, MSocialProfile, MSocialServices
@@ -542,7 +542,6 @@ def load_single_feed(request, feed_id):
if timediff > 0.50 else "")
logging.user(request, "~FYLoading feed: ~SB%s%s (%s/%s) %s" % (
feed.feed_title[:22], ('~SN/p%s' % page) if page > 1 else '', order, read_filter, time_breakdown))
- FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
data = dict(stories=stories,
user_profiles=user_profiles,
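
With the synchronous FeedLoadtime.objects.create() call gone from load_single_feed, serving a feed no longer costs a PostgreSQL INSERT per request. The timings that the new graphs read from nbanalytics.page_loads must be recorded somewhere else; a minimal sketch of the kind of middleware that could do it follows. The class name and document fields are assumptions, not part of this commit — note in particular that the aggregation's $match compares path by exact value, so whatever writes these documents would have to store the route prefix (e.g. "/reader/feed/") rather than the full request path.

    # Hypothetical sketch: time each request and record it in the MongoDB
    # analytics collection the new pipelines aggregate over. Old-style
    # Django middleware, matching the Python 2 era of this commit.
    import datetime
    import time

    from django.conf import settings

    class PageLoadTimingMiddleware(object):
        def process_request(self, request):
            request._load_start = time.time()

        def process_response(self, request, response):
            start = getattr(request, '_load_start', None)
            if start is not None:
                settings.MONGOANALYTICSDB.nbanalytics.page_loads.insert({
                    'date': datetime.datetime.now(),
                    # Assumed: in practice this would be normalized to the
                    # route prefix (e.g. "/reader/feed/") that the exact-match
                    # $in filter in collect_statistics_sites_loaded() expects.
                    'path': request.path,
                    'duration': time.time() - start,
                })
            return response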
@@ -8,5 +8,4 @@ class Command(BaseCommand):
def handle(self, *args, **options):
MStatistics.collect_statistics()
-
- MStatistics.delete_old_stats()
+
@@ -1,10 +1,8 @@
import datetime
import mongoengine as mongo
import urllib2
-from django.db.models import Avg, Count
from django.conf import settings
from apps.rss_feeds.models import MFeedFetchHistory, MPageFetchHistory, MFeedPushHistory
-from apps.rss_feeds.models import FeedLoadtime
from apps.social.models import MSharedStory
from apps.profile.models import Profile
from utils import json_functions as json
@@ -57,24 +55,22 @@ def all(cls):
@classmethod
def collect_statistics(cls):
now = datetime.datetime.now()
- last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
- cls.collect_statistics_feeds_fetched(last_day)
+ cls.collect_statistics_feeds_fetched()
print "Feeds Fetched: %s" % (datetime.datetime.now() - now)
- cls.collect_statistics_premium_users(last_day)
+ cls.collect_statistics_premium_users()
print "Premiums: %s" % (datetime.datetime.now() - now)
- cls.collect_statistics_standard_users(last_day)
+ cls.collect_statistics_standard_users()
print "Standard users: %s" % (datetime.datetime.now() - now)
- cls.collect_statistics_sites_loaded(last_day)
+ cls.collect_statistics_sites_loaded()
print "Sites loaded: %s" % (datetime.datetime.now() - now)
- cls.collect_statistics_stories_shared(last_day)
+ cls.collect_statistics_stories_shared()
print "Stories shared: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_for_db()
print "DB Stats: %s" % (datetime.datetime.now() - now)
@classmethod
- def collect_statistics_feeds_fetched(cls, last_day=None):
- if not last_day:
- last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
+ def collect_statistics_feeds_fetched(cls):
+ last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
last_month = datetime.datetime.now() - datetime.timedelta(days=30)
feeds_fetched = MFeedFetchHistory.objects.filter(fetch_date__gte=last_day).count()
@@ -100,43 +96,65 @@ def delete_old_history():
return feeds_fetched
@classmethod
- def collect_statistics_premium_users(cls, last_day=None):
- if not last_day:
- last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
-
+ def collect_statistics_premium_users(cls):
+ last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
+
premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count()
cls.objects(key='premium_users').update_one(upsert=True, set__key='premium_users', set__value=premium_users)
return premium_users
@classmethod
- def collect_statistics_standard_users(cls, last_day=None):
- if not last_day:
- last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
+ def collect_statistics_standard_users(cls):
+ last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count()
cls.objects(key='standard_users').update_one(upsert=True, set__key='standard_users', set__value=standard_users)
return standard_users
@classmethod
- def collect_statistics_sites_loaded(cls, last_day=None):
- if not last_day:
- last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
+ def collect_statistics_sites_loaded(cls):
now = datetime.datetime.now()
sites_loaded = []
avg_time_taken = []
for hour in range(24):
start_hours_ago = now - datetime.timedelta(hours=hour)
end_hours_ago = now - datetime.timedelta(hours=hour+1)
- aggregates = dict(count=Count('loadtime'), avg=Avg('loadtime'))
- load_times = FeedLoadtime.objects.filter(
- date_accessed__lte=start_hours_ago,
- date_accessed__gte=end_hours_ago
- ).aggregate(**aggregates)
- sites_loaded.append(load_times['count'] or 0)
- avg_time_taken.append(load_times['avg'] or 0)
+
+ load_times = settings.MONGOANALYTICSDB.nbanalytics.page_loads.aggregate([{
+ "$match": {
+ "date": {
+ "$gte": end_hours_ago,
+ "$lte": start_hours_ago,
+ },
+ "path": {
+ "$in": [
+ "/reader/feed/",
+ "/social/stories/",
+ "/reader/river_stories/",
+ "/social/river_stories/",
+ ]
+ }
+ },
+ }, {
+ "$group": {
+ "_id" : 1,
+ "count" : {"$sum": 1},
+ "avg" : {"$avg": "$duration"},
+ },
+ }])
+
+ count = 0
+ avg = 0
+ if load_times['result']:
+ count = load_times['result'][0]['count']
+ avg = load_times['result'][0]['avg']
+
+ sites_loaded.append(count)
+ avg_time_taken.append(avg)
+
sites_loaded.reverse()
avg_time_taken.reverse()
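
In pymongo 2.x (current when this commit landed), Collection.aggregate() returns the raw command response — a dict whose 'result' key holds the output documents — which is why the code above indexes load_times['result'] instead of iterating a cursor. A standalone sketch of one hourly bucket, with connection details assumed:

    # Minimal sketch of one hourly rollup against the analytics database,
    # assuming pymongo 2.x and a local mongod. aggregate() here returns
    # {'result': [...], 'ok': 1.0}, not a cursor.
    import datetime
    import pymongo

    db = pymongo.Connection('localhost').nbanalytics

    now = datetime.datetime.now()
    response = db.page_loads.aggregate([
        # Keep only feed and river loads from the last hour.
        {"$match": {
            "date": {"$gte": now - datetime.timedelta(hours=1), "$lte": now},
            "path": {"$in": ["/reader/feed/", "/reader/river_stories/"]},
        }},
        # Collapse the hour into a single count/average document.
        {"$group": {
            "_id": 1,
            "count": {"$sum": 1},
            "avg": {"$avg": "$duration"},
        }},
    ])
    docs = response['result']  # empty list when nothing loaded that hour
    count = docs[0]['count'] if docs else 0
    avg = docs[0]['avg'] if docs else 0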
@@ -152,9 +170,7 @@ def collect_statistics_sites_loaded(cls, last_day=None):
cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)
@classmethod
- def collect_statistics_stories_shared(cls, last_day=None):
- if not last_day:
- last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
+ def collect_statistics_stories_shared(cls):
now = datetime.datetime.now()
stories_shared = []
@@ -182,11 +198,6 @@ def collect_statistics_for_db(cls):
lag = db_functions.mongo_max_replication_lag(settings.MONGODB)
cls.set('mongodb_replication_lag', lag)
- @classmethod
- def delete_old_stats(cls):
- now = datetime.datetime.now()
- old_age = now - datetime.timedelta(days=7)
- FeedLoadtime.objects.filter(date_accessed__lte=old_age).delete()
class MFeedback(mongo.Document):
date = mongo.StringField()
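
delete_old_stats() existed only to prune FeedLoadtime rows older than seven days; with the SQL table gone, the scheduled cleanup (and its call sites in the Celery task and management command above) goes with it. If the page_loads collection needs a similar bound on growth, a TTL index is the natural MongoDB 2.2+ replacement — whether this commit relies on one, a capped collection, or no expiry at all is not visible in the diff:

    # Hedged sketch: have mongod expire page_loads documents after seven
    # days via a TTL index (MongoDB 2.2+), standing in for the removed
    # delete_old_stats() sweep. An assumption, not part of this commit.
    from django.conf import settings

    settings.MONGOANALYTICSDB.nbanalytics.page_loads.create_index(
        'date', expireAfterSeconds=7 * 24 * 60 * 60)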
@@ -9,7 +9,7 @@
# 199.15.253.226 db03 db03.newsblur.com
199.15.249.98 db04 db04.newsblur.com
199.15.249.99 db05 db05.newsblur.com
-# 199.15.249.100 db06 db06.newsblur.com
+
199.15.249.101 db07 db07.newsblur.com
199.15.250.231 task01 task01.newsblur.com
199.15.250.250 task02 task02.newsblur.com
@@ -18,6 +18,7 @@
199.15.252.106 task05 task05.newsblur.com
199.15.252.107 task06 task06.newsblur.com
199.15.252.108 task07 task07.newsblur.com
-
-# EC2
-23.20.165.187 db10 db10.newsblur.com
+199.15.251.144 task08 task08.newsblur.com
+199.15.251.154 task09 task09.newsblur.com
+199.15.251.137 task10 task10.newsblur.com
+199.15.251.155 task11 task11.newsblur.com
@@ -1,8 +1,14 @@
+import pymongo
+
PRIMARY_STATE = 1
SECONDARY_STATE = 2
def mongo_max_replication_lag(connection):
- status = connection.admin.command('replSetGetStatus')
+ try:
+ status = connection.admin.command('replSetGetStatus')
+ except pymongo.errors.OperationFailure:
+ return 0
+
members = status['members']
primary_optime = None
oldest_secondary_optime = None
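
replSetGetStatus raises OperationFailure when the target mongod is not running as part of a replica set, so the new guard lets the statistics collector report zero lag against a standalone instance instead of dying. The rest of the function is cut off by the hunk; it presumably compares the primary's optime against the most-lagged secondary's, along these lines (a sketch of the likely shape, not the file's exact body):

    # Sketch of the probable remainder: walk the replica-set members,
    # record the primary's optime and the oldest secondary's, and report
    # the gap in seconds. Field handling is assumed from the shape of the
    # replSetGetStatus response document.
    for member in members:
        optime = member['optime']  # a bson Timestamp; .time is epoch seconds
        if member['state'] == PRIMARY_STATE:
            primary_optime = optime.time
        elif member['state'] == SECONDARY_STATE:
            if (oldest_secondary_optime is None
                    or optime.time < oldest_secondary_optime):
                oldest_secondary_optime = optime.time

    if primary_optime is None or oldest_secondary_optime is None:
        return 0
    return primary_optime - oldest_secondary_optime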
@@ -1,6 +1,7 @@
#!/usr/bin/env python
from utils.munin.base import MuninGraph
-
+from django.conf import settings
+import datetime
class NBMuninGraph(MuninGraph):
@@ -17,18 +18,46 @@ def graph_config(self):
}
def calculate_metrics(self):
- from django.db.models import Avg, Min, Max, Count
- import datetime
- from apps.rss_feeds.models import FeedLoadtime
hour_ago = datetime.datetime.utcnow() - datetime.timedelta(minutes=60)
-
- averages = dict(avg=Avg('loadtime'), max=Max('loadtime'), min=Min('loadtime'), count=Count('loadtime'))
- hour = FeedLoadtime.objects.filter(date_accessed__gte=hour_ago).aggregate(**averages)
+ times = settings.MONGOANALYTICSDB.nbanalytics.page_loads.aggregate([{
+ "$match": {
+ "date": {
+ "$gte": hour_ago,
+ },
+ "path": {
+ "$in": [
+ "/reader/feed/",
+ "/social/stories/",
+ "/reader/river_stories/",
+ "/social/river_stories/",
+ ]
+ }
+ },
+ }, {
+ "$group": {
+ "_id" : 1,
+ "count" : {"$sum": 1},
+ "avg" : {"$avg": "$duration"},
+ "min" : {"$min": "$duration"},
+ "max" : {"$max": "$duration"},
+ },
+ }])
+
+ load_avg = 0
+ load_min = 0
+ load_max = 0
+ load_count = 0
+ if times['result']:
+ load_avg = times['result'][0]['avg']
+ load_min = times['result'][0]['min']
+ load_max = times['result'][0]['max']
+ load_count = times['result'][0]['count']
+
return {
- 'feed_loadtimes_avg_hour': hour['avg'],
- 'feed_loadtimes_min_hour': hour['min'],
- 'feed_loadtimes_max_hour': hour['max'],
- 'feeds_loaded_hour': hour['count'],
+ 'feed_loadtimes_avg_hour': load_avg,
+ 'feed_loadtimes_min_hour': load_min,
+ 'feed_loadtimes_max_hour': load_max,
+ 'feeds_loaded_hour': load_count,
}
if __name__ == '__main__':
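
Munin invokes the plugin once with "config" to learn the graph layout and then once per poll for values; calculate_metrics() feeds the latter, and the zero defaults above mean an hour with no page loads now draws flat lines instead of erroring. The MuninGraph base class presumably turns the returned dict into standard munin "field.value" output, roughly like this (an illustration of the munin protocol, not NewsBlur's actual base.py):

    # Roughly what emitting the metrics looks like in munin's protocol:
    # each key from calculate_metrics() becomes a "<field>.value <number>"
    # line on stdout. Python 2, to match the rest of this codebase.
    metrics = NBMuninGraph().calculate_metrics()
    for field, value in metrics.items():
        print "%s.value %s" % (field, value)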
