Features:

    * Cassandra
      * Add the new Cassandra client libraries we'll need to setup.py
      * Select Cassandra seeds at random rather than in order
      * Bugfix in CassandraCache.delete and a faster permacache migration function
      * Like other caches, CassandraCaches need to be able to take (and ignore) a 'time' parameter
      * Add Cassandra to the permacache chain
    * Beginning of jury duty (later called deputy moderation)

    Additions:
    * Make /r/friends much cheaper at the expense of sorting
    * Add Jury.delete_old(), which removes Account-Trial relations > 3 days old
    * Make the pretty_button() template function's callback optional, so that
      actionless pretty-buttons can be used on the admin details page
    * Make .embed listings work for permalink pages (think of this as a first pass at getting blog comments working). Adds 'limit' and 'depth' parameters to permalink pages
    * Added final redditheader.html pretty-button class
    * New iframe ads; also make button.js static
    * Usage sampling

    Bugfixes:
    * Stop adding batched time query recalculations to the queue at all except through the catch_up_batch_queries function
    * Superfluous comma might be causing IE7 to barf
    * Change the byurl keys again, to fit in memcache's 251-character limit
    * Indentation error causing non-sponsors to be able to get to the advert listing
    * Move to a custom build of pylibmc that doesn't hold the GIL during blocking operations
    * Convert some cache.gets to cache.get_multis, and implement our own thread-safety around pylibmc's client
    * Make search caching a little smarter for time searches
    * Make the ads not be cached for 30 seconds each, i.e. more random
    * Fix deleted things on profile pages
commit 67814d543b704080317764791a63d01eb2ea2c6b 1 parent a402d48
ketralnis authored, KeyserSosa committed
Showing with 2,613 additions and 401 deletions.
  1. +10 −2 r2/example.ini
  2. +7 −3 r2/r2/config/routing.py
  3. +1 −0  r2/r2/controllers/__init__.py
  4. +3 −3 r2/r2/controllers/ads.py
  5. +79 −28 r2/r2/controllers/api.py
  6. +2 −0  r2/r2/controllers/buttons.py
  7. +66 −4 r2/r2/controllers/front.py
  8. +70 −38 r2/r2/controllers/listingcontroller.py
  9. +16 −2 r2/r2/controllers/mediaembed.py
  10. +31 −23 r2/r2/controllers/reddit_base.py
  11. +6 −0 r2/r2/controllers/validator/validator.py
  12. +2 −1  r2/r2/lib/amqp.py
  13. +41 −19 r2/r2/lib/app_globals.py
  14. +167 −86 r2/r2/lib/cache.py
  15. +13 −10 r2/r2/lib/db/queries.py
  16. +4 −2 r2/r2/lib/db/thing.py
  17. +9 −4 r2/r2/lib/jsontemplates.py
  18. +1 −1  r2/r2/lib/lock.py
  19. +252 −0 r2/r2/lib/migrate.py
  20. +14 −5 r2/r2/lib/organic.py
  21. +138 −51 r2/r2/lib/pages/pages.py
  22. +13 −2 r2/r2/lib/pages/things.py
  23. +3 −3 r2/r2/lib/queues.py
  24. +1 −1  r2/r2/lib/services.py
  25. +14 −6 r2/r2/lib/solrsearch.py
  26. +6 −0 r2/r2/lib/strings.py
  27. +1 −1  r2/r2/lib/template_helpers.py
  28. +0 −1  r2/r2/lib/utils/__init__.py
  29. +27 −0 r2/r2/lib/utils/admin_utils.py
  30. +1 −1  r2/r2/lib/utils/cmd_utils.py
  31. +198 −0 r2/r2/lib/utils/trial_utils.py
  32. +45 −5 r2/r2/lib/utils/utils.py
  33. +2 −0  r2/r2/models/__init__.py
  34. +12 −0 r2/r2/models/account.py
  35. +12 −5 r2/r2/models/ad.py
  36. +7 −3 r2/r2/models/builder.py
  37. +100 −0 r2/r2/models/jury.py
  38. +33 −12 r2/r2/models/link.py
  39. +4 −4 r2/r2/models/listing.py
  40. +6 −0 r2/r2/models/populatedb.py
  41. +25 −6 r2/r2/models/subreddit.py
  42. +143 −0 r2/r2/models/trial.py
  43. BIN  r2/r2/public/static/bg-button-negative-pressed.png
  44. BIN  r2/r2/public/static/bg-button-negative-unpressed.png
  45. BIN  r2/r2/public/static/bg-button-positive-pressed.png
  46. BIN  r2/r2/public/static/bg-button-positive-unpressed.png
  47. +61 −0 r2/r2/public/static/button/button1.html
  48. +28 −0 r2/r2/public/static/button/button1.js
  49. +45 −0 r2/r2/public/static/button/button2.html
  50. +28 −0 r2/r2/public/static/button/button2.js
  51. +48 −0 r2/r2/public/static/button/button3.html
  52. +28 −0 r2/r2/public/static/button/button3.js
  53. +40 −0 r2/r2/public/static/button/button4.html
  54. +127 −0 r2/r2/public/static/button/button5.html
  55. BIN  r2/r2/public/static/cake.png
  56. +140 −9 r2/r2/public/static/css/reddit.css
  57. +173 −0 r2/r2/public/static/js/blogbutton.js
  58. +10 −1 r2/r2/public/static/js/jquery.reddit.js
  59. +19 −0 r2/r2/public/static/js/reddit.js
  60. BIN  r2/r2/public/static/noimage.png
  61. +13 −4 r2/r2/templates/adminads.html
  62. +5 −2 r2/r2/templates/base.htmllite
  63. +12 −7 r2/r2/templates/buttonembed.js
  64. +7 −4 r2/r2/templates/comment.htmllite
  65. +1 −1  r2/r2/templates/housead.html
  66. +4 −1 r2/r2/templates/infobar.html
  67. +15 −5 r2/r2/templates/link.html
  68. +30 −0 r2/r2/templates/linkinfopage.htmllite
  69. +81 −0 r2/r2/templates/linkontrial.html
  70. +2 −4 r2/r2/templates/messagecompose.html
  71. +12 −1 r2/r2/templates/prefoptions.html
  72. +4 −2 r2/r2/templates/printable.htmllite
  73. +5 −0 r2/r2/templates/printablebuttons.html
  74. +1 −1  r2/r2/templates/profilebar.html
  75. +0 −11 r2/r2/templates/reddit.html
  76. +8 −0 r2/r2/templates/redditheader.html
  77. +24 −3 r2/r2/templates/{organiclisting.html → spotlightlisting.html}
  78. +13 −0 r2/r2/templates/utils.html
  79. +21 −1 r2/setup.py
  80. +23 −12 scripts/usage_q.py
12 r2/example.ini
@@ -17,10 +17,18 @@ log_path =
locale = C
memcaches = 127.0.0.1:11211
-permacaches = 127.0.0.1:11211
rendercaches = 127.0.0.1:11211
rec_cache = 127.0.0.1:11311
+# -- permacache options --
+# permacache is memcaches -> cassandra -> memcachedb
+# memcaches that sit in front of cassandra
+permacache_memcaches = 127.0.0.1:11211
+# cassandra hosts. one of these will be chosen at random by pycassa
+cassandra_seeds = 127.0.0.1:9160
+# memcachedbs
+permacaches = 127.0.0.1:11211
+
# site tracking urls. All urls are assumed to be to an image unless
# otherwise noted:
tracker_url =
@@ -109,7 +117,7 @@ timezone = UTC
lang = en
monitored_servers = localhost
-enable_usage_stats = false
+usage_sampling = 0.0
#query cache settings
num_query_queue_workers = 0
10 r2/r2/config/routing.py
@@ -106,6 +106,8 @@ def make_map(global_conf={}, app_conf={}):
mc('/prefs/:location', controller='front',
action='prefs', location='options')
+ mc('/juryduty', controller='front', action='juryduty')
+
mc('/info/0:article/*rest', controller = 'front',
action='oldinfo', dest='comments', type='ancient')
mc('/info/:article/:dest/:comment', controller='front',
@@ -206,6 +208,8 @@ def make_map(global_conf={}, app_conf={}):
requirements=dict(action="promote|unpromote|new_promo|link_thumb|freebie|promote_note|update_pay|refund|traffic_viewer|rm_traffic_viewer"))
mc('/api/:action', controller='api')
+ mc("/button_info", controller="api", action="info", limit = 1)
+
mc('/captcha/:iden', controller='captcha', action='captchaimg')
mc('/mediaembed/:link', controller="mediaembed", action="mediaembed")
@@ -224,9 +228,9 @@ def make_map(global_conf={}, app_conf={}):
mc('/authorize_embed', controller = 'front', action = 'authorize_embed')
# Used for showing ads
- mc("/ads/", controller = "mediaembed", action = "ad")
- mc("/ads/r/:reddit_name", controller = "mediaembed", action = "ad")
- mc("/ads/:codename", controller = "mediaembed", action = "ad_by_codename")
+ mc("/ads/", controller = "ad", action = "ad")
+ mc("/ads/r/:reddit_name", controller = "ad", action = "ad")
+ mc("/ads/:codename", controller = "ad", action = "ad_by_codename")
mc('/comscore-iframe/', controller='mediaembed', action='comscore')
mc('/comscore-iframe/*url', controller='mediaembed', action='comscore')
1  r2/r2/controllers/__init__.py
@@ -51,6 +51,7 @@
from errorlog import ErrorlogController
from promotecontroller import PromoteController
from mediaembed import MediaembedController
+from mediaembed import AdController
from querycontroller import QueryController
6 r2/r2/controllers/ads.py
@@ -26,14 +26,14 @@
class AdsController(RedditController):
- @validate(VSponsor())
+ @validate(VAdmin())
def GET_index(self):
res = AdminPage(content = AdminAds(),
show_sidebar = False,
title = 'ads').render()
return res
- @validate(VSponsor(),
+ @validate(VAdmin(),
ad = VAdByCodename('adcn'))
def GET_assign(self, ad):
if ad is None:
@@ -44,7 +44,7 @@ def GET_assign(self, ad):
title='assign an ad to a community').render()
return res
- @validate(VSponsor(),
+ @validate(VAdmin(),
ad = VAdByCodename('adcn'))
def GET_srs(self, ad):
if ad is None:
107 r2/r2/controllers/api.py
@@ -30,11 +30,12 @@
from r2.models.subreddit import Default as DefaultSR
from r2.lib.utils import get_title, sanitize_url, timeuntil, set_last_modified
-from r2.lib.utils import query_string, link_from_url, timefromnow
-from r2.lib.utils import timeago, tup
+from r2.lib.utils import query_string, timefromnow
+from r2.lib.utils import timeago, tup, filter_links
from r2.lib.pages import FriendList, ContributorList, ModList, \
BannedList, BoringPage, FormPage, CssError, UploadedImage, \
ClickGadget
+from r2.lib.utils.trial_utils import indict, on_trial
from r2.lib.pages.things import wrap_links, default_thing_wrapper
from r2.lib import spreadshirt
@@ -78,16 +79,19 @@ class ApiController(RedditController):
def ajax_login_redirect(self, form, jquery, dest):
form.redirect("/login" + query_string(dict(dest=dest)))
- @validate(link = VUrl(['url']),
+ @validate(link1 = VUrl(['url']),
+ link2 = VByName('id'),
count = VLimit('limit'))
- def GET_info(self, link, count):
+ def GET_info(self, link1, link2, count):
"""
Gets a listing of links which have the provided url.
"""
- if not link or 'url' not in request.params:
- return abort(404, 'not found')
+ links = []
+ if link2:
+ links = filter_links(tup(link2), filter_spam = False)
+ elif link1 and ('ALREADY_SUB', 'url') in c.errors:
+ links = filter_links(tup(link1), filter_spam = False)
- links = link_from_url(request.params.get('url'), filter_spam = False)
if not links:
return abort(404, 'not found')
@@ -577,6 +581,15 @@ def POST_report(self, thing):
return
Report.new(c.user, thing)
+ @noresponse(VAdmin(), VModhash(),
+ thing = VByName('id'))
+ def POST_indict(self, thing):
+ '''put something on trial'''
+ if not thing:
+ log_text("indict: no thing", level="warning")
+
+ indict(thing)
+
@validatedForm(VUser(),
VModhash(),
item = VByNameIfAuthor('thing_id'),
@@ -753,6 +766,42 @@ def POST_share(self, shareform, jquery, emails, thing, share_from, reply_to,
@noresponse(VUser(),
VModhash(),
+ ip = ValidIP(),
+ dir = VInt('dir', min=-1, max=1),
+ thing = VByName('id'))
+ def POST_juryvote(self, dir, thing, ip):
+ if not thing:
+ log_text("juryvote: no thing", level="warning")
+ return
+
+ if not ip:
+ log_text("juryvote: no ip", level="warning")
+ return
+
+ if dir is None:
+ log_text("juryvote: no dir", level="warning")
+ return
+
+ j = Jury.by_account_and_defendant(c.user, thing)
+
+ if not on_trial([thing]).get(thing._fullname,False):
+ log_text("juryvote: not on trial", level="warning")
+ return
+
+ if not j:
+ log_text("juryvote: not on the jury", level="warning")
+ return
+
+ log_text("juryvote",
+ "%s cast a %d juryvote on %r" % (c.user.name, dir, thing),
+ level="info")
+
+ j._name = str(dir)
+ j._date = c.start_time
+ j._commit()
+
+ @noresponse(VUser(),
+ VModhash(),
vote_type = VVotehash(('vh', 'id')),
ip = ValidIP(),
dir = VInt('dir', min=-1, max=1),
@@ -774,24 +823,21 @@ def POST_vote(self, dir, thing, ip, vote_type):
g.log.debug("POST_vote: ignoring old vote on %s" % thing._fullname)
store = False
- # in a lock to prevent duplicate votes from people
- # double-clicking the arrows
- with g.make_lock('vote_lock(%s,%s)' % (c.user._id36, thing._id36)):
- dir = (True if dir > 0
- else False if dir < 0
- else None)
-
- organic = vote_type == 'organic'
- queries.queue_vote(user, thing, dir, ip, organic, store = store,
- cheater = (errors.CHEATER, None) in c.errors)
- if store:
- #update relevant caches
- if isinstance(thing, Link):
- set_last_modified(c.user, 'liked')
- set_last_modified(c.user, 'disliked')
-
- # flag search indexer that something has changed
- changed(thing)
+ dir = (True if dir > 0
+ else False if dir < 0
+ else None)
+
+ organic = vote_type == 'organic'
+ queries.queue_vote(user, thing, dir, ip, organic, store = store,
+ cheater = (errors.CHEATER, None) in c.errors)
+ if store:
+ # update relevant caches
+ if isinstance(thing, Link):
+ set_last_modified(c.user, 'liked')
+ set_last_modified(c.user, 'disliked')
+
+ # flag search indexer that something has changed
+ changed(thing)
@validatedForm(VUser(),
VModhash(),
@@ -1444,9 +1490,10 @@ def POST_edit_error(self, form, jquery, hexkey, nickname, status):
colliding_ad=VAdByCodename(("codename", "fullname")),
codename = VLength("codename", max_length = 100),
imgurl = VLength("imgurl", max_length = 1000),
+ raw_html = VLength("raw_html", max_length = 10000),
linkurl = VLength("linkurl", max_length = 1000))
def POST_editad(self, form, jquery, ad, colliding_ad, codename,
- imgurl, linkurl):
+ imgurl, raw_html, linkurl):
if form.has_errors(("codename", "imgurl", "linkurl"),
errors.NO_TEXT):
pass
@@ -1459,12 +1506,16 @@ def POST_editad(self, form, jquery, ad, colliding_ad, codename,
return
if ad is None:
- Ad._new(codename, imgurl, linkurl)
+ Ad._new(codename,
+ imgurl=imgurl,
+ raw_html=raw_html,
+ linkurl=linkurl)
form.set_html(".status", "saved. reload to see it.")
return
ad.codename = codename
ad.imgurl = imgurl
+ ad.raw_html = raw_html
ad.linkurl = linkurl
ad._commit()
form.set_html(".status", _('saved'))
@@ -1614,7 +1665,7 @@ def POST_removetrophy(self, form, jquery, trophy):
@validatedForm(links = VByName('links', thing_cls = Link, multiple = True),
show = VByName('show', thing_cls = Link, multiple = False))
def POST_fetch_links(self, form, jquery, links, show):
- l = wrap_links(links, listing_cls = OrganicListing,
+ l = wrap_links(links, listing_cls = SpotlightListing,
num_margin = 0, mid_margin = 0)
jquery(".content").replace_things(l, stubs = True)
2  r2/r2/controllers/buttons.py
@@ -236,3 +236,5 @@ def GET_bookmarklets(self):
return BoringPage(_("bookmarklets"),
show_sidebar = False,
content=Bookmarklets()).render()
+
+
70 r2/r2/controllers/front.py
@@ -189,8 +189,11 @@ def GET_shirt(self, article):
comment = VCommentID('comment'),
context = VInt('context', min = 0, max = 8),
sort = VMenu('controller', CommentSortMenu),
- num_comments = VMenu('controller', NumCommentsMenu))
- def GET_comments(self, article, comment, context, sort, num_comments):
+ num_comments = VMenu('controller', NumCommentsMenu),
+ limit = VInt('limit'),
+ depth = VInt('depth'))
+ def GET_comments(self, article, comment, context, sort, num_comments,
+ limit, depth):
"""Comment page for a given 'article'."""
if comment and comment.link_id != article._id:
return self.abort404()
@@ -228,8 +231,18 @@ def GET_comments(self, article, comment, context, sort, num_comments):
user_num = c.user.pref_num_comments or g.num_comments
num = g.max_comments if num_comments == 'true' else user_num
+ kw = {}
+ # allow depth to be reset (I suspect I'll turn the VInt into a
+ # validator on my next pass of .compact)
+ if depth is not None and 0 < depth < MAX_RECURSION:
+ kw['max_depth'] = depth
+ # allow the user's total count preferences to be overwritten
+ # (think of .embed as the use case together with depth=1)x
+ if limit is not None and 0 < limit < g.max_comments:
+ num = limit
+
builder = CommentBuilder(article, CommentSortMenu.operator(sort),
- comment, context)
+ comment, context, **kw)
listing = NestedListing(builder, num = num,
parent_name = article._fullname)
@@ -262,6 +275,55 @@ def GET_comments(self, article, comment, context, sort, num_comments):
infotext = infotext).render()
return res
+ @validate(VUser())
+ def GET_juryduty(self):
+ displayPane = PaneStack()
+
+ active_trials = {}
+ finished_trials = {}
+
+ juries = Jury.by_account(c.user)
+
+ trials = on_trial([j._thing2 for j in juries])
+
+ for j in juries:
+ defendant = j._thing2
+
+ if trials.get(defendant._fullname, False):
+ active_trials[defendant._fullname] = j._name
+ else:
+ finished_trials[defendant._fullname] = j._name
+
+ if active_trials:
+ fullnames = sorted(active_trials.keys(), reverse=True)
+
+ def my_wrap(thing):
+ w = Wrapped(thing)
+ w.hide_score = True
+ w.likes = None
+ w.trial_mode = True
+ w.render_class = LinkOnTrial
+ w.juryvote = active_trials[thing._fullname]
+ return w
+
+ listing = wrap_links(fullnames, wrapper=my_wrap)
+ displayPane.append(InfoBar(strings.active_trials,
+ extra_class="mellow"))
+ displayPane.append(listing)
+
+ if finished_trials:
+ fullnames = sorted(finished_trials.keys(), reverse=True)
+ listing = wrap_links(fullnames)
+ displayPane.append(InfoBar(strings.finished_trials,
+ extra_class="mellow"))
+ displayPane.append(listing)
+
+ displayPane.append(InfoBar(strings.more_info_link %
+ dict(link="/help/juryduty"),
+ extra_class="mellow"))
+
+ return Reddit(content = displayPane).render()
+
@validate(VUser(),
location = nop("location"))
def GET_prefs(self, location=''):
@@ -320,7 +382,7 @@ def GET_editreddit(self, location, num, after, reverse, count, created):
if is_moderator and location == 'edit':
pane = PaneStack()
if created == 'true':
- pane.append(InfoBar(message = _('your reddit has been created')))
+ pane.append(InfoBar(message = strings.sr_created))
pane.append(CreateSubreddit(site = c.site))
elif location == 'moderators':
pane = ModList(editable = is_moderator)
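
To show the new 'limit' and 'depth' parameters on GET_comments above in use (example only; the subreddit and link id are placeholders): both are ordinary query parameters on a permalink page, with depth kept under MAX_RECURSION and limit kept under g.max_comments, which is what the .embed use case from the commit message relies on.

    import urllib2
    # fetch a permalink page trimmed to at most 5 comments, one level deep
    url = 'http://www.reddit.com/r/reddit.com/comments/abc12?limit=5&depth=1'
    html = urllib2.urlopen(url).read()
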
108 r2/r2/controllers/listingcontroller.py
@@ -25,7 +25,8 @@
from r2.models import *
from r2.lib.pages import *
from r2.lib.pages.things import wrap_links
-from r2.lib.menus import NewMenu, TimeMenu, SortMenu, RecSortMenu, ControversyTimeMenu
+from r2.lib.menus import NewMenu, TimeMenu, SortMenu, RecSortMenu
+from r2.lib.menus import ControversyTimeMenu
from r2.lib.rising import get_rising
from r2.lib.wrapped import Wrapped
from r2.lib.normalized_hot import normalized_hot, get_hot
@@ -37,6 +38,7 @@
from r2.lib.jsontemplates import is_api
from r2.lib.solrsearch import SearchQuery
from r2.lib.utils import iters, check_cheating, timeago
+from r2.lib.utils.trial_utils import populate_spotlight
from r2.lib import sup
from r2.lib.promote import PromoteSR
from r2.lib.contrib.pysolr import SolrError
@@ -96,11 +98,12 @@ def build_listing(self, num, after, reverse, count):
self.builder_obj = self.builder()
self.listing_obj = self.listing()
content = self.content()
- res = self.render_cls(content = content,
- show_sidebar = self.show_sidebar,
- nav_menus = self.menus,
- title = self.title(),
- **self.render_params).render()
+
+ res = self.render_cls(content = content,
+ show_sidebar = self.show_sidebar,
+ nav_menus = self.menus,
+ title = self.title(),
+ **self.render_params).render()
return res
@@ -215,29 +218,56 @@ def listing(self):
class HotController(FixListing, ListingController):
where = 'hot'
- def organic(self):
- o_links, pos = organic.organic_links(c.user)
- if o_links:
- # get links in proximity to pos
- l = min(len(o_links) - 3, 8)
- disp_links = [o_links[(i + pos) % len(o_links)]
- for i in xrange(-2, l)]
- def keep_fn(item):
- return item.likes is None and item.keep_item(item)
- b = IDBuilder(disp_links, wrap = self.builder_wrapper,
- skip = True, keep_fn = keep_fn)
- o = OrganicListing(b,
- org_links = o_links,
- visible_link = o_links[pos],
- max_num = self.listing_obj.max_num,
- max_score = self.listing_obj.max_score).listing()
-
- if len(o.things) > 0:
- # only pass through a listing if the links made it
- # through our builder
- organic.update_pos(pos+1)
-
- return o
+ def spotlight(self):
+ spotlight_links, pos = organic.organic_links(c.user)
+
+ trial = populate_spotlight()
+
+ if trial:
+ spotlight_links.insert(pos, trial._fullname)
+
+ if not spotlight_links:
+ return None
+
+ # get links in proximity to pos
+ num_tl = len(spotlight_links)
+ if num_tl <= 3:
+ disp_links = spotlight_links
+ else:
+ left_side = max(-1, min(num_tl - 3, 8))
+ disp_links = [spotlight_links[(i + pos) % num_tl]
+ for i in xrange(-2, left_side)]
+
+ def keep_fn(item):
+ if trial and trial._fullname == item._fullname:
+ return True
+ elif item.likes is not None:
+ return False
+ else:
+ return item.keep_item(item)
+
+ def wrap(item):
+ if item is trial:
+ w = Wrapped(item)
+ w.trial_mode = True
+ w.render_class = LinkOnTrial
+ return w
+ return self.builder_wrapper(item)
+
+ b = IDBuilder(disp_links, wrap = wrap,
+ skip = True, keep_fn = keep_fn)
+
+ s = SpotlightListing(b,
+ spotlight_links = spotlight_links,
+ visible_link = spotlight_links[pos],
+ max_num = self.listing_obj.max_num,
+ max_score = self.listing_obj.max_score).listing()
+
+ if len(s.things) > 0:
+ # only pass through a listing if the links made it
+ # through our builder
+ organic.update_pos(pos+1)
+ return s
def query(self):
@@ -258,13 +288,13 @@ def query(self):
return c.site.get_links('hot', 'all')
def content(self):
- # only send an organic listing for HTML rendering
+ # only send a spotlight listing for HTML rendering
if (c.site == Default and c.render_style == "html"
and (not c.user_is_loggedin
or (c.user_is_loggedin and c.user.pref_organic))):
- org = self.organic()
- if org:
- return PaneStack([org, self.listing_obj], css_class='spacer')
+ spotlight = self.spotlight()
+ if spotlight:
+ return PaneStack([spotlight, self.listing_obj], css_class='spacer')
return self.listing_obj
def title(self):
@@ -421,7 +451,9 @@ def title(self):
def keep_fn(self):
# keep promotions off of profile pages.
def keep(item):
- return getattr(item, "promoted", None) is None
+ return (getattr(item, "promoted", None) is None and
+ (self.where == "deleted" or
+ not getattr(item, "deleted", False)))
return keep
def query(self):
@@ -457,7 +489,7 @@ def query(self):
if q is None:
return self.abort404()
- return q
+ return q
@validate(vuser = VExistingUname('username'))
def GET_listing(self, where, vuser, **env):
@@ -468,7 +500,7 @@ def GET_listing(self, where, vuser, **env):
return self.abort404()
# hide spammers profile pages
- if (not c.user_is_loggedin or
+ if (not c.user_is_loggedin or
(c.user._id != vuser._id and not c.user_is_admin)) \
and vuser._spam:
return self.abort404()
@@ -478,7 +510,7 @@ def GET_listing(self, where, vuser, **env):
return self.abort404()
check_cheating('user')
-
+
self.vuser = vuser
self.render_params = {'user' : vuser}
c.profilepage = True
@@ -664,7 +696,7 @@ def query(self):
reddits._filter(Subreddit.c.lang == c.content_langs)
if not c.over18:
reddits._filter(Subreddit.c.over_18 == False)
-
+
return reddits
def GET_listing(self, where, **env):
self.where = where
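
A quick worked example of the proximity-window arithmetic in spotlight() above (illustration only, not part of the commit): with ten spotlight links and pos = 4, left_side becomes min(10 - 3, 8) = 7, so the window spans offsets -2 through 6 around pos.

    spotlight_links = ['link%d' % i for i in range(10)]
    pos = 4
    num_tl = len(spotlight_links)
    left_side = max(-1, min(num_tl - 3, 8))              # -> 7
    disp_links = [spotlight_links[(i + pos) % num_tl]
                  for i in xrange(-2, left_side)]
    print disp_links
    # -> ['link2', 'link3', 'link4', 'link5', 'link6',
    #     'link7', 'link8', 'link9', 'link0']
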
18 r2/r2/controllers/mediaembed.py
@@ -27,6 +27,9 @@
from pylons import request
from pylons.controllers.util import abort
+from r2.lib.cache import make_key
+
+import random
class MediaembedController(MinimalController):
@validate(link = VLink('link'))
@@ -52,6 +55,19 @@ def GET_mediaembed(self, link):
return MediaEmbedBody(body = content).render()
+ def GET_comscore(self, reddit = None):
+ return ComScore().render(style="html")
+
+class AdController(MinimalController):
+ def request_key(self):
+ return make_key('request_key',
+ c.lang,
+ c.content_langs,
+ request.host,
+ c.cname,
+ request.fullpath,
+ random.choice(xrange(100)))
+
def GET_ad(self, reddit_name = None):
c.render_style = "html"
return render_ad(reddit_name=reddit_name)
@@ -62,5 +78,3 @@ def GET_ad_by_codename(self, codename = None):
c.render_style = "html"
return render_ad(codename=codename)
- def GET_comscore(self, reddit = None):
- return ComScore().render(style="html")
54 r2/r2/controllers/reddit_base.py
@@ -26,7 +26,7 @@
from pylons.i18n.translation import LanguageError
from r2.lib.base import BaseController, proxyurl
from r2.lib import pages, utils, filters, amqp
-from r2.lib.utils import http_utils, UniqueIterator
+from r2.lib.utils import http_utils, UniqueIterator, ip_and_slash16
from r2.lib.cache import LocalCache, make_key, MemcachedError
import random as rand
from r2.models.account import valid_cookie, FakeAccount, valid_feed
@@ -52,8 +52,6 @@
cache_affecting_cookies = ('reddit_first','over18','_options')
-r_subnet = re.compile("^(\d+\.\d+)\.\d+\.\d+$")
-
class Cookies(dict):
def add(self, name, value, *k, **kw):
self[name] = Cookie(value, *k, **kw)
@@ -368,15 +366,6 @@ def set_cnameframe():
if hasattr(c.site, 'domain'):
c.authorized_cname = request.environ.get('authorized_cname', False)
-def set_colors():
- theme_rx = re.compile(r'')
- color_rx = re.compile(r'^([a-fA-F0-9]){3}(([a-fA-F0-9]){3})?$')
- c.theme = None
- if color_rx.match(request.get.get('bgcolor') or ''):
- c.bgcolor = request.get.get('bgcolor')
- if color_rx.match(request.get.get('bordercolor') or ''):
- c.bordercolor = request.get.get('bordercolor')
-
def set_recent_reddits():
names = read_user_cookie('recent_reddits')
c.recent_reddits = []
@@ -388,6 +377,15 @@ def set_recent_reddits():
except NotFound:
pass
+def set_colors():
+ theme_rx = re.compile(r'')
+ color_rx = re.compile(r'^([a-fA-F0-9]){3}(([a-fA-F0-9]){3})?$')
+ c.theme = None
+ if color_rx.match(request.get.get('bgcolor') or ''):
+ c.bgcolor = request.get.get('bgcolor')
+ if color_rx.match(request.get.get('bordercolor') or ''):
+ c.bordercolor = request.get.get('bordercolor')
+
def ratelimit_agents():
user_agent = request.user_agent
for s in g.agents:
@@ -402,15 +400,10 @@ def throttled(key):
return g.cache.get("throttle_" + key)
def ratelimit_throttled():
- ip = request.ip
+ ip, slash16 = ip_and_slash16(request)
- m = r_subnet.match(ip)
- if m is None:
- g.log.error("ratelimit_throttled: couldn't parse IP %s" % ip)
- else:
- subnet = m.group(1) + '.x.x'
- if throttled(ip) or throttled(subnet):
- abort(503, 'service temporarily unavailable')
+ if throttled(ip) or throttled(slash16):
+ abort(503, 'service temporarily unavailable')
#TODO i want to get rid of this function. once the listings in front.py are
@@ -556,11 +549,25 @@ def post(self):
# the key was too big to set in the rendercache
g.log.debug("Ignored too-big render cache")
- if g.enable_usage_stats:
+ if g.usage_sampling <= 0.0:
+ return
+
+ if g.usage_sampling >= 1.0 or rand.random() < g.usage_sampling:
+ if ('pylons.routes_dict' in request.environ and
+ 'action' in request.environ['pylons.routes_dict']):
+ action = str(request.environ['pylons.routes_dict']['action'])
+ else:
+ action = "unknown"
+ log_text("unknown action",
+ "no action for %r" % path_info,
+ "warning")
+
amqp.add_kw("usage_q",
start_time = c.start_time,
end_time = datetime.now(g.tz),
- action = str(c.action) or "static")
+ sampling_rate = g.usage_sampling,
+ action = action)
+
class RedditController(MinimalController):
@@ -633,9 +640,10 @@ def pre(self):
set_content_type()
set_iface_lang()
set_content_lang()
- set_colors()
set_recent_reddits()
set_recent_clicks()
+ # used for HTML-lite templates
+ set_colors()
# set some environmental variables in case we hit an abort
if not isinstance(c.site, FakeSubreddit):
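
The ratelimit_throttled() rewrite above swaps the inline r_subnet regex for a new ip_and_slash16() helper (added to r2/r2/lib/utils/utils.py; that hunk is not shown here). A plausible reconstruction based on the removed code follows; this is an assumption about the helper, not its actual implementation.

    import re

    def ip_and_slash16(req):
        # assumed behavior: return the request IP plus an '<a>.<b>.x.x'
        # throttle key built from its first two octets, mirroring what
        # ratelimit_throttled() used to compute inline
        ip = req.ip
        m = re.match(r"^(\d+\.\d+)\.\d+\.\d+$", ip)
        if m is None:
            return ip, None
        return ip, m.group(1) + '.x.x'
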
6 r2/r2/controllers/validator/validator.py
@@ -561,6 +561,12 @@ def run(self):
raise VerifiedUserRequiredException
class VSponsor(VVerifiedUser):
+ """
+ Not intended to be used as a check for c.user_is_sponsor, but
+ rather is the user allowed to use the sponsored link system and,
+ if there is a link passed in, is the user allowed to edit the link
+ in question.
+ """
def user_test(self, thing):
return (thing.author_id == c.user._id)
3  r2/r2/lib/amqp.py
@@ -40,6 +40,7 @@
amqp_pass = g.amqp_pass
log = g.log
amqp_virtual_host = g.amqp_virtual_host
+amqp_logging = g.amqp_logging
connection = None
channel = local()
@@ -155,7 +156,7 @@ def _add_item(routing_key, body, message_id = None):
raise
def add_item(routing_key, body, message_id = None):
- if amqp_host:
+ if amqp_host and amqp_logging:
log.debug("amqp: adding item %r to %r" % (body, routing_key))
worker.do(_add_item, routing_key, body, message_id = message_id)
60 r2/r2/lib/app_globals.py
@@ -21,11 +21,12 @@
################################################################################
from __future__ import with_statement
from pylons import config
-import pytz, os, logging, sys, socket, re, subprocess
+import pytz, os, logging, sys, socket, re, subprocess, random
from datetime import timedelta, datetime
from r2.lib.cache import LocalCache, SelfEmptyingCache
-from r2.lib.cache import Memcache, Permacache, HardCache
-from r2.lib.cache import MemcacheChain, DoubleMemcacheChain, PermacacheChain, HardcacheChain
+from r2.lib.cache import PyMemcache, CMemcache
+from r2.lib.cache import HardCache, MemcacheChain, MemcacheChain, HardcacheChain
+from r2.lib.cache import CassandraCache, CassandraCacheChain
from r2.lib.db.stats import QueryStats
from r2.lib.translation import get_active_langs
from r2.lib.lock import make_lock_factory
@@ -55,6 +56,7 @@ class Globals(object):
float_props = ['min_promote_bid',
'max_promote_bid',
+ 'usage_sampling',
]
bool_props = ['debug', 'translator',
@@ -69,13 +71,15 @@ class Globals(object):
'db_create_tables',
'disallow_db_writes',
'exception_logging',
- 'enable_usage_stats',
+ 'amqp_logging',
]
tuple_props = ['memcaches',
'rec_cache',
- 'permacaches',
'rendercaches',
+ 'permacache_memcaches',
+ 'cassandra_seeds',
+ 'permacaches',
'admins',
'sponsors',
'monitored_servers',
@@ -129,24 +133,41 @@ def __init__(self, global_conf, app_conf, paths, **extra):
# to cache_chains (closed around by reset_caches) so that they
# can properly reset their local components
- localcache_cls = SelfEmptyingCache if self.running_as_script else LocalCache
+ localcache_cls = (SelfEmptyingCache if self.running_as_script
+ else LocalCache)
+ num_mc_clients = 2 if self.running_as_script else 10
+
+ py_mc = PyMemcache(self.memcaches)
+ c_mc = CMemcache(self.memcaches, num_clients = num_mc_clients)
+ rmc = CMemcache(self.rendercaches, num_clients = num_mc_clients)
+ rec_cache = None # we're not using this for now
+
+ pmc_chain = (localcache_cls(),)
+ if self.permacache_memcaches:
+ pmc_chain += (PyMemcache(self.permacache_memcaches),)
+ if self.cassandra_seeds:
+ self.cassandra_seeds = list(self.cassandra_seeds)
+ random.shuffle(self.cassandra_seeds)
+ pmc_chain += (CassandraCache('permacache', 'permacache',
+ self.cassandra_seeds),)
+ if self.permacaches:
+ pmc_chain += (PyMemcache(self.permacaches),)
+ if len(pmc_chain) == 1:
+ print 'Warning: proceding without a permacache'
+
+ self.permacache = CassandraCacheChain(pmc_chain)
- # we're going to temporarily run the old memcached behind the
- # new one so the caches can start warmer
- # mc = Memcache(self.memcaches, debug=self.debug)
- mc = Permacache(self.memcaches)
- rec_cache = Permacache(self.rec_cache)
- rmc = Permacache(self.rendercaches)
- pmc = Permacache(self.permacaches)
# hardcache is done after the db info is loaded, and then the
# chains are reset to use the appropriate initial entries
- self.memcache = mc
- self.cache = PermacacheChain((localcache_cls(), mc))
- self.permacache = PermacacheChain((localcache_cls(), pmc))
- self.rendercache = PermacacheChain((localcache_cls(), rmc))
+
+ self.memcache = py_mc # we'll keep using this one for locks
+ # intermediately
+
+ self.cache = MemcacheChain((localcache_cls(), py_mc))
+ self.rendercache = MemcacheChain((localcache_cls(), rmc))
self.rec_cache = rec_cache
- self.make_lock = make_lock_factory(mc)
+ self.make_lock = make_lock_factory(self.memcache)
cache_chains = [self.cache, self.permacache, self.rendercache]
# set default time zone if one is not set
@@ -160,7 +181,8 @@ def __init__(self, global_conf, app_conf, paths, **extra):
self.dbm = self.load_db_params(global_conf)
# can't do this until load_db_params() has been called
- self.hardcache = HardcacheChain((localcache_cls(), mc, HardCache(self)),
+ self.hardcache = HardcacheChain((localcache_cls(), py_mc,
+ HardCache(self)),
cache_negative_results = True)
cache_chains.append(self.hardcache)
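
Condensed, the permacache chain assembled above reads front-to-back as the local cache, then the memcaches sitting in front of Cassandra, then Cassandra itself, then the legacy memcachedb tier. A sketch of the fully-populated case (in the real code each layer is only added if configured):

    pmc_chain = (localcache_cls(),                      # per-process cache
                 PyMemcache(self.permacache_memcaches), # memcached in front
                 CassandraCache('permacache', 'permacache',
                                self.cassandra_seeds),  # Cassandra keyspace/CF
                 PyMemcache(self.permacaches))          # legacy memcachedb tier
    self.permacache = CassandraCacheChain(pmc_chain)
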
253 r2/r2/lib/cache.py
@@ -21,13 +21,18 @@
################################################################################
from threading import local
from hashlib import md5
+import pickle
import pylibmc
from _pylibmc import MemcachedError
-from contrib import memcache
+import pycassa
+import cassandra.ttypes
+
+from contrib import memcache
from utils import lstrips, in_chunks, tup
from r2.lib.hardcachebackend import HardCacheBackend
+from r2.lib.utils import trace
class NoneResult(object): pass
@@ -43,20 +48,21 @@ def add_multi(self, keys, prefix=''):
for k,v in keys.iteritems():
self.add(prefix+str(k), v)
- def get_multi(self, keys, prefix='', partial=True):
- if prefix:
- key_map = dict((prefix+str(k), k) for k in keys)
+ def _prefix_keys(self, keys, prefix):
+ if len(prefix):
+ return dict((prefix+str(k), k) for k in keys)
else:
- key_map = dict((str(k), k) for k in keys)
+ return dict((str(k), k) for k in keys)
- r = self.simple_get_multi(key_map.keys())
-
- if not partial and len(r.keys()) < len(key_map):
- return None
+ def _unprefix_keys(self, results, key_map):
+ return dict((key_map[k], results[k]) for k in results.keys())
- return dict((key_map[k], r[k]) for k in r.keys())
+ def get_multi(self, keys, prefix=''):
+ key_map = self._prefix_keys(keys, prefix)
+ results = self.simple_get_multi(key_map.keys())
+ return self._unprefix_keys(results, key_map)
-class Permacache(CacheUtils, memcache.Client):
+class PyMemcache(CacheUtils, memcache.Client):
"""We still use our patched python-memcache to talk to the
permacaches for legacy reasons"""
simple_get_multi = memcache.Client.get_multi
@@ -87,61 +93,87 @@ def delete_multi(self, keys, prefix='', time=0):
memcache.Client.delete_multi(self, keys, time = time,
key_prefix = prefix)
- def get_local_client(self):
- return self # memcache.py handles this itself
+class CMemcache(CacheUtils):
+ def __init__(self,
+ servers,
+ debug = False,
+ binary = True,
+ noreply = False,
+ num_clients = 10):
+ self.servers = servers
+ self.clients = pylibmc.ClientPool(n_slots = num_clients)
+ for x in xrange(num_clients):
+ client = pylibmc.Client(servers, binary=binary)
+ behaviors = {
+ 'no_block': True, # use async I/O
+ 'cache_lookups': True, # cache DNS lookups
+ 'tcp_nodelay': True, # no nagle
+ 'ketama': True, # consistant hashing
+ '_noreply': int(noreply),
+ 'verify_key': int(debug), # spend the CPU to verify keys
+ }
+ client.behaviors.update(behaviors)
+ self.clients.put(client)
+
+ def get(self, key, default = None):
+ with self.clients.reserve() as mc:
+ ret = mc.get(key)
+ if ret is None:
+ return default
+ return ret
-class Memcache(CacheUtils, pylibmc.Client):
- simple_get_multi = pylibmc.Client.get_multi
+ def get_multi(self, keys, prefix = ''):
+ with self.clients.reserve() as mc:
+ return mc.get_multi(keys, key_prefix = prefix)
- def __init__(self, servers,
- debug = False,
- binary=True,
- noreply=False):
- pylibmc.Client.__init__(self, servers, binary=binary)
- behaviors = {'no_block': True, # use async I/O
- 'cache_lookups': True, # cache DNS lookups
- 'tcp_nodelay': True, # no nagle
- 'ketama': True, # consistant hashing
- '_noreply': int(noreply),
- 'verify_key': int(debug)} # spend the CPU to verify keys
- self.behaviors.update(behaviors)
- self.local_clients = local()
-
- def get_local_client(self):
- # if this thread hasn't had one yet, make one
- if not getattr(self.local_clients, 'client', None):
- self.local_clients.client = self.clone()
- return self.local_clients.client
+ # simple_get_multi exists so that a cache chain can
+ # single-instance the handling of prefixes for performance, but
+ # pylibmc does this in C which is faster anyway, so CMemcache
+ # implements get_multi itself. But the CacheChain still wants
+ # simple_get_multi to be available for when it's already prefixed
+ # them, so here it is
+ simple_get_multi = get_multi
+
+ def set(self, key, val, time = 0):
+ with self.clients.reserve() as mc:
+ return mc.set(key, val, time = time)
def set_multi(self, keys, prefix='', time=0):
new_keys = {}
for k,v in keys.iteritems():
new_keys[str(k)] = v
- pylibmc.Client.set_multi(self, new_keys, key_prefix = prefix,
- time = time)
+ with self.clients.reserve() as mc:
+ return mc.set_multi(new_keys, key_prefix = prefix,
+ time = time)
+
+ def append(self, key, val, time=0):
+ with self.clients.reserve() as mc:
+ return mc.append(key, val, time=time)
def incr(self, key, delta=1, time=0):
# ignore the time on these
- return pylibmc.Client.incr(self, key, delta)
+ with self.clients.reserve() as mc:
+ return mc.incr(key, delta)
def add(self, key, val, time=0):
try:
- return pylibmc.Client.add(self, key, val, time=time)
+ with self.clients.reserve() as mc:
+ return mc.add(key, val, time=time)
except pylibmc.DataExists:
return None
- def get(self, key, default=None):
- r = pylibmc.Client.get(self, key)
- if r is None:
- return default
- return r
-
- def set(self, key, val, time=0):
- pylibmc.Client.set(self, key, val, time = time)
+ def delete(self, key, time=0):
+ with self.clients.reserve() as mc:
+ return mc.delete(key)
def delete_multi(self, keys, prefix='', time=0):
- pylibmc.Client.delete_multi(self, keys, time = time,
- key_prefix = prefix)
+ with self.clients.reserve() as mc:
+ return mc.delete_multi(keys, time = time,
+ key_prefix = prefix)
+
+ def __repr__(self):
+ return '<%s(%r)>' % (self.__class__.__name__,
+ self.servers)
class HardCache(CacheUtils):
backend = None
@@ -305,9 +337,9 @@ def fn(self, *a, **kw):
flush_all = make_set_fn('flush_all')
cache_negative_results = False
- def get(self, key, default = None, local = True):
+ def get(self, key, default = None, allow_local = True):
for c in self.caches:
- if not local and isinstance(c,LocalCache):
+ if not allow_local and isinstance(c,LocalCache):
continue
val = c.get(key)
@@ -332,10 +364,19 @@ def get(self, key, default = None, local = True):
return default
- def simple_get_multi(self, keys):
+ def get_multi(self, keys, prefix='', allow_local = True):
+ key_map = self._prefix_keys(keys, prefix)
+ results = self.simple_get_multi(key_map.keys(),
+ allow_local = allow_local)
+ return self._unprefix_keys(results, key_map)
+
+ def simple_get_multi(self, keys, allow_local = True):
out = {}
need = set(keys)
for c in self.caches:
+ if not allow_local and isinstance(c, LocalCache):
+ continue
+
if len(out) == len(keys):
# we've found them all
break
@@ -378,37 +419,12 @@ def reset(self):
self.caches = (self.caches[0].__class__(),) + self.caches[1:]
class MemcacheChain(CacheChain):
- def __init__(self, caches):
- CacheChain.__init__(self, caches)
- self.mc_master = self.caches[-1]
-
- def reset(self):
- CacheChain.reset(self)
- localcache, old_mc = self.caches
- self.caches = (localcache, self.mc_master.get_local_client())
-
-class DoubleMemcacheChain(CacheChain):
- """Temporary cache chain that places the new cache ahead of the
- old one for easier deployment"""
- def __init__(self, caches):
- self.caches = localcache, memcache, permacache = caches
- self.mc_master = memcache
-
- def reset(self):
- CacheChain.reset(self)
- self.caches = (self.caches[0],
- self.mc_master.get_local_client(),
- self.caches[2])
+ pass
-class PermacacheChain(CacheChain):
+class CassandraCacheChain(CacheChain):
pass
class HardcacheChain(CacheChain):
- def __init__(self, caches, cache_negative_results = False):
- CacheChain.__init__(self, caches, cache_negative_results)
- localcache, memcache, hardcache = self.caches
- self.mc_master = memcache
-
def add(self, key, val, time=0):
authority = self.caches[-1] # the authority is the hardcache
# itself
@@ -417,6 +433,7 @@ def add(self, key, val, time=0):
# Calling set() rather than add() to ensure that all caches are
# in sync and that de-syncs repair themselves
cache.set(key, added_val, time=time)
+
return added_val
def accrue(self, key, time=0, delta=1):
@@ -440,18 +457,82 @@ def backend(self):
# the hardcache is always the last item in a HardCacheChain
return self.caches[-1].backend
- def reset(self):
- CacheChain.reset(self)
- assert len(self.caches) == 3
- self.caches = (self.caches[0],
- self.mc_master.get_local_client(),
- self.caches[2])
+CL_ZERO = cassandra.ttypes.ConsistencyLevel.ZERO
+CL_ONE = cassandra.ttypes.ConsistencyLevel.ONE
+CL_QUORUM = cassandra.ttypes.ConsistencyLevel.QUORUM
+CL_ALL = cassandra.ttypes.ConsistencyLevel.ALL
+
+class CassandraCache(CacheUtils):
+ """A cache that uses a Cassandra cluster. Uses a single keyspace
+ and column family and only the column-name 'value'"""
+ def __init__(self, keyspace, column_family, seeds,
+ read_consistency_level = CL_ONE,
+ write_consistency_level = CL_ONE):
+ self.keyspace = keyspace
+ self.column_family = column_family
+ self.seeds = seeds
+ self.client = pycassa.connect_thread_local(seeds)
+ self.cf = pycassa.ColumnFamily(self.client, self.keyspace,
+ self.column_family,
+ read_consistency_level = read_consistency_level,
+ write_consistency_level = write_consistency_level)
+
+ def _rcl(self, alternative):
+ return (alternative if alternative is not None
+ else self.cf.read_consistency_level)
+
+ def _wcl(self, alternative):
+ return (alternative if alternative is not None
+ else self.cf.write_consistency_level)
+
+ def get(self, key, default = None, read_consistency_level = None):
+ try:
+ rcl = self._rcl(read_consistency_level)
+ row = self.cf.get(key, columns=['value'],
+ read_consistency_level = rcl)
+ return pickle.loads(row['value'])
+ except (cassandra.ttypes.NotFoundException, KeyError):
+ return default
-#smart get multi
+ def simple_get_multi(self, keys, read_consistency_level = None):
+ rcl = self._rcl(read_consistency_level)
+ rows = self.cf.multiget(list(keys),
+ columns=['value'],
+ read_consistency_level = rcl)
+ return dict((key, pickle.loads(row['value']))
+ for (key, row) in rows.iteritems())
+
+ def set(self, key, val,
+ write_consistency_level = None, time = None):
+ wcl = self._wcl(write_consistency_level)
+ return self.cf.insert(key, {'value': pickle.dumps(val)},
+ write_consistency_level = wcl)
+
+ def set_multi(self, keys, prefix='',
+ write_consistency_level = None, time = None):
+ if not isinstance(keys, dict):
+ keys = dict(keys)
+ keys = dict(('%s%s' % (prefix, key), val)
+ for (key, val) in keys.iteritems())
+ wcl = self._wcl(write_consistency_level)
+ ret = {}
+ for key, val in keys.iteritems():
+ ret[key] = self.cf.insert(key, {'value': pickle.dumps(val)},
+ write_consistency_level = wcl)
+
+ return ret
+
+ def delete(self, key, write_consistency_level = None):
+ wcl = self._wcl(write_consistency_level)
+ self.cf.remove(key, write_consistency_level = wcl)
+
+# smart get multi:
+# For any keys not found in the cache, miss_fn() is run and the result is
+# stored in the cache. Then it returns everything, both the hits and misses.
def sgm(cache, keys, miss_fn, prefix='', time=0):
keys = set(keys)
s_keys = dict((str(k), k) for k in keys)
- r = cache.get_multi(s_keys.keys(), prefix)
+ r = cache.get_multi(s_keys.keys(), prefix=prefix)
if miss_fn and len(r.keys()) < len(keys):
need = set(s_keys.keys()) - set(r.keys())
#TODO i can't send a generator
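
A small usage sketch of sgm() as documented above (illustrative only; DictCache, miss_fn and the demo keys are made up). Per the comment, any keys the cache can't serve are computed by miss_fn, written back, and merged into the result.

    class DictCache(object):
        """Toy cache exposing the get_multi/set_multi surface sgm() needs."""
        def __init__(self):
            self.store = {}
        def get_multi(self, keys, prefix=''):
            return dict((k, self.store[prefix + str(k)])
                        for k in keys if (prefix + str(k)) in self.store)
        def set_multi(self, keys, prefix='', time=0):
            for k, v in keys.iteritems():
                self.store[prefix + str(k)] = v

    def miss_fn(missing_keys):
        # stand-in for a database lookup on whatever wasn't cached
        return dict((k, 'computed-' + k) for k in missing_keys)

    cache = DictCache()
    cache.set_multi({'a': 'cached-a'}, prefix='demo_')
    print sgm(cache, ['a', 'b'], miss_fn, prefix='demo_')
    # -> {'a': 'cached-a', 'b': 'computed-b'}, and 'demo_b' is now cached
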
23 r2/r2/lib/db/queries.py
@@ -20,6 +20,7 @@
query_cache = g.permacache
log = g.log
make_lock = g.make_lock
+worker = amqp.worker
precompute_limit = 1000
@@ -140,17 +141,18 @@ def postflight(self):
sr.last_batch_query = last_batch_query
sr._commit()
- def fetch(self):
+ def fetch(self, force=False):
"""Loads the query from the cache."""
- self.fetch_multi([self])
+ self.fetch_multi([self], force=force)
@classmethod
- def fetch_multi(cls, crs):
- unfetched = [cr for cr in crs if not cr._fetched]
+ def fetch_multi(cls, crs, force=False):
+ unfetched = [cr for cr in crs if not cr._fetched or force]
if not unfetched:
return
- cached = query_cache.get_multi([cr.iden for cr in unfetched])
+ cached = query_cache.get_multi([cr.iden for cr in unfetched],
+ allow_local = not force)
for cr in unfetched:
cr.data = cached.get(cr.iden) or []
cr._fetched = True
@@ -479,16 +481,18 @@ def _add_queries():
with make_lock("add_query(%s)" % q.iden):
if insert_items and q.can_insert():
+ q.fetch(force=True)
log.debug("Inserting %s into query %s" % (insert_items, q))
q.insert(insert_items)
elif delete_items and q.can_delete():
+ q.fetch(force=True)
log.debug("Deleting %s from query %s" % (delete_items, q))
q.delete(delete_items)
else:
log.debug('Adding precomputed query %s' % q)
query_queue.add_query(q)
# let the amqp worker handle this
- amqp.worker.do(_add_queries)
+ worker.do(_add_queries)
#can be rewritten to be more efficient
def all_queries(fn, obj, *param_lists):
@@ -588,7 +592,7 @@ def new_vote(vote):
# these less often; see the discussion above
# batched_time_times
for sort in batched_time_sorts:
- for time in db_times.keys():
+ for time in (set(db_times.keys()) - batched_time_times):
q = make_batched_time_query(sr, sort, time)
results.append(q)
@@ -995,8 +999,7 @@ def _handle_votes(msgs, chan):
def catch_up_batch_queries():
# catch up on batched_time_times queries that haven't been run
- # that should be, which should only happen to small
- # subreddits. This should be cronned to run about once an
+ # that should be, This should be cronned to run about once an
# hour. The more often, the more the work of rerunning the actual
# queries is spread out, but every run has a fixed-cost of looking
# at every single subreddit
@@ -1017,7 +1020,7 @@ def catch_up_batch_queries():
# make sure that all of the jobs have been completed or processed
# by the time we return
- amqp.worker.join()
+ worker.join()
try:
from r2admin.lib.admin_queries import *
6 r2/r2/lib/db/thing.py
@@ -175,7 +175,7 @@ def _cache_key(self):
def _other_self(self):
"""Load from the cached version of myself. Skip the local cache."""
- l = cache.get(self._cache_key(), local = False)
+ l = cache.get(self._cache_key(), allow_local = False)
if l and l._id != self._id:
g.log.error("thing.py: Doppleganger on read: got %s for %s",
(l, self))
@@ -913,7 +913,7 @@ def _retrieve():
with g.make_lock("lock_%s" % self._iden()):
# see if it was set while we were waiting for our
# lock
- names = cache.get(self._iden(), local = False)
+ names = cache.get(self._iden(), allow_local = False)
if not names:
lst = _retrieve()
cache.set(self._iden(),
@@ -1188,6 +1188,8 @@ def _query(cls, *rules, **kw):
#TODO it should be possible to send the rules and kw to
#the merge constructor
queries = [r._query(*rules, **kw) for r in cls.rels.values()]
+ if "sort" in kw:
+ print "sorting MultiRelations is not supported"
return Merge(queries)
@classmethod
13 r2/r2/lib/jsontemplates.py
@@ -193,7 +193,7 @@ class AccountJsonTemplate(ThingJsonTemplate):
comment_karma = "comment_karma",
has_mail = "has_mail",
has_mod_mail = "has_mod_mail",
- is_mod = "is_mod",
+ is_mod = "is_mod"
)
def thing_attr(self, thing, attr):
@@ -230,7 +230,9 @@ class LinkJsonTemplate(ThingJsonTemplate):
selftext_html= "selftext_html",
num_comments = "num_comments",
subreddit = "subreddit",
- subreddit_id = "subreddit_id")
+ subreddit_id = "subreddit_id",
+ permalink = "permalink"
+ )
def thing_attr(self, thing, attr):
from r2.lib.scraper import scrapers
@@ -392,10 +394,13 @@ def render(self, thing = None, *a, **kw):
class ListingJsonTemplate(ThingJsonTemplate):
_data_attrs_ = dict(children = "things",
after = "after",
- before = "before")
+ before = "before",
+ modhash = "modhash")
def thing_attr(self, thing, attr):
- if attr == "things":
+ if attr == "modhash":
+ return c.modhash
+ elif attr == "things":
res = []
for a in thing.things:
a.childlisting = False
2  r2/r2/lib/lock.py
@@ -40,7 +40,7 @@ def __init__(self, key, cache, time = 30, timeout = 30):
self.locks = locks.locks = getattr(locks, 'locks', set())
self.key = key
- self.cache = cache.get_local_client()
+ self.cache = cache
self.time = time
self.timeout = timeout
self.have_lock = False
252 r2/r2/lib/migrate.py
@@ -206,3 +206,255 @@ def load_accounts(inbox_rel):
print "%s / %s : %s" % (i, len(accounts), a)
queries.get_unread_comments(a).update()
queries.get_unread_selfreply(a).update()
+
+def pushup_permacache(verbosity=1000):
+ """When putting cassandra into the permacache chain, we need to
+ push everything up into the rest of the chain, so this is
+ everything that uses the permacache, as of that check-in."""
+ from pylons import g
+ from r2.models import Link, Subreddit, Account
+ from r2.lib.db.operators import desc
+ from r2.lib.comment_tree import comments_key, messages_key
+ from r2.lib.utils import fetch_things2, in_chunks
+ from r2.lib.utils import last_modified_key
+ from r2.lib.promote import promoted_memo_key
+ from r2.lib.subreddit_search import load_all_reddits
+ from r2.lib.db import queries
+ from r2.lib.cache import CassandraCacheChain
+
+ authority = g.permacache.caches[-1]
+ nonauthority = CassandraCacheChain(g.permacache.caches[1:-1])
+
+ def populate(keys):
+ vals = authority.simple_get_multi(keys)
+ if vals:
+ nonauthority.set_multi(vals)
+
+ def gen_keys():
+ yield promoted_memo_key
+
+ # just let this one do its own writing
+ load_all_reddits()
+
+ yield queries.get_all_comments().iden
+
+ l_q = Link._query(Link.c._spam == (True, False),
+ Link.c._deleted == (True, False),
+ sort=desc('_date'),
+ data=True,
+ )
+ for link in fetch_things2(l_q, verbosity):
+ yield comments_key(link._id)
+ yield last_modified_key(link, 'comments')
+ if not getattr(link, 'is_self', False) and hasattr(link, 'url'):
+ yield Link.by_url_key(link.url)
+
+ a_q = Account._query(Account.c._spam == (True, False),
+ sort=desc('_date'),
+ )
+ for account in fetch_things2(a_q, verbosity):
+ yield messages_key(account._id)
+ yield last_modified_key(account, 'overview')
+ yield last_modified_key(account, 'commented')
+ yield last_modified_key(account, 'submitted')
+ yield last_modified_key(account, 'liked')
+ yield last_modified_key(account, 'disliked')
+ yield queries.get_comments(account, 'new', 'all').iden
+ yield queries.get_submitted(account, 'new', 'all').iden
+ yield queries.get_liked(account).iden
+ yield queries.get_disliked(account).iden
+ yield queries.get_hidden(account).iden
+ yield queries.get_saved(account).iden
+ yield queries.get_inbox_messages(account).iden
+ yield queries.get_unread_messages(account).iden
+ yield queries.get_inbox_comments(account).iden
+ yield queries.get_unread_comments(account).iden
+ yield queries.get_inbox_selfreply(account).iden
+ yield queries.get_unread_selfreply(account).iden
+ yield queries.get_sent(account).iden
+
+ sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
+ sort=desc('_date'),
+ )
+ for sr in fetch_things2(sr_q, verbosity):
+ yield last_modified_key(sr, 'stylesheet_contents')
+ yield queries.get_links(sr, 'hot', 'all').iden
+ yield queries.get_links(sr, 'new', 'all').iden
+
+ for sort in 'top', 'controversial':
+ for time in 'hour', 'day', 'week', 'month', 'year', 'all':
+ yield queries.get_links(sr, sort, time,
+ merge_batched=False).iden
+ yield queries.get_spam_links(sr).iden
+ yield queries.get_spam_comments(sr).iden
+ yield queries.get_reported_links(sr).iden
+ yield queries.get_reported_comments(sr).iden
+ yield queries.get_subreddit_messages(sr).iden
+ yield queries.get_unread_subreddit_messages(sr).iden
+
+ done = 0
+ for keys in in_chunks(gen_keys(), verbosity):
+ g.reset_caches()
+ done += len(keys)
+ print 'Done %d: %r' % (done, keys[-1])
+ populate(keys)
+
+def add_byurl_prefix():
+ """Run one before the byurl prefix is set, and once after (killing
+ it after it gets when it started the first time"""
+
+ from datetime import datetime
+ from r2.models import Link
+ from r2.lib.filters import _force_utf8
+ from pylons import g
+ from r2.lib.utils import fetch_things2
+ from r2.lib.db.operators import desc
+ from r2.lib.utils import base_url
+
+ now = datetime.now(g.tz)
+ print 'started at %s' % (now,)
+
+ l_q = Link._query(
+ Link.c._date < now,
+ data=True,
+ sort=desc('_date'))
+
+ # from link.py
+ def by_url_key(url, prefix=''):
+ s = _force_utf8(base_url(url.lower()))
+ return '%s%s' % (prefix, s)
+
+ done = 0
+ for links in fetch_things2(l_q, 1000, chunks=True):
+ done += len(links)
+ print 'Doing: %r, %s..%s' % (done, links[-1]._date, links[0]._date)
+
+ # only links with actual URLs
+ links = filter(lambda link: (not getattr(link, 'is_self', False)
+ and getattr(link, 'url', '')),
+ links)
+
+ # old key -> new key
+ translate = dict((by_url_key(link.url),
+ by_url_key(link.url, prefix='byurl_'))
+ for link in links)
+
+ old = g.permacache.get_multi(translate.keys())
+ new = dict((translate[old_key], value)
+ for (old_key, value)
+ in old.iteritems())
+ g.permacache.set_multi(new)
+
+def _progress(it, verbosity=100, key=repr, estimate=None, persec=False):
+ """An iterator that yields everything from `it', but prints progress
+ information along the way, including time-estimates if
+ possible"""
+ from datetime import datetime
+ import sys
+
+ now = start = datetime.now()
+ elapsed = start - start
+
+ print 'Starting at %s' % (start,)
+
+ seen = 0
+ for item in it:
+ seen += 1
+ if seen % verbosity == 0:
+ now = datetime.now()
+ elapsed = now - start
+ elapsed_seconds = elapsed.days * 86400 + elapsed.seconds
+
+ if estimate:
+ remaining = ((elapsed/seen)*estimate)-elapsed
+ completion = now + remaining
+ count_str = ('%d/%d %.2f%%'
+ % (seen, estimate, float(seen)/estimate*100))
+ estimate_str = (' (%s remaining; completion %s)'
+ % (remaining, completion))
+ else:
+ count_str = '%d' % seen
+ estimate_str = ''
+
+ if key:
+ key_str = ': %s' % key(item)
+ else:
+ key_str = ''
+
+ if persec and elapsed_seconds > 0:
+ persec_str = ' (%.2f/s)' % (seen/elapsed_seconds,)
+ else:
+ persec_str = ''
+
+ sys.stdout.write('%s%s, %s%s%s\n'
+ % (count_str, persec_str,
+ elapsed, estimate_str, key_str))
+ sys.stdout.flush()
+ this_chunk = 0
+ yield item
+
+ now = datetime.now()
+ elapsed = now - start
+ print 'Processed %d items in %s..%s (%s)' % (seen, start, now, elapsed)
+
+def shorten_byurl_keys():
+ """We changed by_url keys from a format like
+ byurl_google.com...
+ to:
+ byurl(1d5920f4b44b27a802bd77c4f0536f5a, google.com...)
+ so that they would fit in memcache's 251-char limit
+ """
+
+ from datetime import datetime
+ from hashlib import md5
+ from r2.models import Link
+ from r2.lib.filters import _force_utf8
+ from pylons import g
+ from r2.lib.utils import fetch_things2, in_chunks
+ from r2.lib.db.operators import desc
+ from r2.lib.utils import base_url
+
+ # from link.py
+ def old_by_url_key(url):
+ prefix='byurl_'
+ s = _force_utf8(base_url(url.lower()))
+ return '%s%s' % (prefix, s)
+ def new_by_url_key(url):
+ maxlen = 250
+ template = 'byurl(%s,%s)'
+ keyurl = _force_utf8(base_url(url.lower()))
+ hexdigest = md5(keyurl).hexdigest()
+ usable_len = maxlen-len(template)-len(hexdigest)
+ return template % (hexdigest, keyurl[:usable_len])
+
+ verbosity = 1000
+
+ l_q = Link._query(
+ Link.c._spam == (True, False),
+ data=True,
+ sort=desc('_date'))
+ for links in (
+ in_chunks(
+ _progress(
+ fetch_things2(l_q, verbosity),
+ key = lambda link: link._date,
+ verbosity=verbosity,
+ estimate=int(9.9e6),
+ persec=True,
+ ),
+ verbosity)):
+ # only links with actual URLs
+ links = filter(lambda link: (not getattr(link, 'is_self', False)
+ and getattr(link, 'url', '')),
+ links)
+
+ # old key -> new key
+ translate = dict((old_by_url_key(link.url),
+ new_by_url_key(link.url))
+ for link in links)
+
+ old = g.permacache.get_multi(translate.keys())
+ new = dict((translate[old_key], value)
+ for (old_key, value)
+ in old.iteritems())
+ g.permacache.set_multi(new)
19 r2/r2/lib/organic.py
@@ -61,7 +61,7 @@ def keep(item):
else:
return item.keep_item(item)
- builder = IDBuilder(promoted_items, keep_fn = keep,
+ builder = IDBuilder(promoted_items, keep_fn = keep,
skip = True, num = max_promoted)
promoted_items = builder.get_items()[0]
@@ -96,6 +96,13 @@ def cached_organic_links(user_id, langs):
#only use links from reddits that you're subscribed to
link_names = filter(lambda n: sr_count[n][1] in sr_ids, sr_count.keys())
link_names.sort(key = lambda n: sr_count[n][0])
+
+ if not link_names and g.debug:
+ q = All.get_links('new', 'all')
+ q._limit = 100 # this decomposes to a _query
+ link_names = [x._fullname for x in q]
+ g.log.debug('Used inorganic links')
+
#potentially add a up and coming link
if random.choice((True, False)) and sr_ids:
sr = Subreddit._byID(random.choice(sr_ids))
@@ -128,7 +135,7 @@ def cached_organic_links(user_id, langs):
def organic_links(user):
from r2.controllers.reddit_base import organic_pos
-
+
sr_ids = Subreddit.user_subreddits(user)
# make sure that these are sorted so the cache keys are constant
sr_ids.sort()
@@ -139,9 +146,11 @@ def organic_links(user):
links = cached_organic_links(None, c.content_langs)
pos = organic_pos()
- # pos will be 0 if it wasn't specified
- if links and pos != 0:
- # make sure that we're not running off the end of the list
+
+ # Make sure that links[pos] exists. Or, if links is [], at least set pos=0
+ if not links:
+ pos = 0
+ elif pos != 0:
pos = pos % len(links)
return links, pos
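The reworked guard above reduces to one invariant: links[pos] must be a valid index, or pos must be 0 when the list is empty. A tiny sketch with made-up values:

    # Hypothetical state: 3 cached organic links, but the stored position
    # has drifted past the end of the list.
    links = ['t3_aaa', 't3_bbb', 't3_ccc']
    pos = 7
    if not links:
        pos = 0                    # empty listing; position is meaningless
    elif pos != 0:
        pos = pos % len(links)     # 7 % 3 == 1, so links[pos] is safe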
189 r2/r2/lib/pages/pages.py
@@ -44,6 +44,7 @@
from r2.lib.strings import plurals, rand_strings, strings, Score
from r2.lib.utils import title_to_url, query_string, UrlParser, to_js, vote_hash
from r2.lib.utils import link_duplicates, make_offset_date, to_csv, median
+from r2.lib.utils import trunc_time
from r2.lib.template_helpers import add_sr, get_domain
from r2.lib.subreddit_search import popular_searches
from r2.lib.scraper import scrapers
@@ -169,8 +170,9 @@ def sr_admin_menu(self):
css_class = "icon-menu", separator = '')]
def sr_moderators(self, limit = 10):
- accounts = [Account._byID(uid, True)
- for uid in c.site.moderators[:limit]]
+ accounts = Account._byID([uid
+ for uid in c.site.moderators[:limit]],
+ data=True, return_dict=False)
return [WrappedUser(a) for a in accounts if not a._deleted]
def rightbox(self):
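The sr_moderators() change follows the same batching theme as the cache work elsewhere in this commit: one Account._byID() call for the whole id list instead of one query per moderator. A hedged sketch (the ids are placeholders; return_dict=False is what lets the result be consumed as an ordered list):

    # Before: one lookup per moderator id
    # accounts = [Account._byID(uid, True) for uid in mod_ids]
    # After: a single batched lookup returning accounts in the same order
    mod_ids = [1001, 1002, 1003]            # hypothetical account ids
    accounts = Account._byID(mod_ids, data=True, return_dict=False)
    moderators = [WrappedUser(a) for a in accounts if not a._deleted]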
@@ -184,11 +186,30 @@ def rightbox(self):
if not c.user_is_loggedin and self.loginbox:
ps.append(LoginFormWide())
+ if not isinstance(c.site, FakeSubreddit):
+ ps.append(SponsorshipBox())
+
+ no_ads_yet = True
#don't show the subreddit info bar on cnames
if not isinstance(c.site, FakeSubreddit) and not c.cname:
ps.append(SubredditInfoBar())
- ps.append(SponsorshipBox())
+ ps.append(Ads())
+ no_ads_yet = False
+
+ if self.submit_box:
+ ps.append(SideBox(_('Submit a link'),
+ '/submit', 'submit',
+ sr_path = True,
+ subtitles = [strings.submit_box_text],
+ show_cover = True))
+
+ if self.create_reddit_box:
+ ps.append(SideBox(_('Create your own reddit'),
+ '/reddits/create', 'create',
+ subtitles = rand_strings.get("create_reddit", 2),
+ show_cover = True, nocname=True))
+ if not isinstance(c.site, FakeSubreddit) and not c.cname:
moderators = self.sr_moderators()
if moderators:
total = len(c.site.moderators)
@@ -208,22 +229,15 @@ def rightbox(self):
ps.append(SideContentBox(_('admin box'), self.sr_admin_menu()))
- if self.submit_box:
- ps.append(SideBox(_('Submit a link'),
- '/submit', 'submit',
- sr_path = True,
- subtitles = [strings.submit_box_text],
- show_cover = True))
-
- if self.create_reddit_box:
- ps.append(SideBox(_('Create your own reddit'),
- '/reddits/create', 'create',
- subtitles = rand_strings.get("create_reddit", 2),
- show_cover = True, nocname=True))
+ if no_ads_yet:
+ ps.append(Ads())
- #we should do this here, but unless we move the ads into a
- #template of its own, it will render above the ad
- #ps.append(ClickGadget())
+ if c.user_is_admin:
+ ps.append(Admin_Rightbox())
+
+ if c.user.pref_clickgadget and c.recent_clicks:
+ ps.append(SideContentBox(_("Recently viewed links"),
+ [ClickGadget(c.recent_clicks)]))
return ps
@@ -941,8 +955,8 @@ def __init__(self, menus = []):
class InfoBar(Templated):
"""Draws the yellow box at the top of a page for info"""
- def __init__(self, message = ''):
- Templated.__init__(self, message = message)
+ def __init__(self, message = '', extra_class = ''):
+ Templated.__init__(self, message = message, extra_class = extra_class)
class RedditError(BoringPage):
@@ -1324,10 +1338,13 @@ class ButtonEmbed(CachedTemplate):
def __init__(self, button = None, width = 100,
height=100, referer = "", url = "", **kw):
arg = "cnameframe=1&" if c.cname else ""
+ sr = c.site.name if not isinstance(c.site, FakeSubreddit) else ""
+ if sr:
+ arg += "sr=%s&" % sr
Templated.__init__(self, button = button,
width = width, height = height,
referer=referer, url = url,
- domain = get_domain(),
+ domain = get_domain(subreddit = False),
arg = arg,
**kw)
@@ -1462,12 +1479,15 @@ def __init__(self):
date_groupings = {}
hexkeys_seen = {}
- for ids in hcb.ids_by_category("error"):
+ idses = hcb.ids_by_category("error")
+ errors = g.hardcache.get_multi(prefix="error-", keys=idses)
+
+ for ids in idses:
date, hexkey = ids.split("-")
hexkeys_seen[hexkey] = True
- d = g.hardcache.get("error-" + ids)
+ d = errors.get(ids, None)
if d is None:
log_text("error=None", "Why is error-%s None?" % ids,
@@ -1480,16 +1500,22 @@ def __init__(self):
self.nicknames = {}
self.statuses = {}
+ nicks = g.hardcache.get_multi(prefix="error_nickname-",
+ keys=hexkeys_seen.keys())
+ stati = g.hardcache.get_multi(prefix="error_status-",
+ keys=hexkeys_seen.keys())
+
for hexkey in hexkeys_seen.keys():
- nick = g.hardcache.get("error_nickname-%s" % hexkey, "???")
- self.nicknames[hexkey] = nick
- status = g.hardcache.get("error_status-%s" % hexkey, "normal")
- self.statuses[hexkey] = status
+ self.nicknames[hexkey] = nicks.get(hexkey, "???")
+ self.statuses[hexkey] = stati.get(hexkey, "normal")
+
+ idses = hcb.ids_by_category("logtext")
+ texts = g.hardcache.get_multi(prefix="logtext-", keys=idses)
- for ids in hcb.ids_by_category("logtext"):
+ for ids in idses:
date, level, classification = ids.split("-", 2)
textoccs = []
- dicts = g.hardcache.get("logtext-" + ids)
+ dicts = texts.get(ids, None)
if dicts is None:
log_text("logtext=None", "Why is logtext-%s None?" % ids,
"warning")
@@ -1584,35 +1610,77 @@ def __init__(self):
triples = set() # sorting key
daily_stats = {}
- for ids in hcb.ids_by_category("profile_count", limit=10000):
+ idses = hcb.ids_by_category("profile_count", limit=10000)
+ counts = g.hardcache.get_multi(prefix="profile_count-", keys=idses)
+ elapseds = g.hardcache.get_multi(prefix="profile_elapsed-", keys=idses)
+
+    # The next three blocks of code are for the case where we're
+ # rendering the current period and trying to decide what load class
+ # to use. For example, if today's number of hits equals yesterday's,
+ # and we're 23:59 into the day, that's totally normal. But if we're
+ # only 12 hours into the day, that's twice what we'd expect. So
+ # we're going to scale the current period by the percent of the way
+ # into the period that we are.
+ #
+ # If we're less than 5% of the way into the period, we skip this
+ # step. This both avoids Div0 errors and keeps us from extrapolating
+ # ridiculously from a tiny sample size.
+
+ now = c.start_time.astimezone(g.display_tz)
+ t_midnight = trunc_time(now, hours=24, mins=60)
+ t_hour = trunc_time(now, mins=60)
+ t_5min = trunc_time(now, mins=5)
+
+ offset_day = (now - t_midnight).seconds / 86400.0
+ offset_hour = (now - t_hour).seconds / 3600.0
+ offset_5min = (now - t_5min).seconds / 300.0
+
+ this_day = t_midnight.strftime("%Y/%m/%d_xx:xx")
+ this_hour = t_hour.strftime("%Y/%m/%d_%H:xx")
+ this_5min = t_5min.strftime("%Y/%m/%d_%H:%M")
+
+ for ids in idses:
time, action = ids.split("-")
+ # coltype strings are carefully chosen to sort alphabetically
+ # in the order that they do
+
if time.endswith("xx:xx"):
+ coltype = 'Day'
factor = 1.0
label = time[5:10] # MM/DD
- day = True
+ if time == this_day and offset_day > 0.05:
+ factor /= offset_day
elif time.endswith(":xx"):
+ coltype = 'Hour'
factor = 24.0
label = time[11:] # HH:xx
+ if time == this_hour and offset_hour > 0.05:
+ factor /= offset_hour
else:
+ coltype = 'five-min'
factor = 288.0 # number of five-minute periods in a day
label = time[11:] # HH:MM
+ if time == this_5min and offset_5min > 0.05:
+ factor /= offset_5min
+
+ count = counts.get(ids, None)
+ if count is None or count == 0:
+ log_text("usage count=None", "For %r, it's %r" % (ids, count), "error")
+ continue
# Elapsed in hardcache is in hundredths of a second.
# Multiply it by 100 so from this point forward, we're
# dealing with seconds -- as floats with two decimal
# places of precision. Similarly, round the average
# to two decimal places.
- count = g.hardcache.get("profile_count-" + ids)
- if count is None or count == 0:
- log_text("usage count=None", "For %r, it's %r" % (ids, count), "error")
- continue
- elapsed = g.hardcache.get("profile_elapsed-" + ids, 0) / 100.0
+ elapsed = elapseds.get(ids, 0) / 100.0
average = int(100.0 * elapsed / count) / 100.0
- triples.add( (factor, time, label) )
+ # Again, the "triple" tuples are a sorting key for the columns
+ triples.add( (coltype, time, label) )
- if factor == 1.0:
+ if coltype == 'Day':
daily_stats.setdefault(action, []).append(
(count, elapsed, average)
)
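A worked example of the extrapolation described in the long comment above, using made-up numbers: suppose today's 'Day' row shows 600 hits and it is noon, i.e. halfway through the period. (How the scaled factor feeds a per-day rate downstream is assumed here for illustration.)

    # Hypothetical numbers illustrating the current-period scaling.
    seconds_into_day = 12 * 3600                 # it is 12:00 local time
    offset_day = seconds_into_day / 86400.0      # 0.5 of the day has elapsed
    count = 600                                  # hits recorded so far today
    factor = 1.0                                 # 'Day' rows start at 1.0
    if offset_day > 0.05:                        # skip tiny samples (and Div0)
        factor /= offset_day                     # factor becomes 2.0
    projected_per_day = count * factor           # 1200: today's pace, not its total

At 23:59 the offset approaches 1.0 and the scaling becomes a no-op, which matches the "totally normal" case in the comment.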
@@ -1626,11 +1694,27 @@ def __init__(self):
# Figure out what a typical day looks like. For each action,