Bugfixes:

  * no-repeat on some background images in the sprite (#797)
  * fix deleted comments on the mobile site -- preserve author anonymity (#624)
  * faulty permalinks on pages with all-unicode titles (#776)
  * no more spreadshirt
  * reorganize comment_tree and _builder.pyx to clean up cache handling
  * fix styling for Bug #78
  * Improve handling of /r/all+all (bug #699), etc. (not always a 400 now)
  * Fix the time listings: sometimes we get passed unicode for self.time
  * Don't allow private reddits to show in /r/random
  * Try to allow remote sites to show their own favicons in the reddit toolbar

  Speed improvements:
  * CommentBuilder refactor round 1 (see the sketch after this list):
     * create new sort and parent permacache entries so that we don't need to
       load every comment just to sort the tree
     * update sorts and parents when a new comment comes in
     * update non-date sorts when a new comment vote comes in
     * add more trace info to timeouts in CommentBuilder
  * Some misc. performance hacks (incl. adding _utils.pyx)
  * Increase SR description box limit from 1k to 5k
  * Fix a bug where we weren't properly allowing hidden items to be hidden
    on time-filtered listings
  * Make Subreddit._by_name take a list of names, like _byID
  * Upload thumbs to S3 with reduced_redundancy==True
  * Make RSS feeds without a 'feed' GET parameter act as if the user is not
    logged in
  * Require a boto that knows about reduced_redundancy
  * Remove fast_queries from Message.add_props and put the onus on fetching
    the unread messages from the permacache
  * Store the list of popular subreddits in the permacache
     * make SubredditTopBar cacheable per-user
  * Add (as safe as we can make it) annotation for SQL selects to track down
    requests hitting the db when they shouldn't be
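
A rough sketch of how the refactored comment tree is meant to be consumed,
based on the comment_tree.py and _builder.pyx changes further down (link and
some_comment_id are placeholders; everything else comes from those diffs):

    # Each link now keeps a parents dict (comment id -> parent id) and one
    # sorter dict per sort order in the permacache; new comments and votes
    # update them incrementally instead of forcing a full tree rebuild.
    from r2.lib.comment_tree import link_comments_and_sort

    cids, cid_tree, depth, num_children, parents, sorter = \
        link_comments_and_sort(link._id, "_confidence")

    # sorter maps comment id -> (sort value, date) for non-date sorts, so
    # children can be ordered without loading the Comment objects at all.
    children = sorted(cid_tree.get(some_comment_id, []),
                      key=lambda cid: sorter[cid], reverse=True)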

  User submitted features:
  * Merge and clean up Phire's selfs-only/no-selfs patch (see the sketch
    after this list).
  * Don't show expired items on time-filtered controversy listings.
  * Also add the 'hide' button to the toolbar just for breakneckridge.
    Next time someone calls me prickly, breakneckridge had better step
    up to the plate for me or I'm rolling it back.
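
Roughly what the selfs-only/no-selfs patch boils down to; the helper below is
purely illustrative, and the real enforcement lives in VSubmitSR, POST_submit
and the createsubreddit form in the diffs that follow:

    # Hypothetical summary of the new per-reddit link_type setting.
    # 'any' is the old behaviour; the other two reject the wrong kind of
    # submission with the new NO_SELFS / NO_LINKS errors.
    ALLOWED_KINDS = {
        'any':  ('link', 'self'),
        'link': ('link',),   # no-selfs reddit: text posts get NO_SELFS
        'self': ('self',),   # selfs-only reddit: links get NO_LINKS
    }

    def kind_allowed(sr, kind):
        return kind in ALLOWED_KINDS.get(sr.link_type, ('link', 'self'))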
commit 52da322156e8c1d38c872886aaffff64eb961f68 (1 parent: 9a4271f)
authored by @KeyserSosa
Showing with 2,646 additions and 830 deletions.
  1. +1 −0  .gitignore
  2. +0 −2  r2/r2/config/routing.py
  3. +30 −13 r2/r2/controllers/api.py
  4. +3 −0  r2/r2/controllers/errors.py
  5. +12 −34 r2/r2/controllers/front.py
  6. +41 −18 r2/r2/controllers/listingcontroller.py
  7. +1 −1  r2/r2/controllers/promotecontroller.py
  8. +27 −14 r2/r2/controllers/reddit_base.py
  9. +27 −6 r2/r2/controllers/validator/validator.py
  10. +5 −19 r2/r2/lib/cache.py
  11. +138 −6 r2/r2/lib/comment_tree.py
  12. +1 −0  r2/r2/lib/count.py
  13. +6 −2 r2/r2/lib/db/queries.py
  14. +1 −1  r2/r2/lib/db/tdb_cassandra.py
  15. +33 −5 r2/r2/lib/db/tdb_sql.py
  16. +46 −49 r2/r2/lib/indextankupdate.py
  17. +2 −2 r2/r2/lib/lock.py
  18. +3 −2 r2/r2/lib/media.py
  19. +43 −27 r2/r2/lib/pages/pages.py
  20. +2 −1  r2/r2/lib/s3cp.py
  21. +0 −35 r2/r2/lib/set_reddit_pops.py
  22. +16 −14 r2/r2/lib/sgm.pyx
  23. +123 −0 r2/r2/lib/sr_pops.py
  24. +243 −0 r2/r2/lib/utils/_utils.pyx
  25. +18 −168 r2/r2/lib/utils/utils.py
  26. +104 −109 r2/r2/models/_builder.pyx
  27. +2 −7 r2/r2/models/account.py
  28. +5 −6 r2/r2/models/builder.py
  29. +26 −41 r2/r2/models/link.py
  30. +86 −104 r2/r2/models/subreddit.py
  31. +2 −0  r2/r2/models/vote.py
  32. +1,457 −101 r2/r2/public/static/css/reddit.css
  33. BIN  r2/r2/public/static/nsfw.png
  34. +2 −8 r2/r2/templates/comscore.html
  35. +31 −12 r2/r2/templates/createsubreddit.html
  36. +8 −1 r2/r2/templates/frame.html
  37. +22 −9 r2/r2/templates/frametoolbar.html
  38. +4 −1 r2/r2/templates/morechildren.compact
  39. +4 −2 r2/r2/templates/morechildren.html
  40. +26 −2 r2/r2/templates/newlink.compact
  41. +26 −3 r2/r2/templates/newlink.html
  42. +4 −1 r2/r2/templates/printable.mobile
  43. +11 −4 r2/setup.py
  44. +4 −0 scripts/update_reddits.sh
1  .gitignore
@@ -37,6 +37,7 @@ r2/_sorts.egg-info/
r2/r2/lib/_normalized_hot.c
r2/r2/lib/db/_sorts.c
r2/r2/lib/sgm.c
+r2/r2/lib/utils/_utils.c
r2/r2/lib/wrapped.c
r2/r2/models/_builder.c
r2/sgm.egg-info/
2  r2/r2/config/routing.py
@@ -121,8 +121,6 @@ def make_map(global_conf={}, app_conf={}):
action = 'details', title=None)
mc('/traffic/:article/:title', controller='front',
action = 'traffic', title=None)
- mc('/shirt/:article/:title', controller='front',
- action = 'shirt', title=None)
mc('/comments/:article/:title/:comment', controller='front',
action = 'comments', title=None, comment = None)
mc('/duplicates/:article/:title', controller = 'front',
43 r2/r2/controllers/api.py
@@ -189,12 +189,12 @@ def POST_compose(self, form, jquery, to, subject, body, ip):
VRatelimit(rate_user = True, rate_ip = True,
prefix = "rate_submit_"),
ip = ValidIP(),
- sr = VSubmitSR('sr'),
+ sr = VSubmitSR('sr', 'kind'),
url = VUrl(['url', 'sr']),
title = VTitle('title'),
save = VBoolean('save'),
selftext = VMarkdown('text'),
- kind = VOneOf('kind', ['link', 'self', 'poll']),
+ kind = VOneOf('kind', ['link', 'self']),
then = VOneOf('then', ('tb', 'comments'),
default='comments'),
extension = VLength("extension", 20))
@@ -210,23 +210,39 @@ def POST_submit(self, form, jquery, url, selftext, kind, title,
# VUrl may have replaced 'url' by adding 'http://'
form.set_inputs(url = url)
- if not kind:
+ if not kind or form.has_errors('sr', errors.INVALID_OPTION):
# this should only happen if somebody is trying to post
# links in some automated manner outside of the regular
# submission page, and hasn't updated their script
return
- if form.has_errors('sr', errors.SUBREDDIT_NOEXIST,
- errors.SUBREDDIT_NOTALLOWED,
- errors.SUBREDDIT_REQUIRED):
+ if (form.has_errors('sr',
+ errors.SUBREDDIT_NOEXIST,
+ errors.SUBREDDIT_NOTALLOWED,
+ errors.SUBREDDIT_REQUIRED,
+ errors.NO_SELFS,
+ errors.NO_LINKS)
+ or not sr):
# checking to get the error set in the form, but we can't
# check for rate-limiting if there's no subreddit
return
- else:
- should_ratelimit = sr.should_ratelimit(c.user, 'link')
- #remove the ratelimit error if the user's karma is high
- if not should_ratelimit:
- c.errors.remove((errors.RATELIMIT, 'ratelimit'))
+
+ if sr.link_type == 'link' and kind == 'self':
+ # this could happen if they actually typed "self" into the
+ # URL box and we helpfully translated it for them
+ c.errors.add(errors.NO_SELFS, field='sr')
+
+ # and trigger that by hand for the form
+ form.has_errors('sr', errors.NO_SELFS)
+
+ return
+
+ should_ratelimit = sr.should_ratelimit(c.user, 'link')
+ #remove the ratelimit error if the user's karma is high
+ if not should_ratelimit:
+ c.errors.remove((errors.RATELIMIT, 'ratelimit'))
+
+ banmsg = None
banmsg = None
@@ -1139,12 +1155,13 @@ def POST_upload_sr_img(self, file, header, sponsor, name, form_id):
name = VSubredditName("name"),
title = VLength("title", max_length = 100),
domain = VCnameDomain("domain"),
- description = VMarkdown("description", max_length = 1000),
+ description = VMarkdown("description", max_length = 5120),
lang = VLang("lang"),
over_18 = VBoolean('over_18'),
allow_top = VBoolean('allow_top'),
show_media = VBoolean('show_media'),
type = VOneOf('type', ('public', 'private', 'restricted')),
+ link_type = VOneOf('link_type', ('any', 'link', 'self')),
ip = ValidIP(),
sponsor_text =VLength('sponsorship-text', max_length = 500),
sponsor_name =VLength('sponsorship-name', max_length = 500),
@@ -1159,7 +1176,7 @@ def POST_site_admin(self, form, jquery, name, ip, sr,
redir = False
kw = dict((k, v) for k, v in kw.iteritems()
if k in ('name', 'title', 'domain', 'description', 'over_18',
- 'show_media', 'type', 'lang', "css_on_cname",
+ 'show_media', 'type', 'link_type', 'lang', "css_on_cname",
'allow_top'))
#if a user is banned, return rate-limit errors
3  r2/r2/controllers/errors.py
@@ -76,6 +76,9 @@
('BAD_CARD', _('card problem: %(message)s')),
('TOO_LONG', _("this is too long (max: %(max_length)s)")),
('NO_TEXT', _('we need something here')),
+
+ ('NO_SELFS', _("that reddit doesn't allow text posts")),
+ ('NO_LINKS', _("that reddit only allows text posts")),
))
errors = Storage([(e, e) for e in error_list.keys()])
46 r2/r2/controllers/front.py
@@ -83,7 +83,7 @@ def GET_oldinfo(self, article, type, dest, rest=None, comment=''):
def GET_random(self):
"""The Serendipity button"""
- sort = 'new' if rand.choice((True,False)) else 'hot'
+ sort = rand.choice(('new','hot'))
links = c.site.get_links(sort, 'all')
if isinstance(links, thing.Query):
links._limit = g.num_serendipity
@@ -318,28 +318,6 @@ def _edit_modcontrib_reddit(self, location, num, after, reverse, count, created)
extension_handling = "private"
else:
return self.abort404()
- if isinstance(c.site, ModSR):
- level = 'mod'
- elif isinstance(c.site, ContribSR):
- level = 'contrib'
- elif isinstance(c.site, AllSR):
- level = 'all'
- else:
- raise ValueError
-
- if ((level == 'mod' and
- location in ('reports', 'spam', 'trials', 'modqueue'))
- or
- (level == 'all' and
- location == 'trials')):
- pane = self._make_spamlisting(location, num, after, reverse, count)
- if c.user.pref_private_feeds:
- extension_handling = "private"
- else:
- return self.abort404()
-
- return EditReddit(content = pane,
- extension_handling = extension_handling).render()
return EditReddit(content = pane,
extension_handling = extension_handling).render()
@@ -617,17 +595,6 @@ def GET_submit(self, url, title, then):
captcha=captcha,
then = then)).render()
- def _render_opt_in_out(self, msg_hash, leave):
- """Generates the form for an optin/optout page"""
- email = Email.handler.get_recipient(msg_hash)
- if not email:
- return self.abort404()
- sent = (has_opted_out(email) == leave)
- return BoringPage(_("opt out") if leave else _("welcome back"),
- content = OptOut(email = email, leave = leave,
- sent = sent,
- msg_hash = msg_hash)).render()
-
def GET_frame(self):
"""used for cname support. makes a frame and
puts the proper url as the frame source"""
@@ -914,6 +881,17 @@ def GET_validuser(self):
c.response.content = ''
return c.response
+ def _render_opt_in_out(self, msg_hash, leave):
+ """Generates the form for an optin/optout page"""
+ email = Email.handler.get_recipient(msg_hash)
+ if not email:
+ return self.abort404()
+ sent = (has_opted_out(email) == leave)
+ return BoringPage(_("opt out") if leave else _("welcome back"),
+ content = OptOut(email = email, leave = leave,
+ sent = sent,
+ msg_hash = msg_hash)).render()
+
@validate(msg_hash = nop('x'))
def GET_optout(self, msg_hash):
"""handles /mail/optout to add an email to the optout mailing
59 r2/r2/controllers/listingcontroller.py
@@ -259,21 +259,25 @@ def spotlight(self):
left_side = max(-1, min(num_tl - 3, 8))
disp_links = [spotlight_links[(i + pos) % num_tl]
for i in xrange(-2, left_side)]
- def keep_fn(item):
+
+ def trial_keep_fn(item):
if trial and trial._fullname == item._fullname:
return True
return organic.keep_fresh_links(item)
- def wrap(item):
+ def trial_wrap(item):
if item is trial:
w = Wrapped(item)
w.trial_mode = True
w.render_class = LinkOnTrial
return w
return self.builder_wrapper(item)
- b = IDBuilder(disp_links, wrap = wrap,
+
+ b = IDBuilder(disp_links,
+ wrap = trial_wrap if trial else self.builder_wrapper,
num = organic.organic_length,
- skip = True, keep_fn = keep_fn)
+ skip = True,
+ keep_fn = trial_keep_fn if trial else organic.keep_fresh_links)
try:
vislink = spotlight_links[pos]
@@ -282,11 +286,10 @@ def wrap(item):
g.log.error("pos = %d" % pos)
raise
- s = SpotlightListing(b,
- spotlight_links = spotlight_links,
- visible_link = vislink,
- max_num = self.listing_obj.max_num,
- max_score = self.listing_obj.max_score).listing()
+ s = SpotlightListing(b, spotlight_links = spotlight_links,
+ visible_link = vislink,
+ max_num = self.listing_obj.max_num,
+ max_score = self.listing_obj.max_score).listing()
if len(s.things) > 0:
# only pass through a listing if the links made it
@@ -304,15 +307,16 @@ def wrap(item):
if res.things:
return res
-
-
def query(self):
#no need to worry when working from the cache
if g.use_query_cache or c.site == Default:
self.fix_listing = False
if c.site == Default:
- sr_ids = Subreddit.user_subreddits(c.user)
+ sr_ids = Subreddit.user_subreddits(c.user,
+ limit=(Subreddit.sr_limit
+ if c.user_is_loggedin
+ else g.num_default_reddits))
return normalized_hot(sr_ids)
#if not using the query_cache we still want cached front pages
elif (not g.use_query_cache
@@ -394,6 +398,17 @@ def GET_listing(self, sort, **env):
class BrowseController(ListingController):
where = 'browse'
+ def keep_fn(self):
+ """For merged time-listings, don't show items that are too old
+ (this can happen when mr_top hasn't run in a while)"""
+ if self.time != 'all' and c.default_sr:
+ oldest = timeago('1 %s' % (str(self.time),))
+ def keep(item):
+ return item._date > oldest and item.keep_item(item)
+ return keep
+ else:
+ return ListingController.keep_fn(self)
+
@property
def menus(self):
return [ControversyTimeMenu(default = self.time)]
@@ -726,20 +741,28 @@ def title(self):
def query(self):
if self.where == 'banned' and c.user_is_admin:
reddits = Subreddit._query(Subreddit.c._spam == True,
- sort = desc('_date'))
+ sort = desc('_date'),
+ write_cache = True,
+ read_cache = True,
+ cache_time = 5 * 60)
else:
- reddits = Subreddit._query()
+ reddits = None
if self.where == 'new':
+ reddits = Subreddit._query( write_cache = True,
+ read_cache = True,
+ cache_time = 5 * 60)
reddits._sort = desc('_date')
else:
+ reddits = Subreddit._query( write_cache = True,
+ read_cache = True,
+ cache_time = 60 * 60)
reddits._sort = desc('_downs')
- if c.content_langs != 'all':
- reddits._filter(Subreddit.c.lang == c.content_langs)
+ # Consider resurrecting when it is not the World Cup
+ #if c.content_langs != 'all':
+ # reddits._filter(Subreddit.c.lang == c.content_langs)
if not c.over18:
reddits._filter(Subreddit.c.over_18 == False)
- reddits._filter(Subreddit.c.author_id != -1)
-
return reddits
def GET_listing(self, where, **env):
self.where = where
2  r2/r2/controllers/promotecontroller.py
@@ -32,7 +32,7 @@
from r2.controllers.reddit_base import RedditController
-from r2.lib.utils import timetext, make_offset_date
+from r2.lib.utils import make_offset_date
from r2.lib.media import force_thumbnail, thumbnail_url
from r2.lib import cssfilter
from datetime import datetime
41 r2/r2/controllers/reddit_base.py
@@ -254,13 +254,20 @@ def set_subreddit():
srs = set()
sr_names = sr_name.split('+')
real_path = sr_name
- for sr_name in sr_names:
- sr = Subreddit._by_name(sr_name)
- if isinstance(sr, FakeSubreddit):
+ srs = Subreddit._by_name(sr_names).values()
+ if len(srs) != len(sr_names):
+ abort(404)
+ elif any(isinstance(sr, FakeSubreddit)
+ for sr in srs):
+ if All in srs:
+ c.site = All
+ elif Friend in srs:
+ c.site = Friend
+ else:
abort(400)
- srs.add(sr)
- sr_ids = [sr._id for sr in srs]
- c.site = MultiReddit(sr_ids, real_path)
+ else:
+ sr_ids = [sr._id for sr in srs]
+ c.site = MultiReddit(sr_ids, real_path)
else:
c.site = Subreddit._by_name(sr_name)
except NotFound:
@@ -483,6 +490,7 @@ def cached_response(self):
return c.response
def pre(self):
+
c.start_time = datetime.now(g.tz)
g.reset_caches()
@@ -499,6 +507,8 @@ def pre(self):
set_subreddit()
c.errors = ErrorSet()
c.cookies = Cookies()
+ # if an rss feed, this will also log the user in if a feed=
+ # GET param is included
set_content_type()
def try_pagecache(self):
@@ -656,15 +666,18 @@ def pre(self):
# the user could have been logged in via one of the feeds
maybe_admin = False
+
+ # no logins for RSS feed unless valid_feed has already been called
if not c.user_is_loggedin:
- (c.user, maybe_admin) = \
- valid_cookie(c.cookies[g.login_cookie].value
- if g.login_cookie in c.cookies
- else '')
-
- if c.user:
- c.user_is_loggedin = True
- else:
+ if c.extension != "rss":
+ (c.user, maybe_admin) = \
+ valid_cookie(c.cookies[g.login_cookie].value
+ if g.login_cookie in c.cookies
+ else '')
+ if c.user:
+ c.user_is_loggedin = True
+
+ if not c.user_is_loggedin:
c.user = UnloggedUser(get_browser_langs())
# patch for fixing mangled language preferences
if (not isinstance(c.user.pref_lang, basestring) or
33 r2/r2/controllers/validator/validator.py
@@ -690,21 +690,42 @@ def run(self, fullname, fullname2):
abort(403, "forbidden")
class VSubmitSR(Validator):
- def run(self, sr_name):
+ def __init__(self, srname_param, linktype_param = None):
+ self.require_linktype = False
+
+ if linktype_param:
+ self.require_linktype = True
+ Validator.__init__(self, (srname_param, linktype_param))
+ else:
+ Validator.__init__(self, srname_param)
+
+ def run(self, sr_name, link_type = None):
if not sr_name:
self.set_error(errors.SUBREDDIT_REQUIRED)
return None
try:
- sr = Subreddit._by_name(str(sr_name))
+ sr = Subreddit._by_name(str(sr_name).strip())
except (NotFound, AttributeError, UnicodeEncodeError):
self.set_error(errors.SUBREDDIT_NOEXIST)
- return None
+ return
- if sr and not (c.user_is_loggedin and sr.can_submit(c.user)):
+ if not c.user_is_loggedin or not sr.can_submit(c.user):
self.set_error(errors.SUBREDDIT_NOTALLOWED)
- else:
- return sr
+ return
+
+ if self.require_linktype:
+ if link_type not in ('link', 'self'):
+ self.set_error(errors.INVALID_OPTION)
+ return
+ elif link_type == 'link' and sr.link_type == 'self':
+ self.set_error(errors.NO_LINKS)
+ return
+ elif link_type == 'self' and sr.link_type == 'link':
+ self.set_error(errors.NO_SELFS)
+ return
+
+ return sr
pass_rx = re.compile(r"^.{3,20}$")
24 r2/r2/lib/cache.py
@@ -31,9 +31,8 @@
import cassandra.ttypes
from r2.lib.contrib import memcache
-from r2.lib.utils import lstrips, in_chunks, tup
+from r2.lib.utils import in_chunks, prefix_keys
from r2.lib.hardcachebackend import HardCacheBackend
-from r2.lib.utils import trace
from r2.lib.sgm import sgm # get this into our namespace so that it's
# importable from us
@@ -52,19 +51,8 @@ def add_multi(self, keys, prefix=''):
for k,v in keys.iteritems():
self.add(prefix+str(k), v)
- def _prefix_keys(self, keys, prefix):
- if len(prefix):
- return dict((prefix+str(k), k) for k in keys)
- else:
- return dict((str(k), k) for k in keys)
-
- def _unprefix_keys(self, results, key_map):
- return dict((key_map[k], results[k]) for k in results.keys())
-
def get_multi(self, keys, prefix=''):
- key_map = self._prefix_keys(keys, prefix)
- results = self.simple_get_multi(key_map.keys())
- return self._unprefix_keys(results, key_map)
+ return prefix_keys(keys, prefix, self.simple_get_multi)
class PyMemcache(CacheUtils, memcache.Client):
"""We still use our patched python-memcache to talk to the
@@ -395,10 +383,8 @@ def get(self, key, default = None, allow_local = True):
return default
def get_multi(self, keys, prefix='', allow_local = True):
- key_map = self._prefix_keys(keys, prefix)
- results = self.simple_get_multi(key_map.keys(),
- allow_local = allow_local)
- return self._unprefix_keys(results, key_map)
+ l = lambda ks: self.simple_get_multi(ks, allow_local = allow_local)
+ return prefix_keys(keys, prefix, l)
def simple_get_multi(self, keys, allow_local = True):
out = {}
@@ -520,7 +506,7 @@ def mutate(self, key, mutation_fn, default = None):
value = self.memcache.get(key)
if value is None:
value = self.cassa.get(key,
- read_consistency_level = CL_ONE)
+ read_consistency_level = CL_QUORUM)
except cassandra.ttypes.NotFoundException:
value = default
144 r2/r2/lib/comment_tree.py
@@ -32,6 +32,18 @@ def comments_key(link_id):
def lock_key(link_id):
return 'comment_lock_' + str(link_id)
+def parent_comments_key(link_id):
+ return 'comments_parents_' + str(link_id)
+
+def sort_comments_key(link_id, sort):
+ return 'comments_sort_%s_%s' % (link_id, sort)
+
+def _get_sort_value(comment, sort):
+ if sort == "_date":
+ return comment._date
+ return getattr(comment, sort), comment._date
+
+
def add_comment(comment):
with g.make_lock(lock_key(comment.link_id)):
add_comment_nolock(comment)
@@ -80,12 +92,118 @@ def find_parents():
for p_id in find_parents():
num_children[p_id] += 1
+ # update our cache of children -> parents as well:
+ key = parent_comments_key(link_id)
+ r = g.permacache.get(key)
+
+ if not r:
+ r = _parent_dict_from_tree(comment_tree)
+ r[cm_id] = p_id
+ g.permacache.set(key, r)
+
+ # update the list of sorts
+ for sort in ("_controversy", "_date", "_hot", "_confidence", "_score"):
+ key = sort_comments_key(link_id, sort)
+ r = g.permacache.get(key)
+ if r:
+ r[cm_id] = _get_sort_value(comment, sort)
+ g.permacache.set(key, r)
+
+ # do this last b/c we don't want the cids updated before the sorts
+ # and parents
g.permacache.set(comments_key(link_id),
(cids, comment_tree, depth, num_children))
+
+
+def update_comment_vote(comment):
+ link_id = comment.link_id
+ # update the list of sorts
+ with g.make_lock(lock_key(link_id)):
+ for sort in ("_controversy", "_hot", "_confidence", "_score"):
+ key = sort_comments_key(link_id, sort)
+ r = g.permacache.get(key)
+ # don't bother recomputing a non-existant sort dict, as
+ # we'll catch it next time we have to render something
+ if r:
+ r[comment._id] = _get_sort_value(comment, sort)
+ g.permacache.set(key, r)
+
+
def delete_comment(comment):
- #nothing really to do here, atm
- pass
+ with g.make_lock(lock_key(comment.link_id)):
+ cids, comment_tree, depth, num_children = link_comments(comment.link_id)
+
+ # only completely remove comments with no children
+ if comment._id not in comment_tree:
+ if comment._id in cids:
+ cids.remove(comment._id)
+ if comment._id in depth:
+ del depth[comment._id]
+ if comment._id in num_children:
+ del num_children[comment._id]
+ g.permacache.set(comments_key(comment.link_id),
+ (cids, comment_tree, depth, num_children))
+
+
+def _parent_dict_from_tree(comment_tree):
+ parents = {}
+ for parent, childs in comment_tree.iteritems():
+ for child in childs:
+ parents[child] = parent
+ return parents
+
+def _comment_sorter_from_cids(cids, sort):
+ from r2.models import Comment
+ comments = Comment._byID(cids, data = False, return_dict = False)
+ return dict((x._id, _get_sort_value(x, sort)) for x in comments)
+
+def link_comments_and_sort(link_id, sort):
+ cids, cid_tree, depth, num_children = link_comments(link_id)
+
+ # load the sorter
+ key = sort_comments_key(link_id, sort)
+ sorter = g.permacache.get(key)
+ if sorter is None:
+ g.log.error("comment_tree.py: sorter (%s) cache miss for Link %s"
+ % (sort, link_id))
+ sorter = {}
+ elif cids and not all(x in sorter for x in cids):
+ g.log.error("Error in comment_tree: sorter (%s) inconsistent for Link %s"
+ % (sort, link_id))
+ sorter = {}
+
+ # load the parents
+ key = parent_comments_key(link_id)
+ parents = g.permacache.get(key)
+ if parents is None:
+ g.log.error("comment_tree.py: parents cache miss for Link %s"
+ % link_id)
+ parents = {}
+ elif cids and not all(x in parents for x in cids):
+ g.log.error("Error in comment_tree: parents inconsistent for Link %s"
+ % link_id)
+ parents = {}
+
+ if not sorter or not parents:
+ with g.make_lock(lock_key(link_id)):
+ # reload from the cache so the sorter and parents are
+ # maximally consistent
+ r = g.permacache.get(comments_key(link_id))
+ cids, cid_tree, depth, num_children = r
+
+ key = sort_comments_key(link_id, sort)
+ if not sorter:
+ sorter = _comment_sorter_from_cids(cids, sort)
+ g.permacache.set(key, sorter)
+
+ key = parent_comments_key(link_id)
+ if not parents:
+ parents = _parent_dict_from_tree(cid_tree)
+ g.permacache.set(key, parents)
+
+ return cids, cid_tree, depth, num_children, parents, sorter
+
def link_comments(link_id, _update=False):
key = comments_key(link_id)
@@ -95,12 +213,26 @@ def link_comments(link_id, _update=False):
if r and not _update:
return r
else:
- with g.make_lock(lock_key(link_id)):
- r = load_link_comments(link_id)
+ # This operation can take longer than most (note the inner
+ # locks) better to time out request temporarily than to deal
+ # with an inconsistent tree
+ with g.make_lock(lock_key(link_id), timeout=180):
+ r = _load_link_comments(link_id)
+ # rebuild parent dict
+ cids, cid_tree, depth, num_children = r
+ g.permacache.set(parent_comments_key(link_id),
+ _parent_dict_from_tree(cid_tree))
+
+ # rebuild the sorts
+ for sort in ("_controversy","_date","_hot","_confidence","_score"):
+ g.permacache.set(sort_comments_key(link_id, sort),
+ _comment_sorter_from_cids(cids, sort))
+
g.permacache.set(key, r)
- return r
+ return r
+
-def load_link_comments(link_id):
+def _load_link_comments(link_id):
from r2.models import Comment
q = Comment._query(Comment.c.link_id == link_id,
Comment.c._deleted == (True, False),
1  r2/r2/lib/count.py
@@ -38,6 +38,7 @@ def get_link_counts(period = count_period):
def get_sr_counts(period = count_period):
srs = Subreddit._query()
+
return dict((l._fullname, (0, l.sr_id)) for l in links)
def clear_sr_counts(names):
8 r2/r2/lib/db/queries.py
@@ -7,7 +7,7 @@
from r2.lib import utils
from r2.lib.solrsearch import DomainSearchQuery
from r2.lib import amqp, sup
-from r2.lib.comment_tree import add_comment, link_comments
+from r2.lib.comment_tree import add_comment, link_comments, update_comment_vote
import cPickle as pickle
@@ -657,7 +657,10 @@ def set_unread(message, to, unread):
else:
for i in Inbox.set_unread(message, unread, to = to):
kw = dict(insert_items = i) if unread else dict(delete_items = i)
- if i._name == 'selfreply':
+ if isinstance(message, Comment) and not unread:
+ add_queries([get_unread_comments(i._thing1)], **kw)
+ add_queries([get_unread_selfreply(i._thing1)], **kw)
+ elif i._name == 'selfreply':
add_queries([get_unread_selfreply(i._thing1)], **kw)
elif isinstance(message, Comment):
add_queries([get_unread_comments(i._thing1)], **kw)
@@ -971,6 +974,7 @@ def handle_vote(user, thing, dir, ip, organic, cheater = False):
elif isinstance(thing, Comment):
#update last modified
+ update_comment_vote(thing)
if user._id == thing.author_id:
set_last_modified(user, 'overview')
set_last_modified(user, 'commented')
2  r2/r2/lib/db/tdb_cassandra.py
@@ -112,7 +112,7 @@ def __init__(cls, name, bases, dct):
cls.cf = pycassa.ColumnFamily(cassandra, keyspace,
cf_name,
read_consistency_level = CL.ONE,
- write_consistency_level = CL.ONE)
+ write_consistency_level = CL.QUORUM)
cls._kind = name
38 r2/r2/lib/db/tdb_sql.py
@@ -284,6 +284,35 @@ def get_write_table(tables):
else:
return tables[0]
+import re, traceback, cStringIO as StringIO
+_spaces = re.compile('[\s]+')
+def add_request_info(select):
+ from pylons import request
+ from r2.lib import filters
+ def sanitize(txt):
+ return _spaces.sub(' ', txt).replace("/", "|").replace("-", "_").replace(';', "").replace("*", "").replace(r"/", "")
+ s = StringIO.StringIO()
+ traceback.print_stack( file = s)
+ tb = s.getvalue()
+ if tb:
+ tb = tb.split('\n')[0::2]
+ tb = [x.split('/')[-1] for x in tb if "/r2/" in x]
+ tb = '\n'.join(tb[-15:-2])
+ try:
+ if (hasattr(request, 'path') and
+ hasattr(request, 'ip') and
+ hasattr(request, 'user_agent')):
+ comment = '/*\n%s\n%s\n%s\n*/' % (
+ tb or "",
+ filters._force_utf8(sanitize(request.fullpath)),
+ sanitize(request.ip))
+ return select.prefix_with(comment)
+ except UnicodeDecodeError:
+ pass
+
+ return select
+
+
def get_table(kind, action, tables, avoid_master_reads = False):
if action == 'write':
#if this is a write, store the kind in the c.use_write_db dict
@@ -303,7 +332,6 @@ def get_table(kind, action, tables, avoid_master_reads = False):
return dbm.get_read_table(tables)
-
def get_thing_table(type_id, action = 'read' ):
return get_table('t' + str(type_id), action,
types_id[type_id].tables,
@@ -487,7 +515,7 @@ def fetch_query(table, id_col, thing_id):
s = sa.select([table], sa.or_(*[id_col == tid
for tid in thing_id]))
try:
- r = s.execute().fetchall()
+ r = add_request_info(s).execute().fetchall()
except Exception, e:
dbm.mark_dead(table.bind)
# this thread must die so that others may live
@@ -710,7 +738,7 @@ def find_things(type_id, get_cols, sort, limit, constraints):
s = s.limit(limit)
try:
- r = s.execute()
+ r = add_request_info(s).execute()
except Exception, e:
dbm.mark_dead(table.bind)
# this thread must die so that others may live
@@ -792,7 +820,7 @@ def find_data(type_id, get_cols, sort, limit, constraints):
s = s.limit(limit)
try:
- r = s.execute()
+ r = add_request_info(s).execute()
except Exception, e:
dbm.mark_dead(t_table.bind)
# this thread must die so that others may live
@@ -869,7 +897,7 @@ def find_rels(rel_type_id, get_cols, sort, limit, constraints):
s = s.limit(limit)
try:
- r = s.execute()
+ r = add_request_info(s).execute()
except Exception, e:
dbm.mark_dead(r_table.bind)
# this thread must die so that others may live
95 r2/r2/lib/indextankupdate.py
@@ -23,17 +23,11 @@
Module for communication reddit-level communication with IndexTank
"""
-from __future__ import with_statement
-
from pylons import g, config
from r2.models import *
-from r2.lib.cache import SelfEmptyingCache
from r2.lib import amqp, indextank
-from r2.lib.solrsearch import indexed_types
-import simplejson
-import sys,os,os.path
-import time
+from r2.lib.utils import in_chunks, progress
indextank_indexed_types = (Link,)
@@ -41,6 +35,8 @@
index_code = g.INDEXTANK_IDX_CODE)
def maps_from_things(things):
+ """We only know how to do links for now"""
+
maps = []
author_ids = [ thing.author_id for thing in things ]
accounts = Account._byID(author_ids, data = True, return_dict = True)
@@ -54,19 +50,20 @@ def maps_from_things(things):
timestamp = thing._date.strftime("%s"),
ups = thing._ups,
downs = thing._downs,
- num_comments = getattr(thing, "num_comments", 0))
+ num_comments = getattr(thing, "num_comments", 0),
+ sr_id = str(thing.sr_id))
if thing.is_self and thing.selftext:
d['selftext'] = thing.selftext
- else:
+ elif not thing.is_self:
d['url'] = thing.url
maps.append(d)
return maps
def to_boosts(ups, downs, num_comments):
result = {}
- result[1] = ups
- result[2] = downs
- result[3] = num_comments
+ result[0] = ups
+ result[1] = downs
+ result[2] = num_comments
return result
def inject_maps(maps):
@@ -79,33 +76,47 @@ def inject_maps(maps):
if ups not in (0, 1) or downs != 0 or num_comments > 0:
ok, result = index.boost(fullname, boosts=boosts)
- if ok:
- print "Boost-updated %s in IndexTank" % fullname
- continue
- else:
- print "Failed to update(%r, %r) with IndexTank" % (fullname, boosts)
- f = open("/tmp/indextank-error.html", "w")
- f.write(str(result))
-# g.cache.set("stop-indextank", True)
+ if not ok:
+ raise Exception(result)
ok, result = index.add(fullname, d, boosts)
- if ok:
- print "Added %s to IndexTank" % fullname
- else:
- print "Failed to add(%r, %r, %r) to IndexTank" % (fullname, d, boosts)
- f = open("/tmp/indextank-error.html", "w")
- f.write(str(result))
- g.cache.set("stop-indextank", True)
+ if not ok:
+ raise Exception(result)
def delete_thing(thing):
ok, result = index.delete(thing._fullname)
- if ok:
- print "Deleted %s from IndexTank" % thing._fullname
- else:
- print "Failed to delete %s from IndexTank" % thing._fullname
- f = open("/tmp/indextank-error.html", "w")
- f.write(str(result))
- g.cache.set("stop-indextank", True)
+ if not ok:
+ raise Exception(result)
+
+def inject(things):
+ things = [x for x in things if isinstance(x, indextank_indexed_types)]
+
+ update_things = [x for x in things if not x._spam and not x._deleted
+ and x.promoted is None
+ and getattr(x, 'sr_id') != -1]
+ delete_things = [x for x in things if x._spam or x._deleted]
+
+ if update_things:
+ maps = maps_from_things(update_things)
+ inject_maps(maps)
+ if delete_things:
+ for thing in delete_things:
+ delete_thing(thing)
+
+def rebuild_index(after_id = None):
+ cls = Link
+
+ # don't pull spam/deleted
+ q = cls._query(sort=desc('_date'), data=True)
+
+ if after_id:
+ q._after(cls._byID(after_id))
+
+ q = fetch_things2(q)
+
+ q = progress(q, verbosity=1000, estimate=10000000, persec=True)
+ for chunk in in_chunks(q):
+ inject(chunk)
def run_changed(drain=False):
"""
@@ -113,23 +124,9 @@ def run_changed(drain=False):
IndexTank
"""
def _run_changed(msgs, chan):
- if g.cache.get("stop-indextank"):
- print "discarding %d msgs" % len(msgs)
- return
-
fullnames = set([x.body for x in msgs])
things = Thing._by_fullname(fullnames, data=True, return_dict=False)
- things = [x for x in things if isinstance(x, indextank_indexed_types)]
-
- update_things = [x for x in things if not x._spam and not x._deleted]
- delete_things = [x for x in things if x._spam or x._deleted]
-
- if update_things:
- maps = maps_from_things(update_things)
- inject_maps(maps)
- if delete_things:
- for thing in delete_things:
- delete_thing(thing)
+ inject(things)
amqp.handle_items('indextank_changes', _run_changed, limit=1000,
drain=drain)
4 r2/r2/lib/lock.py
@@ -87,6 +87,6 @@ def __exit__(self, type, value, tb):
self.locks.remove(self.key)
def make_lock_factory(cache):
- def factory(key):
- return MemcacheLock(key, cache)
+ def factory(key, **kw):
+ return MemcacheLock(key, cache, **kw)
return factory
5 r2/r2/lib/media.py
@@ -46,7 +46,7 @@ def thumbnail_url(link):
res += "?v=%s" % link.thumbnail_version
return res
-def upload_thumb(link, image, never_expire = True):
+def upload_thumb(link, image, never_expire = True, reduced_redundancy=True):
"""Given a link and an image, uploads the image to s3 into an image
based on the link's fullname"""
f = tempfile.NamedTemporaryFile(suffix = '.png', delete=False)
@@ -61,7 +61,8 @@ def upload_thumb(link, image, never_expire = True):
log.debug('uploading to s3: %s' % link._fullname)
s3cp.send_file(g.s3_thumb_bucket, s3fname, contents, 'image/png',
- never_expire=never_expire)
+ never_expire=never_expire,
+ reduced_redundancy=reduced_redundancy)
log.debug('thumbnail %s: %s' % (link._fullname, thumbnail_url(link)))
finally:
os.unlink(f.name)
70 r2/r2/lib/pages/pages.py
@@ -52,7 +52,7 @@
from r2.lib.memoize import memoize
import sys, random, datetime, locale, calendar, simplejson, re, time
-import graph, pycountry
+import graph, pycountry, time
from itertools import chain
from urllib import quote
@@ -149,7 +149,7 @@ def __init__(self, space_compress = True, nav_menus = None, loginbox = True,
self.infobar = InfoBar(message = infotext)
self.srtopbar = None
- if not c.cname:
+ if not c.cname and not is_api():
self.srtopbar = SubredditTopBar()
if c.user_is_loggedin and self.show_sidebar and not is_api():
@@ -784,9 +784,6 @@ def info_button(name, **fmt_args):
if not self.link.is_self and self.duplicates:
buttons.append(info_button('duplicates',
num = len(self.duplicates)))
- if (len(self.link.title) < 200 and g.spreadshirt_url
- and c.render_style == "html"):
- buttons += [info_button('shirt')]
if c.user_is_admin:
buttons += [info_button('details')]
@@ -1149,14 +1146,21 @@ class Over18(Templated):
"""The creepy 'over 18' check page for nsfw content."""
pass
-class SubredditTopBar(Templated):
+class SubredditTopBar(CachedTemplate):
+
"""The horizontal strip at the top of most pages for navigating
user-created reddits."""
def __init__(self):
self._my_reddits = None
self._pop_reddits = None
- Templated.__init__(self)
-
+ name = '' if not c.user_is_loggedin else c.user.name
+ langs = "" if name else c.content_langs
+ # poor man's expiration, with random initial time
+ t = int(time.time()) / 3600
+ if c.user_is_loggedin:
+ t += c.user._id
+ CachedTemplate.__init__(self, name = name, langs = langs, t = t,
+ over18 = c.over18)
@property
def my_reddits(self):
@@ -1451,28 +1455,40 @@ class NewLink(Templated):
"""Render the link submission form"""
def __init__(self, captcha = None, url = '', title= '', subreddits = (),
then = 'comments'):
- tabs = (('link', ('link-desc', 'url-field')),
- ('text', ('text-desc', 'text-field')))
- all_fields = set(chain(*(parts for (tab, parts) in tabs)))
- buttons = []
- self.default_tabs = tabs[0][1]
- self.default_tab = tabs[0][0]
- for tab_name, parts in tabs:
- to_show = ','.join('#' + p for p in parts)
- to_hide = ','.join('#' + p for p in all_fields if p not in parts)
- onclick = "return select_form_tab(this, '%s', '%s');"
- onclick = onclick % (to_show, to_hide)
- if tab_name == self.default_tab:
- self.default_show = to_show
- self.default_hide = to_hide
-
- buttons.append(JsButton(tab_name, onclick=onclick, css_class=tab_name + "-button"))
-
- self.formtabs_menu = JsNavMenu(buttons, type = 'formtab')
- self.default_tabs = tabs[0][1]
+
+ self.show_link = self.show_self = False
+
+ tabs = []
+ if c.default_sr or c.site.link_type != 'self':
+ tabs.append(('link', ('link-desc', 'url-field')))
+ self.show_link = True
+ if c.default_sr or c.site.link_type != 'link':
+ tabs.append(('text', ('text-desc', 'text-field')))
+ self.show_self = True
+
+ if self.show_self and self.show_link:
+ all_fields = set(chain(*(parts for (tab, parts) in tabs)))
+ buttons = []
+ self.default_tabs = tabs[0][1]
+ self.default_tab = tabs[0][0]
+ for tab_name, parts in tabs:
+ to_show = ','.join('#' + p for p in parts)
+ to_hide = ','.join('#' + p for p in all_fields if p not in parts)
+ onclick = "return select_form_tab(this, '%s', '%s');"
+ onclick = onclick % (to_show, to_hide)
+ if tab_name == self.default_tab:
+ self.default_show = to_show
+ self.default_hide = to_hide
+
+ buttons.append(JsButton(tab_name, onclick=onclick, css_class=tab_name + "-button"))
+
+ self.formtabs_menu = JsNavMenu(buttons, type = 'formtab')
+ self.default_tabs = tabs[0][1]
self.sr_searches = simplejson.dumps(popular_searches())
+ self.on_default_sr = c.default_sr
+
if isinstance(c.site, FakeSubreddit):
self.default_sr = subreddits[0] if subreddits else g.default_sr
else:
3  r2/r2/lib/s3cp.py
@@ -34,7 +34,7 @@
class S3Exception(Exception): pass
-def send_file(bucketname, filename, content, content_type = 'text/plain', never_expire = False):
+def send_file(bucketname, filename, content, content_type = 'text/plain', never_expire = False, reduced_redundancy=False):
# this function is pretty low-traffic, but if we start using it a
# lot more we'll want to maintain a connection pool across the app
# rather than connecting on every invocation
@@ -51,3 +51,4 @@ def send_file(bucketname, filename, content, content_type = 'text/plain', never_
k.set_contents_from_string(content, policy='public-read',
headers=headers)
+ # reduced_redundancy=reduced_redundancy)
35 r2/r2/lib/set_reddit_pops.py
@@ -1,35 +0,0 @@
-# The contents of this file are subject to the Common Public Attribution
-# License Version 1.0. (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
-# License Version 1.1, but Sections 14 and 15 have been added to cover use of
-# software over a computer network and provide for limited attribution for the
-# Original Developer. In addition, Exhibit A has been modified to be consistent
-# with Exhibit B.
-#
-# Software distributed under the License is distributed on an "AS IS" basis,
-# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
-# the specific language governing rights and limitations under the License.
-#
-# The Original Code is Reddit.
-#
-# The Original Developer is the Initial Developer. The Initial Developer of the
-# Original Code is CondeNet, Inc.
-#
-# All portions of the code written by CondeNet are Copyright (c) 2006-2010
-# CondeNet, Inc. All Rights Reserved.
-################################################################################
-from r2.models import Subreddit
-from r2.lib.db.operators import desc
-from r2.lib import count
-
-def run():
- sr_counts = count.get_sr_counts()
- names = [k for k, v in sr_counts.iteritems() if v != 0]
- srs = Subreddit._by_fullname(names)
- for name in names:
- sr,c = srs[name], sr_counts[name]
- if c != sr._downs and c > 0:
- sr._downs = max(c, 0)
- sr._commit()
- count.clear_sr_counts(names)
30 r2/r2/lib/sgm.pyx
@@ -1,35 +1,37 @@
# smart get multi:
# For any keys not found in the cache, miss_fn() is run and the result is
# stored in the cache. Then it returns everything, both the hits and misses.
-def sgm(cache, keys, miss_fn, str prefix='', int time=0):
+def sgm(cache, keys, miss_fn, str prefix='', int time=0, _update=False):
cdef dict ret
cdef dict s_keys
cdef dict cached
cdef dict calculated
cdef dict calculated_to_cache
- cdef set s_need
- cdef list k_need
+ cdef set still_need
ret = {}
+ # map the string versions of the keys to the real version. we only
+ # need this to interprate the cache's response and turn it back
+ # into the version they asked for
s_keys = {}
for key in keys:
s_keys[str(key)] = key
- cached = cache.get_multi(s_keys.keys(), prefix=prefix)
- for k, v in cached.iteritems():
- ret[s_keys[k]] = v
+ if _update:
+ cached = {}
+ else:
+ cached = cache.get_multi(s_keys.keys(), prefix=prefix)
+ for k, v in cached.iteritems():
+ ret[s_keys[k]] = v
if miss_fn and len(cached) < len(s_keys):
- # if we didn't get all of the keys from the cache. take the
- # missing subset
- s_need = set(s_keys.keys()) - set(ret.keys())
+ # if we didn't get all of the keys from the cache, go to the
+ # miss_fn with the keys they asked for minus the ones that we
+ # found
+ still_need = set(s_keys.values()) - set(ret.keys())
- k_need = []
- for i in s_need:
- k_need.append(s_keys[i])
-
- calculated = miss_fn(k_need)
+ calculated = miss_fn(still_need)
ret.update(calculated)
calculated_to_cache = {}
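
For reference, a hedged usage sketch of sgm() with the new _update flag; the
cache handle, ids and miss function here are made up, only the signature
comes from the code above:

    from r2.lib.sgm import sgm
    from r2.models import Comment

    def load_comments_from_db(ids):
        # stand-in miss function: receives only the keys that were not in
        # the cache (or every key when _update=True) and returns a dict
        # keyed by those same ids
        return Comment._byID(ids, data=True, return_dict=True)

    comments = sgm(g.cache, comment_ids, miss_fn=load_comments_from_db,
                   prefix='comment_', time=3600, _update=True)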
123 r2/r2/lib/sr_pops.py
@@ -0,0 +1,123 @@
+# The contents of this file are subject to the Common Public Attribution
+# License Version 1.0. (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
+# License Version 1.1, but Sections 14 and 15 have been added to cover use of
+# software over a computer network and provide for limited attribution for the
+# Original Developer. In addition, Exhibit A has been modified to be consistent
+# with Exhibit B.
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
+# the specific language governing rights and limitations under the License.
+#
+# The Original Code is Reddit.
+#
+# The Original Developer is the Initial Developer. The Initial Developer of the
+# Original Code is CondeNet, Inc.
+#
+# All portions of the code written by CondeNet are Copyright (c) 2006-2010
+# CondeNet, Inc. All Rights Reserved.
+################################################################################
+from pylons import g
+from r2.models import Subreddit
+from r2.lib.db.operators import desc
+from r2.lib import count
+from r2.lib.memoize import memoize
+from r2.lib.utils import fetch_things2, flatten, keymap
+
+# the length of the stored per-language list
+limit = 1000
+
+def cached_srs_key(lang, over18_state):
+ assert over18_state in ('no_over18', 'allow_over18', 'only_over18')
+ return str('sr_pop_%s_%s' % (lang, over18_state))
+
+def set_downs():
+ sr_counts = count.get_sr_counts()
+ names = [k for k, v in sr_counts.iteritems() if v != 0]
+ srs = Subreddit._by_fullname(names)
+ for name in names:
+ sr,c = srs[name], sr_counts[name]
+ if c != sr._downs and c > 0:
+ sr._downs = max(c, 0)
+ sr._commit()
+ count.clear_sr_counts(names)
+
+def cache_lists():
+ def _chop(srs):
+ srs.sort(key=lambda s: s._downs, reverse=True)
+ return srs[:limit]
+
+ # bylang =:= dict((lang, over18_state) -> [Subreddit])
+ # lang =:= all | lang()
+ # nsfwstate =:= no_over18 | allow_over18 | only_over18
+ bylang = {}
+
+ for sr in fetch_things2(Subreddit._query(sort=desc('_date'),
+ data=True)):
+ aid = getattr(sr, 'author_id', None)
+ if aid is not None and aid < 0:
+ # skip special system reddits like promos
+ continue
+
+ if sr.type not in ('public', 'restricted'):
+ # skips reddits that can't appear in the default list
+ # because of permissions
+ continue
+
+ g.log.debug(sr.name)
+ for lang in 'all', sr.lang:
+ over18s = ['allow_over18']
+ if sr.over_18:
+ over18s.append('only_over18')
+ else:
+ over18s.append('no_over18')
+
+ for over18 in over18s:
+ k = (lang, over18)
+ bylang.setdefault(k, []).append(sr)
+
+ # keep the lists small while we work
+ if len(bylang[k]) > limit*2:
+ g.log.debug('Shrinking %s' % (k,))
+ bylang[k] = _chop(bylang[k])
+
+ for (lang, over18), srs in bylang.iteritems():
+ srs = _chop(srs)
+ sr_tuples = map(lambda sr: (sr._downs, sr.allow_top, sr._id), srs)
+
+ g.log.debug("For %s/%s setting %s" % (lang, over18,
+ map(lambda sr: sr.name, srs)))
+
+ g.permacache.set(cached_srs_key(lang, over18), sr_tuples)
+
+def run():
+ set_downs()
+ cache_lists()
+
+def pop_reddits(langs, over18, over18_only, filter_allow_top = False):
+ if not over18:
+ over18_state = 'no_over18'
+ elif over18_only:
+ over18_state = 'only_over18'
+ else:
+ over18_state = 'allow_over18'
+
+ keys = map(lambda lang: cached_srs_key(lang, over18_state), langs)
+
+ # dict(lang_key -> [(_downs, allow_top, sr_id)])
+ srs = g.permacache.get_multi(keys)
+
+ tups = flatten(srs.values())
+
+ if filter_allow_top:
+ # remove the folks that have opted out of being on the front
+ # page as appropriate
+ tups = filter(lambda tpl: tpl[1], tups)
+
+ if len(srs) > 1:
+ # if there was only one returned, it's already sorted
+ tups.sort(key = lambda tpl: tpl[0], reverse=True)
+
+ return map(lambda tpl: tpl[2], tups)
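
A quick usage sketch for the new module: run() (set_downs plus cache_lists)
is the batch side and pop_reddits() is the read side. The language list and
slice below are illustrative:

    from r2.lib import sr_pops
    from r2.models import Subreddit

    # batch job: recompute _downs and store the per-language top lists
    sr_pops.run()

    # read side: fetch the precomputed popular reddit ids and hydrate a few
    sr_ids = sr_pops.pop_reddits(['en', 'all'], over18=False,
                                 over18_only=False, filter_allow_top=True)
    srs = Subreddit._byID(sr_ids[:25], data=True, return_dict=False)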
243 r2/r2/lib/utils/_utils.pyx
@@ -0,0 +1,243 @@
+import re
+from datetime import datetime, timedelta
+from pylons.i18n import ungettext, _
+import math
+
+cpdef str to_base(long q, str alphabet):
+ if q < 0: raise ValueError, "must supply a positive integer"
+ cdef long l
+ cdef long r
+ l = len(alphabet)
+ converted = []
+ while q != 0:
+ q, r = divmod(q, l)
+ converted.insert(0, alphabet[r])
+ return "".join(converted) or '0'
+
+cpdef str to36(long q):
+ return to_base(q, '0123456789abcdefghijklmnopqrstuvwxyz')
+
+def tup(item, ret_is_single=False):
+ """Forces casting of item to a tuple (for a list) or generates a
+ single element tuple (for anything else)"""
+ #return true for iterables, except for strings, which is what we want
+ if hasattr(item, '__iter__'):
+ return (item, False) if ret_is_single else item
+ else:
+ return ((item,), True) if ret_is_single else (item,)
+
+cdef _strips(str direction, text, remove):
+ if direction == 'l':
+ if text.startswith(remove):
+ return text[len(remove):]
+ elif direction == 'r':
+ if text.endswith(remove):
+ return text[:-len(remove)]
+ else:
+ raise ValueError, "Direction needs to be r or l."
+ return text
+
+cpdef rstrips(text, remove):
+ """
+ removes the string `remove` from the right of `text`
+
+ >>> rstrips("foobar", "bar")
+ 'foo'
+
+ """
+ return _strips('r', text, remove)
+
+cpdef lstrips(text, remove):
+ """
+ removes the string `remove` from the left of `text`
+
+ >>> lstrips("foobar", "foo")
+ 'bar'
+
+ """
+ return _strips('l', text, remove)
+
+def strips(text, remove):
+ """removes the string `remove` from the both sides of `text`
+
+ >>> strips("foobarfoo", "foo")
+ 'bar'
+
+ """
+ return rstrips(lstrips(text, remove), remove)
+
+ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
+ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
+ESCAPE_DCT = {
+ # escape all forward slashes to prevent </script> attack
+ '/': '\\/',
+ '\\': '\\\\',
+ '"': '\\"',
+ '\b': '\\b',
+ '\f': '\\f',
+ '\n': '\\n',
+ '\r': '\\r',
+ '\t': '\\t',
+ }
+def _string2js_replace(match):
+ return ESCAPE_DCT[match.group(0)]
+def string2js(s):
+ """adapted from http://svn.red-bean.com/bob/simplejson/trunk/simplejson/encoder.py"""
+ for i in range(20):
+ ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+
+ return '"' + ESCAPE.sub(_string2js_replace, s) + '"'
+
+def timeago(str interval):
+ """Returns a datetime object corresponding to time 'interval' in
+ the past. Interval is of the same form as is returned by
+ timetext(), i.e., '10 seconds'. The interval must be passed in in
+ English (i.e., untranslated) and the format is
+
+ [num] second|minute|hour|day|week|month|year(s)
+ """
+ from pylons import g
+ return datetime.now(g.tz) - timeinterval_fromstr(interval)
+
+def timefromnow(interval):
+ "The opposite of timeago"
+ from pylons import g
+ return datetime.now(g.tz) + timeinterval_fromstr(interval)
+
+cdef dict timeintervald = dict(second = 1,
+ minute = 60,
+ hour = 60 * 60,
+ day = 60 * 60 * 24,
+ week = 60 * 60 * 24 * 7,
+ month = 60 * 60 * 24 * 30,
+ year = 60 * 60 * 24 * 365)
+cdef timeinterval_fromstr(str interval):
+ "Used by timeago and timefromnow to generate timedeltas from friendly text"
+ parts = interval.strip().split(' ')
+ if len(parts) == 1:
+ num = 1
+ period = parts[0]
+ elif len(parts) == 2:
+ num, period = parts
+ num = int(num)
+ else:
+ raise ValueError, 'format should be ([num] second|minute|etc)'
+ period = rstrips(period, 's')
+
+ d = timeintervald[period]
+ delta = num * d
+ return timedelta(0, delta)
+
+cdef class TimeText(object):
+ __slots__ = ('single', 'plural')
+ cdef str single, plural
+
+ def __init__(self, single, plural):
+ self.single = single
+ self.plural = plural
+
+ def __call__(self, n):
+ return ungettext(self.single, self.plural, n)
+
+timechunks = (
+ (60 * 60 * 24 * 365, TimeText('year', 'years')),
+ (60 * 60 * 24 * 30, TimeText('month', 'months')),
+ (60 * 60 * 24, TimeText('day', 'days')),
+ (60 * 60, TimeText('hour', 'hours')),
+ (60, TimeText('minute', 'minutes')),
+ (1, TimeText('second', 'seconds'))
+ )
+cdef timetext(delta, resultion = 1, bare=True):
+ """
+ Takes a datetime object, returns the time between then and now
+ as a nicely formatted string, e.g "10 minutes"
+ Adapted from django which was adapted from
+ http://blog.natbat.co.uk/archive/2003/Jun/14/time_since
+ """
+ delta = max(delta, timedelta(0))
+ cdef long since = delta.days * 24 * 60 * 60 + delta.seconds
+ cdef int i, seconds, count, count2, n
+ cdef TimeText name, name2
+
+ for i, (seconds, name) in enumerate(timechunks):
+ count = since // seconds
+ if count != 0:
+ break
+
+ from r2.lib.strings import strings
+ if count == 0 and delta.seconds == 0 and delta != timedelta(0):
+ n = delta.microseconds // 1000
+ s = strings.time_label % dict(num=n,
+ time=ungettext("millisecond",
+ "milliseconds", n))
+ else:
+ s = strings.time_label % dict(num=count, time=name(int(count)))
+ if resultion > 1:
+ if i + 1 < len(timechunks):
+ # Now get the second item
+ seconds2, name2 = timechunks[i + 1]
+ count2 = (since - (seconds * count)) / seconds2
+ if count2 != 0:
+ s += ', %d %s' % (count2, name2(count2))
+
+ if not bare:
+ s += ' ' + _('ago')
+
+ return s
+
+def timesince(d, resultion = 1, bare = True):
+ from pylons import g
+ return timetext(datetime.now(g.tz) - d)
+
+def timeuntil(d, resultion = 1, bare = True):
+ from pylons import g
+ return timetext(d - datetime.now(g.tz))
+
+cpdef dict keymap(keys, callfn, mapfn = None, str prefix=''):
+ """map a set of keys before a get_multi to return a dict using the
+ original unmapped keys"""
+
+ cdef dict km = {}
+ cdef dict res # the result back from the callfn
+ cdef dict ret = {} # our return value
+
+ km = map_keys(keys, mapfn, prefix)
+ res = callfn(km.keys())
+ ret = unmap_keys(res, km)
+
+ return ret
+
+cdef map_keys(keys, mapfn, str prefix):
+ if (mapfn and prefix) or (not mapfn and not prefix):
+ raise ValueError("Set one of mapfn or prefix")
+
+ cdef dict km = {}
+ if mapfn:
+ for key in keys:
+ km[mapfn(key)] = key
+ else:
+ for key in keys:
+ km[prefix + str(key)] = key
+ return km
+
+cdef unmap_keys(mapped_keys, km):
+ cdef dict ret = {}
+ for key, value in mapped_keys.iteritems():
+ ret[km[key]] = value
+ return ret
+
+def prefix_keys(keys, str prefix, callfn):
+ if len(prefix):
+ return keymap(keys, callfn, prefix=prefix)
+ else:
+ return callfn(keys)
+
+def flatten(list lists):
+ """[[1,2], [3], [4,5,6]] -> [1,2,3,4,5,6]"""
+ cdef list ret = []
+ cdef list l
+
+ for l in lists:
+ ret.extend(l)
+
+ return ret
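
And a small usage sketch for the key-mapping helpers above; the backend
function is a stand-in for any get_multi-style callable:

    def fake_get_multi(keys):
        # stand-in backend: echoes the prefixed key back as the value
        return dict((k, 'value-for-' + k) for k in keys)

    print prefix_keys([1, 2, 3], 'comment_', fake_get_multi)
    # {1: 'value-for-comment_1', 2: 'value-for-comment_2',
    #  3: 'value-for-comment_3'}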
186 r2/r2/lib/utils/utils.py
@@ -35,18 +35,11 @@
from pylons.i18n import ungettext, _
from r2.lib.filters import _force_unicode
from mako.filters import url_escape
+
+from r2.lib.utils._utils import *
iters = (list, tuple, set)
-def tup(item, ret_is_single=False):
- """Forces casting of item to a tuple (for a list) or generates a
- single element tuple (for anything else)"""
- #return true for iterables, except for strings, which is what we want
- if hasattr(item, '__iter__'):
- return (item, False) if ret_is_single else item
- else:
- return ((item,), True) if ret_is_single else (item,)
-
def randstr(len, reallyrandom = False):
"""If reallyrandom = False, generates a random alphanumeric string
(base-36 compatible) of length len. If reallyrandom, add
@@ -165,45 +158,16 @@ def getvalue(x):
return stor
-def _strips(direction, text, remove):
- if direction == 'l':
- if text.startswith(remove):
- return text[len(remove):]
- elif direction == 'r':
- if text.endswith(remove):
- return text[:-len(remove)]
- else:
- raise ValueError, "Direction needs to be r or l."
- return text
-
-def rstrips(text, remove):
- """
- removes the string `remove` from the right of `text`
-
- >>> rstrips("foobar", "bar")
- 'foo'
-
- """
- return _strips('r', text, remove)
-
-def lstrips(text, remove):
- """
- removes the string `remove` from the left of `text`
-
- >>> lstrips("foobar", "foo")
- 'bar'
-
- """
- return _strips('l', text, remove)
-
-def strips(text, remove):
- """removes the string `remove` from the both sides of `text`
-
- >>> strips("foobarfoo", "foo")
- 'bar'
-
- """
- return rstrips(lstrips(text, remove), remove)
+class Enum(Storage):
+ def __init__(self, *a):
+ self.name = tuple(a)
+ Storage.__init__(self, ((e, i) for i, e in enumerate(a)))
+ def __contains__(self, item):
+ if isinstance(item, int):
+ return item in self.values()
+ else:
+ return Storage.__contains__(self, item)
+
class Enum(Storage):
def __init__(self, *a):
@@ -253,28 +217,6 @@ def fetchone(self):
else:
raise StopIteration
-def string2js(s):
- """adapted from http://svn.red-bean.com/bob/simplejson/trunk/simplejson/encoder.py"""
- ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
- ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
- ESCAPE_DCT = {
- # escape all forward slashes to prevent </script> attack
- '/': '\\/',
- '\\': '\\\\',
- '"': '\\"',
- '\b': '\\b',
- '\f': '\\f',
- '\n': '\\n',
- '\r': '\\r',
- '\t': '\\t',
- }
- for i in range(20):
- ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
-
- def replace(match):
- return ESCAPE_DCT[match.group(0)]
- return '"' + ESCAPE.sub(replace, s) + '"'
-
r_base_url = re.compile("(?i)(?:.+?://)?(?:www[\d]*\.)?([^#]*[^#/])/?")
def base_url(url):
res = r_base_url.findall(url)
@@ -371,92 +313,6 @@ def sanitize_url(url, require_scheme = False):
return
return url
-def timeago(interval):
- """Returns a datetime object corresponding to time 'interval' in
- the past. Interval is of the same form as is returned by
- timetext(), i.e., '10 seconds'. The interval must be passed in in
- English (i.e., untranslated) and the format is
-
- [num] second|minute|hour|day|week|month|year(s)
- """
- from pylons import g
- return datetime.now(g.tz) - timeinterval_fromstr(interval)
-
-def timefromnow(interval):
- "The opposite of timeago"
- from pylons import g
- return datetime.now(g.tz) + timeinterval_fromstr(interval)
-
-def timeinterval_fromstr(interval):
- "Used by timeago and timefromnow to generate timedeltas from friendly text"
- parts = interval.strip().split(' ')
- if len(parts) == 1:
- num = 1
- period = parts[0]
- elif len(parts) == 2:
- num, period = parts
- num = int(num)
- else:
- raise ValueError, 'format should be ([num] second|minute|etc)'
- period = rstrips(period, 's')
-
- d = dict(second = 1,
- minute = 60,
- hour = 60 * 60,
- day = 60 * 60 * 24,
- week = 60 * 60 * 24 * 7,
- month = 60 * 60 * 24 * 30,
- year = 60 * 60 * 24 * 365)[period]
- delta = num * d
- return timedelta(0, delta)
-
-def timetext(delta, resultion = 1, bare=True):
- """
- Takes a datetime object, returns the time between then and now
- as a nicely formatted string, e.g "10 minutes"
- Adapted from django which was adapted from
- http://blog.natbat.co.uk/archive/2003/Jun/14/time_since
- """
- chunks = (
- (60 * 60 * 24 * 365, lambda n: ungettext('year', 'years', n)),
- (60 * 60 * 24 * 30, lambda n: ungettext('month', 'months', n)),
- (60 * 60 * 24, lambda n : ungettext('day', 'days', n)),
- (60 * 60, lambda n: ungettext('hour', 'hours', n)),
- (60, lambda n: ungettext('minute', 'minutes', n)),
- (1, lambda n: ungettext('second', 'seconds', n))
- )
- delta = max(delta, timedelta(0))
- since = delta.days * 24 * 60 * 60 + delta.seconds
- for i, (seconds, name) in enumerate(chunks):
- count = math.floor(since / seconds)
- if count != 0:
- break
-
- from r2.lib.strings import strings
- if count == 0 and delta.seconds == 0 and delta != timedelta(0):
- n = math.floor(delta.microseconds / 1000)
- s = strings.time_label % dict(num=n,
- time=ungettext("millisecond",
- "milliseconds", n))
- else:
- s = strings.time_label % dict(num=count, time=name(int(count)))
- if resultion > 1:
- if i + 1 < len(chunks):
- # Now get the second item
- seconds2, name2 = chunks[i + 1]
- count2 = (since - (seconds * count)) / seconds2
- if count2 != 0:
- s += ', %d %s' % (count2, name2(count2))
- if not bare: s += ' ' + _('ago')
- return s
-
-def timesince(d, resultion = 1, bare = True):
- from pylons import g
- return timetext(datetime.now(g.tz) - d)
-
-def timeuntil(d, resultion = 1, bare = True):
- from pylons import g
- return timetext(d - datetime.now(g.tz))
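For reference, the helpers removed above all lean on one parser: timeinterval_fromstr turns a phrase like '10 minutes' into a timedelta, and timeago/timefromnow subtract or add that delta to the current time. A couple of worked values matching the docstrings above (illustrative only):

    timeinterval_fromstr('10 minutes')   # timedelta(0, 600)
    timeinterval_fromstr('hour')         # bare period defaults to 1, so timedelta(0, 3600)
    # timeago('1 day')      -> datetime.now(g.tz) - timedelta(0, 86400)
    # timefromnow('1 day')  -> datetime.now(g.tz) + timedelta(0, 86400)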
# Truncate a time to a certain number of minutes
# e.g, trunc_time(5:52, 30) == 5:30
@@ -474,17 +330,11 @@ def trunc_time(time, mins, hours=None):
microsecond = 0)
-def to_base(q, alphabet):
- if q < 0: raise ValueError, "must supply a positive integer"
- l = len(alphabet)
- converted = []
- while q != 0:
- q, r = divmod(q, l)
- converted.insert(0, alphabet[r])
- return "".join(converted) or '0'
-
-def to36(q):
- return to_base(q, '0123456789abcdefghijklmnopqrstuvwxyz')
+def median(l):
+ if l:
+ s = sorted(l)
+ i = len(s) / 2
+ return s[i]
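A few worked values for the median helper added above (note the Python 2 integer division: an even-length list yields the upper of the two middle elements, and an empty list implicitly returns None):

    median([3, 1, 2])      # sorted -> [1, 2, 3], index 3/2 == 1, returns 2
    median([1, 2, 3, 4])   # sorted -> [1, 2, 3, 4], index 4/2 == 2, returns 3
    median([])             # falsy list, returns None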
@@ -1021,7 +871,7 @@ def title_to_url(title, max_length = 50):
last_word = title.rfind('_')
if (last_word > 0):
title = title[:last_word]
- return title
+ return title or "_"
def trace(fn):
import sys
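The one-line change to title_to_url above guards against an empty slug: if the preceding filtering strips everything out of the title, the permalink segment falls back to "_" instead of an empty string. A trivial sketch of the fallback, with a made-up value:

    title = ""            # whatever survived the filtering above
    slug = title or "_"   # '' is falsy, so the URL gets '_' rather than an empty segment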
View
213 r2/r2/models/_builder.pyx
@@ -1,21 +1,10 @@
from builder import Builder, MAX_RECURSION, empty_listing
from r2.lib.wrapped import Wrapped
-from r2.lib.comment_tree import link_comments
+from r2.lib.comment_tree import link_comments, link_comments_and_sort, tree_sort_fn
from r2.models.link import *
from r2.lib.db import operators
from r2.lib import utils
-from operator import attrgetter
-
-class _ColSorter(object):
- __slots__ = ['sort', 'x']
-
- def __init__(self, sort):
- self.sort = sort
-
- def key(self, x):
- return getattr(x, self.sort.col), x._date
-
class _CommentBuilder(Builder):
def __init__(self, link, sort, comment = None, context = None,
load_more=True, continue_this_thread=True,
@@ -28,128 +17,136 @@ class _CommentBuilder(Builder):
self.max_depth = max_depth
self.continue_this_thread = continue_this_thread
- if sort.col == '_date':
- self.sort_key = attrgetter('_date')
- else:
- self.sort_key = _ColSorter(sort).key
+ self.sort = sort
self.rev_sort = True if isinstance(sort, operators.desc) else False
def get_items(self, num):
- r = link_comments(self.link._id)
- cids, cid_tree, depth, num_children = r
+ from r2.lib.lock import TimeoutExpired
+        cdef list cids
+ cdef dict cid_tree
+ cdef dict depth
+ cdef dict num_children
+ cdef dict parents
+ cdef dict sorter
+
+ r = link_comments_and_sort(self.link._id, self.sort.col)
+ cids, cid_tree, depth, num_children, parents, sorter = r
if (not isinstance(self.comment, utils.iters)
and self.comment and not self.comment._id in depth):
- g.log.error("self.comment (%d) not in depth. Forcing update..."
+ g.log.error("Error - self.comment (%d) not in depth. Forcing update..."
% self.comment._id)
- r = link_comments(self.link._id, _update=True)
- cids, cid_tree, depth, num_children = r
+ try:
+ r = link_comments(self.link._id, _update=True)
+ cids, cid_tree, depth, num_children = r
+ except TimeoutExpired:
+ g.log.error("Error in _builder.pyx: timeout from tree reload (%r)" % self.link)
+ raise
if not self.comment._id in depth:
g.log.error("Update didn't help. This is gonna end in tears.")
- if cids:
- comments = set(Comment._byID(cids, data = True,
- return_dict = False))
- else:
- comments = set()
-
- comment_dict = {}
- for cm in comments:
- comment_dict[cm._id] = cm
-
- #convert tree into objects
- comment_tree = {}
- for k, v in cid_tree.iteritems():
- comment_tree[k] = [comment_dict[cid] for cid in cid_tree[k]]
- items = []
- extra = {}
- top = None
- dont_collapse = []
- ignored_parent_ids = []
- #loading a portion of the tree
-
- start_depth = 0
-
- candidates = []
+ cdef list items = []
+ cdef dict extra = {}
+ cdef list dont_collapse = []
+ cdef list ignored_parent_ids = []
+
+ cdef int start_depth = 0
+
+ cdef list candidates = []
+ cdef int offset_depth = 0
+
+ # more comments links:
if isinstance(self.comment, utils.iters):
- candidates.extend(self.comment)
for cm in self.comment:
- dont_collapse.append(cm._id)
- #assume the comments all have the same parent
- # TODO: removed by Chris to get rid of parent being sent
- # when morecomments is used.
- #if hasattr(candidates[0], "parent_id"):
- # parent = comment_dict[candidates[0].parent_id]
- # items.append(parent)
- if (hasattr(candidates[0], "parent_id") and
- candidates[0].parent_id is not None):
- ignored_parent_ids.append(candidates[0].parent_id)
- start_depth = depth[candidates[0].parent_id]
- #if permalink
+ # deleted comments will be removed from the cids list
+ if cm._id in cids:
+ dont_collapse.append(cm._id)
+ candidates.append(cm._id)
+ # if nothing but deleted comments, the candidate list might be empty
+ if candidates:
+ pid = parents[candidates[0]]
+ if pid is not None:
+ ignored_parent_ids.append(pid)
+ start_depth = depth[pid]
+
+ # permalinks:
elif self.comment:
- top = self.comment
- dont_collapse.append(top._id)
+            # we are going to mess around with the cid_tree's contents
+ # so better copy it
+ cid_tree = cid_tree.copy()
+ top = self.comment._id
+ dont_collapse.append(top)
#add parents for context
- while self.context > 0 and top.parent_id:
+ pid = parents[top]
+ while self.context > 0 and pid is not None:
self.context -= 1
- new_top = comment_dict[top.parent_id]
- comment_tree[new_top._id] = [top]
- num_children[new_top._id] = num_children[top._id] + 1
- dont_collapse.append(new_top._id)
- top = new_top
+ pid = parents[top]
+ cid_tree[pid] = [top]
+ num_children[pid] = num_children[top] + 1
+ dont_collapse.append(pid)
+ # top will be appended to candidates, so stop updating
+                # it if we hit the top of the thread
+ if pid is not None:
+ top = pid
candidates.append(top)
+ # the reference depth is that of the focal element
+ if top is not None:
+ offset_depth = depth[top]
#else start with the root comments
else:
- candidates.extend(comment_tree.get(top, ()))
-
- #update the starting depth if required
- if top and depth[top._id] > 0:
- delta = depth[top._id]
- for k, v in depth.iteritems():
- depth[k] = v - delta
+ candidates.extend(cid_tree.get(None, ()))
#find the comments
- num_have = 0
- candidates.sort(key = self.sort_key, reverse = self.rev_sort)
+ cdef int num_have = 0
+ if candidates:
+ candidates = [x for x in candidates if sorter.get(x) is not None]
+ # complain if we removed a candidate and now have nothing
+ # to return to the user
+ if not candidates:
+ g.log.error("_builder.pyx: empty candidate list: %r" %
+ request.fullpath)
+ return []
+ candidates.sort(key = sorter.get, reverse = self.rev_sort)
while num_have < num and candidates:
to_add = candidates.pop(0)
- if to_add not in comments:
- g.log.error("candidate %r comment missing from link %r" %
- (to_add, self.link))
+ if to_add not in cids:
continue
- comments.remove(to_add)
- if to_add._deleted and not comment_tree.has_key(to_add._id):
- pass
- elif depth[to_add._id] < self.max_depth + start_depth:
+ if (depth[to_add] - offset_depth) < self.max_depth + start_depth:
#add children
- if comment_tree.has_key(to_add._id):
- candidates.extend(comment_tree[to_add._id])
- candidates.sort(key = self.sort_key, reverse = self.rev_sort)
+ if cid_tree.has_key(to_add):
+ candidates.extend([x for x in cid_tree[to_add]
+ if sorter.get(x) is not None])
+ candidates.sort(key = sorter.get, reverse = self.rev_sort)
items.append(to_add)
num_have += 1
elif self.continue_this_thread:
#add the recursion limit
- p_id = to_add.parent_id
- w = Wrapped(MoreRecursion(self.link, 0,
- comment_dict[p_id]))
+ p_id = parents[to_add]
+ w = Wrapped(MoreRecursion(self.link, 0, p_id))
w.children.append(to_add)
extra[p_id] = w
- wrapped = self.wrap_items(items)
+ # items is a list of things we actually care about so load them
+ items = Comment._byID(items, data = True, return_dict = False)
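Taken together, the refactored loop above works entirely on comment ids: candidates stay sorted by a precomputed sort value, children of each emitted id are fed back into the pool, and Comment objects are only fetched once the final id list is known. A rough, simplified sketch of that selection strategy (names and data here are illustrative, not the real builder API; depth limits and "more" stubs are omitted):

    def select_ids(tree, sorter, num, rev_sort=True):
        # tree: parent id -> list of child ids (None is the root level)
        # sorter: comment id -> precomputed sort value from the permacache
        items = []
        candidates = [c for c in tree.get(None, ()) if sorter.get(c) is not None]
        candidates.sort(key=sorter.get, reverse=rev_sort)
        while candidates and len(items) < num:
            cid = candidates.pop(0)
            items.append(cid)
            # pull the chosen comment's children into the pool and re-sort
            candidates.extend(c for c in tree.get(cid, ()) if sorter.get(c) is not None)
            candidates.sort(key=sorter.get, reverse=rev_sort)
        return items  # only now would Comment._byID(items, ...) be called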
</